kernel/bpf/verifier.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3  * Copyright (c) 2016 Facebook
   4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5  */
   6 #include <uapi/linux/btf.h>
   7 #include <linux/kernel.h>
   8 #include <linux/types.h>
   9 #include <linux/slab.h>
  10 #include <linux/bpf.h>
  11 #include <linux/btf.h>
  12 #include <linux/bpf_verifier.h>
  13 #include <linux/filter.h>
  14 #include <net/netlink.h>
  15 #include <linux/file.h>
  16 #include <linux/vmalloc.h>
  17 #include <linux/stringify.h>
  18 #include <linux/bsearch.h>
  19 #include <linux/sort.h>
  20 #include <linux/perf_event.h>
  21 #include <linux/ctype.h>
  22 #include <linux/error-injection.h>
  23 #include <linux/bpf_lsm.h>
  24 #include <linux/btf_ids.h>
  25
  26 #include "disasm.h"
  27
  28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  29 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
  30         [_id] = & _name ## _verifier_ops,
  31 #define BPF_MAP_TYPE(_id, _ops)
  32 #define BPF_LINK_TYPE(_id, _name)
  33 #include <linux/bpf_types.h>
  34 #undef BPF_PROG_TYPE
  35 #undef BPF_MAP_TYPE
  36 #undef BPF_LINK_TYPE
  37 };
  38
  39 /* bpf_check() is a static code analyzer that walks eBPF program
  40  * instruction by instruction and updates register/stack state.
  41  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  42  *
  43  * The first pass is depth-first-search to check that the program is a DAG.
  44  * It rejects the following programs:
  45  * - larger than BPF_MAXINSNS insns
  46  * - if loop is present (detected via back-edge)
  47  * - unreachable insns exist (shouldn't be a forest. program = one function)
  48  * - out of bounds or malformed jumps
  49  * The second pass is all possible path descent from the 1st insn.
  50  * Since it's analyzing all pathes through the program, the length of the
  51  * analysis is limited to 64k insn, which may be hit even if total number of
  52  * insn is less then 4K, but there are too many branches that change stack/regs.
  53  * Number of 'branches to be analyzed' is limited to 1k
  54  *
  55  * On entry to each instruction, each register has a type, and the instruction
  56  * changes the types of the registers depending on instruction semantics.
  57  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  58  * copied to R1.
  59  *
  60  * All registers are 64-bit.
  61  * R0 - return register
  62  * R1-R5 argument passing registers
  63  * R6-R9 callee saved registers
  64  * R10 - frame pointer read-only
  65  *
  66  * At the start of BPF program the register R1 contains a pointer to bpf_context
  67  * and has type PTR_TO_CTX.
  68  *
  69  * Verifier tracks arithmetic operations on pointers in case:
  70  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  71  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  72  * 1st insn copies R10 (which has FRAME_PTR) type into R1
  73  * and 2nd arithmetic instruction is pattern matched to recognize
  74  * that it wants to construct a pointer to some element within stack.
  75  * So after 2nd insn, the register R1 has type PTR_TO_STACK
  76  * (and -20 constant is saved for further stack bounds checking).
  77  * Meaning that this reg is a pointer to stack plus known immediate constant.
  78  *
  79  * Most of the time the registers have SCALAR_VALUE type, which
  80  * means the register has some value, but it's not a valid pointer.
  81  * (like pointer plus pointer becomes SCALAR_VALUE type)
  82  *
  83  * When verifier sees load or store instructions the type of base register
  84  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  85  * four pointer types recognized by check_mem_access() function.
  86  *
  87  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  88  * and the range of [ptr, ptr + map's value_size) is accessible.
  89  *
  90  * registers used to pass values to function calls are checked against
  91  * function argument constraints.
  92  *
  93  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  94  * It means that the register type passed to this function must be
  95  * PTR_TO_STACK and it will be used inside the function as
  96  * 'pointer to map element key'
  97  *
  98  * For example the argument constraints for bpf_map_lookup_elem():
  99  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 100  *   .arg1_type = ARG_CONST_MAP_PTR,
 101  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 102  *
 103  * ret_type says that this function returns 'pointer to map elem value or null'
 104  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 105  * 2nd argument should be a pointer to stack, which will be used inside
 106  * the helper function as a pointer to map element key.
 107  *
 108  * On the kernel side the helper function looks like:
 109  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 110  * {
 111  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 112  *    void *key = (void *) (unsigned long) r2;
 113  *    void *value;
 114  *
 115  *    here kernel can access 'key' and 'map' pointers safely, knowing that
 116  *    [key, key + map->key_size) bytes are valid and were initialized on
 117  *    the stack of eBPF program.
 118  * }
 119  *
 120  * Corresponding eBPF program may look like:
 121  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 122  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 123  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 124  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 125  * here verifier looks at prototype of map_lookup_elem() and sees:
 126  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 127  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 128  *
 129  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 130  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 131  * and were initialized prior to this call.
 132  * If it's ok, then verifier allows this BPF_CALL insn and looks at
 133  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 134  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 135  * returns ether pointer to map value or NULL.
 136  *
 137  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 138  * insn, the register holding that pointer in the true branch changes state to
 139  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 140  * branch. See check_cond_jmp_op().
 141  *
 142  * After the call R0 is set to return type of the function and registers R1-R5
 143  * are set to NOT_INIT to indicate that they are no longer readable.
 144  *
 145  * The following reference types represent a potential reference to a kernel
 146  * resource which, after first being allocated, must be checked and freed by
 147  * the BPF program:
 148  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 149  *
 150  * When the verifier sees a helper call return a reference type, it allocates a
 151  * pointer id for the reference and stores it in the current function state.
 152  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 153  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 154  * passes through a NULL-check conditional. For the branch wherein the state is
 155  * changed to CONST_IMM, the verifier releases the reference.
 156  *
 157  * For each helper function that allocates a reference, such as
 158  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 159  * bpf_sk_release(). When a reference type passes into the release function,
 160  * the verifier also releases the reference. If any unchecked or unreleased
 161  * reference remains at the end of the program, the verifier rejects it.
 162  */
 163
 164 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
 165 struct bpf_verifier_stack_elem {
 166         /* verifer state is 'st'
 167          * before processing instruction 'insn_idx'
 168          * and after processing instruction 'prev_insn_idx'
 169          */
 170         struct bpf_verifier_state st;
 171         int insn_idx;
 172         int prev_insn_idx;
 173         struct bpf_verifier_stack_elem *next;
 174         /* length of verifier log at the time this state was pushed on stack */
 175         u32 log_pos;
 176 };
 177
 178 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
 179 #define BPF_COMPLEXITY_LIMIT_STATES     64
 180
 181 #define BPF_MAP_KEY_POISON      (1ULL << 63)
 182 #define BPF_MAP_KEY_SEEN        (1ULL << 62)
 183
 184 #define BPF_MAP_PTR_UNPRIV      1UL
 185 #define BPF_MAP_PTR_POISON      ((void *)((0xeB9FUL << 1) +     \
 186                                           POISON_POINTER_DELTA))
 187 #define BPF_MAP_PTR(X)          ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
 188
 189 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 190 {
 191         return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
 192 }
 193
 194 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 195 {
 196         return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
 197 }
 198
 199 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 200                               const struct bpf_map *map, bool unpriv)
 201 {
 202         BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 203         unpriv |= bpf_map_ptr_unpriv(aux);
 204         aux->map_ptr_state = (unsigned long)map |
 205                              (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 206 }
 207
 208 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
 209 {
 210         return aux->map_key_state & BPF_MAP_KEY_POISON;
 211 }
 212
 213 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
 214 {
 215         return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
 216 }
 217
 218 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
 219 {
 220         return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
 221 }
 222
 223 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 224 {
 225         bool poisoned = bpf_map_key_poisoned(aux);
 226
 227         aux->map_key_state = state | BPF_MAP_KEY_SEEN |
 228                              (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 229 }
 230
 231 struct bpf_call_arg_meta {
 232         struct bpf_map *map_ptr;
 233         bool raw_mode;
 234         bool pkt_access;
 235         int regno;
 236         int access_size;
 237         int mem_size;
 238         u64 msize_max_value;
 239         int ref_obj_id;
 240         int func_id;
 241         u32 btf_id;
 242         u32 ret_btf_id;
 243 };
 244
 245 struct btf *btf_vmlinux;
 246
 247 static DEFINE_MUTEX(bpf_verifier_lock);
 248
 249 static const struct bpf_line_info *
 250 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 251 {
 252         const struct bpf_line_info *linfo;
 253         const struct bpf_prog *prog;
 254         u32 i, nr_linfo;
 255
 256         prog = env->prog;
 257         nr_linfo = prog->aux->nr_linfo;
 258
 259         if (!nr_linfo || insn_off >= prog->len)
 260                 return NULL;
 261
 262         linfo = prog->aux->linfo;
 263         for (i = 1; i < nr_linfo; i++)
 264                 if (insn_off < linfo[i].insn_off)
 265                         break;
 266
 267         return &linfo[i - 1];
 268 }
 269
 270 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 271                        va_list args)
 272 {
 273         unsigned int n;
 274
 275         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 276
 277         WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 278                   "verifier log line truncated - local buffer too short\n");
 279
 280         n = min(log->len_total - log->len_used - 1, n);
 281         log->kbuf[n] = '\0';
 282
 283         if (log->level == BPF_LOG_KERNEL) {
 284                 pr_err("BPF:%s\n", log->kbuf);
 285                 return;
 286         }
 287         if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 288                 log->len_used += n;
 289         else
 290                 log->ubuf = NULL;
 291 }
 292
 293 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
 294 {
 295         char zero = 0;
 296
 297         if (!bpf_verifier_log_needed(log))
 298                 return;
 299
 300         log->len_used = new_pos;
 301         if (put_user(zero, log->ubuf + new_pos))
 302                 log->ubuf = NULL;
 303 }
 304
 305 /* log_level controls verbosity level of eBPF verifier.
 306  * bpf_verifier_log_write() is used to dump the verification trace to the log,
 307  * so the user can figure out what's wrong with the program
 308  */
 309 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 310                                            const char *fmt, ...)
 311 {
 312         va_list args;
 313
 314         if (!bpf_verifier_log_needed(&env->log))
 315                 return;
 316
 317         va_start(args, fmt);
 318         bpf_verifier_vlog(&env->log, fmt, args);
 319         va_end(args);
 320 }
 321 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 322
 323 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 324 {
 325         struct bpf_verifier_env *env = private_data;
 326         va_list args;
 327
 328         if (!bpf_verifier_log_needed(&env->log))
 329                 return;
 330
 331         va_start(args, fmt);
 332         bpf_verifier_vlog(&env->log, fmt, args);
 333         va_end(args);
 334 }
 335
 336 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 337                             const char *fmt, ...)
 338 {
 339         va_list args;
 340
 341         if (!bpf_verifier_log_needed(log))
 342                 return;
 343
 344         va_start(args, fmt);
 345         bpf_verifier_vlog(log, fmt, args);
 346         va_end(args);
 347 }
 348
 349 static const char *ltrim(const char *s)
 350 {
 351         while (isspace(*s))
 352                 s++;
 353
 354         return s;
 355 }
 356
 357 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 358                                          u32 insn_off,
 359                                          const char *prefix_fmt, ...)
 360 {
 361         const struct bpf_line_info *linfo;
 362
 363         if (!bpf_verifier_log_needed(&env->log))
 364                 return;
 365
 366         linfo = find_linfo(env, insn_off);
 367         if (!linfo || linfo == env->prev_linfo)
 368                 return;
 369
 370         if (prefix_fmt) {
 371                 va_list args;
 372
 373                 va_start(args, prefix_fmt);
 374                 bpf_verifier_vlog(&env->log, prefix_fmt, args);
 375                 va_end(args);
 376         }
 377
 378         verbose(env, "%s\n",
 379                 ltrim(btf_name_by_offset(env->prog->aux->btf,
 380                                          linfo->line_off)));
 381
 382         env->prev_linfo = linfo;
 383 }
 384
 385 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 386 {
 387         return type == PTR_TO_PACKET ||
 388                type == PTR_TO_PACKET_META;
 389 }
 390
 391 static bool type_is_sk_pointer(enum bpf_reg_type type)
 392 {
 393         return type == PTR_TO_SOCKET ||
 394                 type == PTR_TO_SOCK_COMMON ||
 395                 type == PTR_TO_TCP_SOCK ||
 396                 type == PTR_TO_XDP_SOCK;
 397 }
 398
 399 static bool reg_type_not_null(enum bpf_reg_type type)
 400 {
 401         return type == PTR_TO_SOCKET ||
 402                 type == PTR_TO_TCP_SOCK ||
 403                 type == PTR_TO_MAP_VALUE ||
 404                 type == PTR_TO_SOCK_COMMON;
 405 }
 406
 407 static bool reg_type_may_be_null(enum bpf_reg_type type)
 408 {
 409         return type == PTR_TO_MAP_VALUE_OR_NULL ||
 410                type == PTR_TO_SOCKET_OR_NULL ||
 411                type == PTR_TO_SOCK_COMMON_OR_NULL ||
 412                type == PTR_TO_TCP_SOCK_OR_NULL ||
 413                type == PTR_TO_BTF_ID_OR_NULL ||
 414                type == PTR_TO_MEM_OR_NULL ||
 415                type == PTR_TO_RDONLY_BUF_OR_NULL ||
 416                type == PTR_TO_RDWR_BUF_OR_NULL;
 417 }
 418
 419 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 420 {
 421         return reg->type == PTR_TO_MAP_VALUE &&
 422                 map_value_has_spin_lock(reg->map_ptr);
 423 }
 424
 425 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 426 {
 427         return type == PTR_TO_SOCKET ||
 428                 type == PTR_TO_SOCKET_OR_NULL ||
 429                 type == PTR_TO_TCP_SOCK ||
 430                 type == PTR_TO_TCP_SOCK_OR_NULL ||
 431                 type == PTR_TO_MEM ||
 432                 type == PTR_TO_MEM_OR_NULL;
 433 }
 434
 435 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 436 {
 437         return type == ARG_PTR_TO_SOCK_COMMON;
 438 }
 439
 440 static bool arg_type_may_be_null(enum bpf_arg_type type)
 441 {
 442         return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
 443                type == ARG_PTR_TO_MEM_OR_NULL ||
 444                type == ARG_PTR_TO_CTX_OR_NULL ||
 445                type == ARG_PTR_TO_SOCKET_OR_NULL ||
 446                type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
 447 }
 448
 449 /* Determine whether the function releases some resources allocated by another
 450  * function call. The first reference type argument will be assumed to be
 451  * released by release_reference().
 452  */
 453 static bool is_release_function(enum bpf_func_id func_id)
 454 {
 455         return func_id == BPF_FUNC_sk_release ||
 456                func_id == BPF_FUNC_ringbuf_submit ||
 457                func_id == BPF_FUNC_ringbuf_discard;
 458 }
 459
 460 static bool may_be_acquire_function(enum bpf_func_id func_id)
 461 {
 462         return func_id == BPF_FUNC_sk_lookup_tcp ||
 463                 func_id == BPF_FUNC_sk_lookup_udp ||
 464                 func_id == BPF_FUNC_skc_lookup_tcp ||
 465                 func_id == BPF_FUNC_map_lookup_elem ||
 466                 func_id == BPF_FUNC_ringbuf_reserve;
 467 }
 468
 469 static bool is_acquire_function(enum bpf_func_id func_id,
 470                                 const struct bpf_map *map)
 471 {
 472         enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
 473
 474         if (func_id == BPF_FUNC_sk_lookup_tcp ||
 475             func_id == BPF_FUNC_sk_lookup_udp ||
 476             func_id == BPF_FUNC_skc_lookup_tcp ||
 477             func_id == BPF_FUNC_ringbuf_reserve)
 478                 return true;
 479
 480         if (func_id == BPF_FUNC_map_lookup_elem &&
 481             (map_type == BPF_MAP_TYPE_SOCKMAP ||
 482              map_type == BPF_MAP_TYPE_SOCKHASH))
 483                 return true;
 484
 485         return false;
 486 }
 487
 488 static bool is_ptr_cast_function(enum bpf_func_id func_id)
 489 {
 490         return func_id == BPF_FUNC_tcp_sock ||
 491                 func_id == BPF_FUNC_sk_fullsock ||
 492                 func_id == BPF_FUNC_skc_to_tcp_sock ||
 493                 func_id == BPF_FUNC_skc_to_tcp6_sock ||
 494                 func_id == BPF_FUNC_skc_to_udp6_sock ||
 495                 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
 496                 func_id == BPF_FUNC_skc_to_tcp_request_sock;
 497 }
 498
 499 /* string representation of 'enum bpf_reg_type' */
 500 static const char * const reg_type_str[] = {
 501         [NOT_INIT]              = "?",
 502         [SCALAR_VALUE]          = "inv",
 503         [PTR_TO_CTX]            = "ctx",
 504         [CONST_PTR_TO_MAP]      = "map_ptr",
 505         [PTR_TO_MAP_VALUE]      = "map_value",
 506         [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
 507         [PTR_TO_STACK]          = "fp",
 508         [PTR_TO_PACKET]         = "pkt",
 509         [PTR_TO_PACKET_META]    = "pkt_meta",
 510         [PTR_TO_PACKET_END]     = "pkt_end",
 511         [PTR_TO_FLOW_KEYS]      = "flow_keys",
 512         [PTR_TO_SOCKET]         = "sock",
 513         [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 514         [PTR_TO_SOCK_COMMON]    = "sock_common",
 515         [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 516         [PTR_TO_TCP_SOCK]       = "tcp_sock",
 517         [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 518         [PTR_TO_TP_BUFFER]      = "tp_buffer",
 519         [PTR_TO_XDP_SOCK]       = "xdp_sock",
 520         [PTR_TO_BTF_ID]         = "ptr_",
 521         [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
 522         [PTR_TO_PERCPU_BTF_ID]  = "percpu_ptr_",
 523         [PTR_TO_MEM]            = "mem",
 524         [PTR_TO_MEM_OR_NULL]    = "mem_or_null",
 525         [PTR_TO_RDONLY_BUF]     = "rdonly_buf",
 526         [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
 527         [PTR_TO_RDWR_BUF]       = "rdwr_buf",
 528         [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
 529 };
 530
 531 static char slot_type_char[] = {
 532         [STACK_INVALID] = '?',
 533         [STACK_SPILL]   = 'r',
 534         [STACK_MISC]    = 'm',
 535         [STACK_ZERO]    = '0',
 536 };
 537
 538 static void print_liveness(struct bpf_verifier_env *env,
 539                            enum bpf_reg_liveness live)
 540 {
 541         if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 542             verbose(env, "_");
 543         if (live & REG_LIVE_READ)
 544                 verbose(env, "r");
 545         if (live & REG_LIVE_WRITTEN)
 546                 verbose(env, "w");
 547         if (live & REG_LIVE_DONE)
 548                 verbose(env, "D");
 549 }
 550
 551 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 552                                    const struct bpf_reg_state *reg)
 553 {
 554         struct bpf_verifier_state *cur = env->cur_state;
 555
 556         return cur->frame[reg->frameno];
 557 }
 558
 559 const char *kernel_type_name(u32 id)
 560 {
 561         return btf_name_by_offset(btf_vmlinux,
 562                                   btf_type_by_id(btf_vmlinux, id)->name_off);
 563 }
 564
 565 static void print_verifier_state(struct bpf_verifier_env *env,
 566                                  const struct bpf_func_state *state)
 567 {
 568         const struct bpf_reg_state *reg;
 569         enum bpf_reg_type t;
 570         int i;
 571
 572         if (state->frameno)
 573                 verbose(env, " frame%d:", state->frameno);
 574         for (i = 0; i < MAX_BPF_REG; i++) {
 575                 reg = &state->regs[i];
 576                 t = reg->type;
 577                 if (t == NOT_INIT)
 578                         continue;
 579                 verbose(env, " R%d", i);
 580                 print_liveness(env, reg->live);
 581                 verbose(env, "=%s", reg_type_str[t]);
 582                 if (t == SCALAR_VALUE && reg->precise)
 583                         verbose(env, "P");
 584                 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 585                     tnum_is_const(reg->var_off)) {
 586                         /* reg->off should be 0 for SCALAR_VALUE */
 587                         verbose(env, "%lld", reg->var_off.value + reg->off);
 588                 } else {
 589                         if (t == PTR_TO_BTF_ID ||
 590                             t == PTR_TO_BTF_ID_OR_NULL ||
 591                             t == PTR_TO_PERCPU_BTF_ID)
 592                                 verbose(env, "%s", kernel_type_name(reg->btf_id));
 593                         verbose(env, "(id=%d", reg->id);
 594                         if (reg_type_may_be_refcounted_or_null(t))
 595                                 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
 596                         if (t != SCALAR_VALUE)
 597                                 verbose(env, ",off=%d", reg->off);
 598                         if (type_is_pkt_pointer(t))
 599                                 verbose(env, ",r=%d", reg->range);
 600                         else if (t == CONST_PTR_TO_MAP ||
 601                                  t == PTR_TO_MAP_VALUE ||
 602                                  t == PTR_TO_MAP_VALUE_OR_NULL)
 603                                 verbose(env, ",ks=%d,vs=%d",
 604                                         reg->map_ptr->key_size,
 605                                         reg->map_ptr->value_size);
 606                         if (tnum_is_const(reg->var_off)) {
 607                                 /* Typically an immediate SCALAR_VALUE, but
 608                                  * could be a pointer whose offset is too big
 609                                  * for reg->off
 610                                  */
 611                                 verbose(env, ",imm=%llx", reg->var_off.value);
 612                         } else {
 613                                 if (reg->smin_value != reg->umin_value &&
 614                                     reg->smin_value != S64_MIN)
 615                                         verbose(env, ",smin_value=%lld",
 616                                                 (long long)reg->smin_value);
 617                                 if (reg->smax_value != reg->umax_value &&
 618                                     reg->smax_value != S64_MAX)
 619                                         verbose(env, ",smax_value=%lld",
 620                                                 (long long)reg->smax_value);
 621                                 if (reg->umin_value != 0)
 622                                         verbose(env, ",umin_value=%llu",
 623                                                 (unsigned long long)reg->umin_value);
 624                                 if (reg->umax_value != U64_MAX)
 625                                         verbose(env, ",umax_value=%llu",
 626                                                 (unsigned long long)reg->umax_value);
 627                                 if (!tnum_is_unknown(reg->var_off)) {
 628                                         char tn_buf[48];
 629
 630                                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 631                                         verbose(env, ",var_off=%s", tn_buf);
 632                                 }
 633                                 if (reg->s32_min_value != reg->smin_value &&
 634                                     reg->s32_min_value != S32_MIN)
 635                                         verbose(env, ",s32_min_value=%d",
 636                                                 (int)(reg->s32_min_value));
 637                                 if (reg->s32_max_value != reg->smax_value &&
 638                                     reg->s32_max_value != S32_MAX)
 639                                         verbose(env, ",s32_max_value=%d",
 640                                                 (int)(reg->s32_max_value));
 641                                 if (reg->u32_min_value != reg->umin_value &&
 642                                     reg->u32_min_value != U32_MIN)
 643                                         verbose(env, ",u32_min_value=%d",
 644                                                 (int)(reg->u32_min_value));
 645                                 if (reg->u32_max_value != reg->umax_value &&
 646                                     reg->u32_max_value != U32_MAX)
 647                                         verbose(env, ",u32_max_value=%d",
 648                                                 (int)(reg->u32_max_value));
 649                         }
 650                         verbose(env, ")");
 651                 }
 652         }
 653         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 654                 char types_buf[BPF_REG_SIZE + 1];
 655                 bool valid = false;
 656                 int j;
 657
 658                 for (j = 0; j < BPF_REG_SIZE; j++) {
 659                         if (state->stack[i].slot_type[j] != STACK_INVALID)
 660                                 valid = true;
 661                         types_buf[j] = slot_type_char[
 662                                         state->stack[i].slot_type[j]];
 663                 }
 664                 types_buf[BPF_REG_SIZE] = 0;
 665                 if (!valid)
 666                         continue;
 667                 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 668                 print_liveness(env, state->stack[i].spilled_ptr.live);
 669                 if (state->stack[i].slot_type[0] == STACK_SPILL) {
 670                         reg = &state->stack[i].spilled_ptr;
 671                         t = reg->type;
 672                         verbose(env, "=%s", reg_type_str[t]);
 673                         if (t == SCALAR_VALUE && reg->precise)
 674                                 verbose(env, "P");
 675                         if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
 676                                 verbose(env, "%lld", reg->var_off.value + reg->off);
 677                 } else {
 678                         verbose(env, "=%s", types_buf);
 679                 }
 680         }
 681         if (state->acquired_refs && state->refs[0].id) {
 682                 verbose(env, " refs=%d", state->refs[0].id);
 683                 for (i = 1; i < state->acquired_refs; i++)
 684                         if (state->refs[i].id)
 685                                 verbose(env, ",%d", state->refs[i].id);
 686         }
 687         verbose(env, "\n");
 688 }
 689
 690 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)                         \
 691 static int copy_##NAME##_state(struct bpf_func_state *dst,              \
 692                                const struct bpf_func_state *src)        \
 693 {                                                                       \
 694         if (!src->FIELD)                                                \
 695                 return 0;                                               \
 696         if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {                    \
 697                 /* internal bug, make state invalid to reject the program */ \
 698                 memset(dst, 0, sizeof(*dst));                           \
 699                 return -EFAULT;                                         \
 700         }                                                               \
 701         memcpy(dst->FIELD, src->FIELD,                                  \
 702                sizeof(*src->FIELD) * (src->COUNT / SIZE));              \
 703         return 0;                                                       \
 704 }
 705 /* copy_reference_state() */
 706 COPY_STATE_FN(reference, acquired_refs, refs, 1)
 707 /* copy_stack_state() */
 708 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 709 #undef COPY_STATE_FN
 710
 711 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)                      \
 712 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
 713                                   bool copy_old)                        \
 714 {                                                                       \
 715         u32 old_size = state->COUNT;                                    \
 716         struct bpf_##NAME##_state *new_##FIELD;                         \
 717         int slot = size / SIZE;                                         \
 718                                                                         \
 719         if (size <= old_size || !size) {                                \
 720                 if (copy_old)                                           \
 721                         return 0;                                       \
 722                 state->COUNT = slot * SIZE;                             \
 723                 if (!size && old_size) {                                \
 724                         kfree(state->FIELD);                            \
 725                         state->FIELD = NULL;                            \
 726                 }                                                       \
 727                 return 0;                                               \
 728         }                                                               \
 729         new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
 730                                     GFP_KERNEL);                        \
 731         if (!new_##FIELD)                                               \
 732                 return -ENOMEM;                                         \
 733         if (copy_old) {                                                 \
 734                 if (state->FIELD)                                       \
 735                         memcpy(new_##FIELD, state->FIELD,               \
 736                                sizeof(*new_##FIELD) * (old_size / SIZE)); \
 737                 memset(new_##FIELD + old_size / SIZE, 0,                \
 738                        sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
 739         }                                                               \
 740         state->COUNT = slot * SIZE;                                     \
 741         kfree(state->FIELD);                                            \
 742         state->FIELD = new_##FIELD;                                     \
 743         return 0;                                                       \
 744 }
 745 /* realloc_reference_state() */
 746 REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
 747 /* realloc_stack_state() */
 748 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 749 #undef REALLOC_STATE_FN
 750
 751 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 752  * make it consume minimal amount of memory. check_stack_write() access from
 753  * the program calls into realloc_func_state() to grow the stack size.
 754  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 755  * which realloc_stack_state() copies over. It points to previous
 756  * bpf_verifier_state which is never reallocated.
 757  */
 758 static int realloc_func_state(struct bpf_func_state *state, int stack_size,
 759                               int refs_size, bool copy_old)
 760 {
 761         int err = realloc_reference_state(state, refs_size, copy_old);
 762         if (err)
 763                 return err;
 764         return realloc_stack_state(state, stack_size, copy_old);
 765 }
 766
 767 /* Acquire a pointer id from the env and update the state->refs to include
 768  * this new pointer reference.
 769  * On success, returns a valid pointer id to associate with the register
 770  * On failure, returns a negative errno.
 771  */
 772 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 773 {
 774         struct bpf_func_state *state = cur_func(env);
 775         int new_ofs = state->acquired_refs;
 776         int id, err;
 777
 778         err = realloc_reference_state(state, state->acquired_refs + 1, true);
 779         if (err)
 780                 return err;
 781         id = ++env->id_gen;
 782         state->refs[new_ofs].id = id;
 783         state->refs[new_ofs].insn_idx = insn_idx;
 784
 785         return id;
 786 }
 787
 788 /* release function corresponding to acquire_reference_state(). Idempotent. */
 789 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 790 {
 791         int i, last_idx;
 792
 793         last_idx = state->acquired_refs - 1;
 794         for (i = 0; i < state->acquired_refs; i++) {
 795                 if (state->refs[i].id == ptr_id) {
 796                         if (last_idx && i != last_idx)
 797                                 memcpy(&state->refs[i], &state->refs[last_idx],
 798                                        sizeof(*state->refs));
 799                         memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 800                         state->acquired_refs--;
 801                         return 0;
 802                 }
 803         }
 804         return -EINVAL;
 805 }
 806
 807 static int transfer_reference_state(struct bpf_func_state *dst,
 808                                     struct bpf_func_state *src)
 809 {
 810         int err = realloc_reference_state(dst, src->acquired_refs, false);
 811         if (err)
 812                 return err;
 813         err = copy_reference_state(dst, src);
 814         if (err)
 815                 return err;
 816         return 0;
 817 }
 818
 819 static void free_func_state(struct bpf_func_state *state)
 820 {
 821         if (!state)
 822                 return;
 823         kfree(state->refs);
 824         kfree(state->stack);
 825         kfree(state);
 826 }
 827
 828 static void clear_jmp_history(struct bpf_verifier_state *state)
 829 {
 830         kfree(state->jmp_history);
 831         state->jmp_history = NULL;
 832         state->jmp_history_cnt = 0;
 833 }
 834
 835 static void free_verifier_state(struct bpf_verifier_state *state,
 836                                 bool free_self)
 837 {
 838         int i;
 839
 840         for (i = 0; i <= state->curframe; i++) {
 841                 free_func_state(state->frame[i]);
 842                 state->frame[i] = NULL;
 843         }
 844         clear_jmp_history(state);
 845         if (free_self)
 846                 kfree(state);
 847 }
 848
 849 /* copy verifier state from src to dst growing dst stack space
 850  * when necessary to accommodate larger src stack
 851  */
 852 static int copy_func_state(struct bpf_func_state *dst,
 853                            const struct bpf_func_state *src)
 854 {
 855         int err;
 856
 857         err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
 858                                  false);
 859         if (err)
 860                 return err;
 861         memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 862         err = copy_reference_state(dst, src);
 863         if (err)
 864                 return err;
 865         return copy_stack_state(dst, src);
 866 }
 867
 868 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 869                                const struct bpf_verifier_state *src)
 870 {
 871         struct bpf_func_state *dst;
 872         u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
 873         int i, err;
 874
 875         if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
 876                 kfree(dst_state->jmp_history);
 877                 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
 878                 if (!dst_state->jmp_history)
 879                         return -ENOMEM;
 880         }
 881         memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
 882         dst_state->jmp_history_cnt = src->jmp_history_cnt;
 883
 884         /* if dst has more stack frames then src frame, free them */
 885         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 886                 free_func_state(dst_state->frame[i]);
 887                 dst_state->frame[i] = NULL;
 888         }
 889         dst_state->speculative = src->speculative;
 890         dst_state->curframe = src->curframe;
 891         dst_state->active_spin_lock = src->active_spin_lock;
 892         dst_state->branches = src->branches;
 893         dst_state->parent = src->parent;
 894         dst_state->first_insn_idx = src->first_insn_idx;
 895         dst_state->last_insn_idx = src->last_insn_idx;
 896         for (i = 0; i <= src->curframe; i++) {
 897                 dst = dst_state->frame[i];
 898                 if (!dst) {
 899                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 900                         if (!dst)
 901                                 return -ENOMEM;
 902                         dst_state->frame[i] = dst;
 903                 }
 904                 err = copy_func_state(dst, src->frame[i]);
 905                 if (err)
 906                         return err;
 907         }
 908         return 0;
 909 }
 910
 911 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 912 {
 913         while (st) {
 914                 u32 br = --st->branches;
 915
 916                 /* WARN_ON(br > 1) technically makes sense here,
 917                  * but see comment in push_stack(), hence:
 918                  */
 919                 WARN_ONCE((int)br < 0,
 920                           "BUG update_branch_counts:branches_to_explore=%d\n",
 921                           br);
 922                 if (br)
 923                         break;
 924                 st = st->parent;
 925         }
 926 }
 927
 928 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 929                      int *insn_idx, bool pop_log)
 930 {
 931         struct bpf_verifier_state *cur = env->cur_state;
 932         struct bpf_verifier_stack_elem *elem, *head = env->head;
 933         int err;
 934
 935         if (env->head == NULL)
 936                 return -ENOENT;
 937
 938         if (cur) {
 939                 err = copy_verifier_state(cur, &head->st);
 940                 if (err)
 941                         return err;
 942         }
 943         if (pop_log)
 944                 bpf_vlog_reset(&env->log, head->log_pos);
 945         if (insn_idx)
 946                 *insn_idx = head->insn_idx;
 947         if (prev_insn_idx)
 948                 *prev_insn_idx = head->prev_insn_idx;
 949         elem = head->next;
 950         free_verifier_state(&head->st, false);
 951         kfree(head);
 952         env->head = elem;
 953         env->stack_size--;
 954         return 0;
 955 }
 956
 957 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 958                                              int insn_idx, int prev_insn_idx,
 959                                              bool speculative)
 960 {
 961         struct bpf_verifier_state *cur = env->cur_state;
 962         struct bpf_verifier_stack_elem *elem;
 963         int err;
 964
 965         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 966         if (!elem)
 967                 goto err;
 968
 969         elem->insn_idx = insn_idx;
 970         elem->prev_insn_idx = prev_insn_idx;
 971         elem->next = env->head;
 972         elem->log_pos = env->log.len_used;
 973         env->head = elem;
 974         env->stack_size++;
 975         err = copy_verifier_state(&elem->st, cur);
 976         if (err)
 977                 goto err;
 978         elem->st.speculative |= speculative;
 979         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
 980                 verbose(env, "The sequence of %d jumps is too complex.\n",
 981                         env->stack_size);
 982                 goto err;
 983         }
 984         if (elem->st.parent) {
 985                 ++elem->st.parent->branches;
 986                 /* WARN_ON(branches > 2) technically makes sense here,
 987                  * but
 988                  * 1. speculative states will bump 'branches' for non-branch
 989                  * instructions
 990                  * 2. is_state_visited() heuristics may decide not to create
 991                  * a new state for a sequence of branches and all such current
 992                  * and cloned states will be pointing to a single parent state
 993                  * which might have large 'branches' count.
 994                  */
 995         }
 996         return &elem->st;
 997 err:
 998         free_verifier_state(env->cur_state, true);
 999         env->cur_state = NULL;
1000         /* pop all elements and return */
1001         while (!pop_stack(env, NULL, NULL, false));
1002         return NULL;
1003 }
1004
1005 #define CALLER_SAVED_REGS 6
1006 static const int caller_saved[CALLER_SAVED_REGS] = {
1007         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1008 };
1009
1010 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1011                                 struct bpf_reg_state *reg);
1012
1013 /* This helper doesn't clear reg->id */
1014 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1015 {
1016         reg->var_off = tnum_const(imm);
1017         reg->smin_value = (s64)imm;
1018         reg->smax_value = (s64)imm;
1019         reg->umin_value = imm;
1020         reg->umax_value = imm;
1021
1022         reg->s32_min_value = (s32)imm;
1023         reg->s32_max_value = (s32)imm;
1024         reg->u32_min_value = (u32)imm;
1025         reg->u32_max_value = (u32)imm;
1026 }
1027
1028 /* Mark the unknown part of a register (variable offset or scalar value) as
1029  * known to have the value @imm.
1030  */
1031 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1032 {
1033         /* Clear id, off, and union(map_ptr, range) */
1034         memset(((u8 *)reg) + sizeof(reg->type), 0,
1035                offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1036         ___mark_reg_known(reg, imm);
1037 }
1038
1039 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1040 {
1041         reg->var_off = tnum_const_subreg(reg->var_off, imm);
1042         reg->s32_min_value = (s32)imm;
1043         reg->s32_max_value = (s32)imm;
1044         reg->u32_min_value = (u32)imm;
1045         reg->u32_max_value = (u32)imm;
1046 }
1047
1048 /* Mark the 'variable offset' part of a register as zero.  This should be
1049  * used only on registers holding a pointer type.
1050  */
1051 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1052 {
1053         __mark_reg_known(reg, 0);
1054 }
1055
1056 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1057 {
1058         __mark_reg_known(reg, 0);
1059         reg->type = SCALAR_VALUE;
1060 }
1061
1062 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1063                                 struct bpf_reg_state *regs, u32 regno)
1064 {
1065         if (WARN_ON(regno >= MAX_BPF_REG)) {
1066                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1067                 /* Something bad happened, let's kill all regs */
1068                 for (regno = 0; regno < MAX_BPF_REG; regno++)
1069                         __mark_reg_not_init(env, regs + regno);
1070                 return;
1071         }
1072         __mark_reg_known_zero(regs + regno);
1073 }
1074
1075 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1076 {
1077         return type_is_pkt_pointer(reg->type);
1078 }
1079
1080 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1081 {
1082         return reg_is_pkt_pointer(reg) ||
1083                reg->type == PTR_TO_PACKET_END;
1084 }
1085
1086 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1087 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1088                                     enum bpf_reg_type which)
1089 {
1090         /* The register can already have a range from prior markings.
1091          * This is fine as long as it hasn't been advanced from its
1092          * origin.
1093          */
1094         return reg->type == which &&
1095                reg->id == 0 &&
1096                reg->off == 0 &&
1097                tnum_equals_const(reg->var_off, 0);
1098 }
1099
1100 /* Reset the min/max bounds of a register */
1101 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1102 {
1103         reg->smin_value = S64_MIN;
1104         reg->smax_value = S64_MAX;
1105         reg->umin_value = 0;
1106         reg->umax_value = U64_MAX;
1107
1108         reg->s32_min_value = S32_MIN;
1109         reg->s32_max_value = S32_MAX;
1110         reg->u32_min_value = 0;
1111         reg->u32_max_value = U32_MAX;
1112 }
1113
1114 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1115 {
1116         reg->smin_value = S64_MIN;
1117         reg->smax_value = S64_MAX;
1118         reg->umin_value = 0;
1119         reg->umax_value = U64_MAX;
1120 }
1121
1122 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1123 {
1124         reg->s32_min_value = S32_MIN;
1125         reg->s32_max_value = S32_MAX;
1126         reg->u32_min_value = 0;
1127         reg->u32_max_value = U32_MAX;
1128 }
1129
1130 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1131 {
1132         struct tnum var32_off = tnum_subreg(reg->var_off);
1133
1134         /* min signed is max(sign bit) | min(other bits) */
1135         reg->s32_min_value = max_t(s32, reg->s32_min_value,
1136                         var32_off.value | (var32_off.mask & S32_MIN));
1137         /* max signed is min(sign bit) | max(other bits) */
1138         reg->s32_max_value = min_t(s32, reg->s32_max_value,
1139                         var32_off.value | (var32_off.mask & S32_MAX));
1140         reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1141         reg->u32_max_value = min(reg->u32_max_value,
1142                                  (u32)(var32_off.value | var32_off.mask));
1143 }
1144
1145 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1146 {
1147         /* min signed is max(sign bit) | min(other bits) */
1148         reg->smin_value = max_t(s64, reg->smin_value,
1149                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1150         /* max signed is min(sign bit) | max(other bits) */
1151         reg->smax_value = min_t(s64, reg->smax_value,
1152                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1153         reg->umin_value = max(reg->umin_value, reg->var_off.value);
1154         reg->umax_value = min(reg->umax_value,
1155                               reg->var_off.value | reg->var_off.mask);
1156 }
1157
1158 static void __update_reg_bounds(struct bpf_reg_state *reg)
1159 {
1160         __update_reg32_bounds(reg);
1161         __update_reg64_bounds(reg);
1162 }
1163
1164 /* Uses signed min/max values to inform unsigned, and vice-versa */
1165 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1166 {
1167         /* Learn sign from signed bounds.
1168          * If we cannot cross the sign boundary, then signed and unsigned bounds
1169          * are the same, so combine.  This works even in the negative case, e.g.
1170          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1171          */
1172         if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1173                 reg->s32_min_value = reg->u32_min_value =
1174                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1175                 reg->s32_max_value = reg->u32_max_value =
1176                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1177                 return;
1178         }
1179         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1180          * boundary, so we must be careful.
1181          */
1182         if ((s32)reg->u32_max_value >= 0) {
1183                 /* Positive.  We can't learn anything from the smin, but smax
1184                  * is positive, hence safe.
1185                  */
1186                 reg->s32_min_value = reg->u32_min_value;
1187                 reg->s32_max_value = reg->u32_max_value =
1188                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1189         } else if ((s32)reg->u32_min_value < 0) {
1190                 /* Negative.  We can't learn anything from the smax, but smin
1191                  * is negative, hence safe.
1192                  */
1193                 reg->s32_min_value = reg->u32_min_value =
1194                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1195                 reg->s32_max_value = reg->u32_max_value;
1196         }
1197 }
1198
1199 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1200 {
1201         /* Learn sign from signed bounds.
1202          * If we cannot cross the sign boundary, then signed and unsigned bounds
1203          * are the same, so combine.  This works even in the negative case, e.g.
1204          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1205          */
1206         if (reg->smin_value >= 0 || reg->smax_value < 0) {
1207                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1208                                                           reg->umin_value);
1209                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1210                                                           reg->umax_value);
1211                 return;
1212         }
1213         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1214          * boundary, so we must be careful.
1215          */
1216         if ((s64)reg->umax_value >= 0) {
1217                 /* Positive.  We can't learn anything from the smin, but smax
1218                  * is positive, hence safe.
1219                  */
1220                 reg->smin_value = reg->umin_value;
1221                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1222                                                           reg->umax_value);
1223         } else if ((s64)reg->umin_value < 0) {
1224                 /* Negative.  We can't learn anything from the smax, but smin
1225                  * is negative, hence safe.
1226                  */
1227                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1228                                                           reg->umin_value);
1229                 reg->smax_value = reg->umax_value;
1230         }
1231 }
1232
1233 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1234 {
1235         __reg32_deduce_bounds(reg);
1236         __reg64_deduce_bounds(reg);
1237 }
1238
1239 /* Attempts to improve var_off based on unsigned min/max information */
1240 static void __reg_bound_offset(struct bpf_reg_state *reg)
1241 {
1242         struct tnum var64_off = tnum_intersect(reg->var_off,
1243                                                tnum_range(reg->umin_value,
1244                                                           reg->umax_value));
1245         struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1246                                                 tnum_range(reg->u32_min_value,
1247                                                            reg->u32_max_value));
1248
1249         reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1250 }
1251
1252 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1253 {
1254         reg->umin_value = reg->u32_min_value;
1255         reg->umax_value = reg->u32_max_value;
1256         /* Attempt to pull 32-bit signed bounds into 64-bit bounds
1257          * but must be positive otherwise set to worse case bounds
1258          * and refine later from tnum.
1259          */
1260         if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
1261                 reg->smax_value = reg->s32_max_value;
1262         else
1263                 reg->smax_value = U32_MAX;
1264         if (reg->s32_min_value >= 0)
1265                 reg->smin_value = reg->s32_min_value;
1266         else
1267                 reg->smin_value = 0;
1268 }
1269
1270 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1271 {
1272         /* special case when 64-bit register has upper 32-bit register
1273          * zeroed. Typically happens after zext or <<32, >>32 sequence
1274          * allowing us to use 32-bit bounds directly,
1275          */
1276         if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1277                 __reg_assign_32_into_64(reg);
1278         } else {
1279                 /* Otherwise the best we can do is push lower 32bit known and
1280                  * unknown bits into register (var_off set from jmp logic)
1281                  * then learn as much as possible from the 64-bit tnum
1282                  * known and unknown bits. The previous smin/smax bounds are
1283                  * invalid here because of jmp32 compare so mark them unknown
1284                  * so they do not impact tnum bounds calculation.
1285                  */
1286                 __mark_reg64_unbounded(reg);
1287                 __update_reg_bounds(reg);
1288         }
1289
1290         /* Intersecting with the old var_off might have improved our bounds
1291          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1292          * then new var_off is (0; 0x7f...fc) which improves our umax.
1293          */
1294         __reg_deduce_bounds(reg);
1295         __reg_bound_offset(reg);
1296         __update_reg_bounds(reg);
1297 }
1298
1299 static bool __reg64_bound_s32(s64 a)
1300 {
1301         return a >= S32_MIN && a <= S32_MAX;
1302 }
1303
1304 static bool __reg64_bound_u32(u64 a)
1305 {
1306         return a >= U32_MIN && a <= U32_MAX;
1307 }
1308
1309 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1310 {
1311         __mark_reg32_unbounded(reg);
1312
1313         if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1314                 reg->s32_min_value = (s32)reg->smin_value;
1315                 reg->s32_max_value = (s32)reg->smax_value;
1316         }
1317         if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1318                 reg->u32_min_value = (u32)reg->umin_value;
1319                 reg->u32_max_value = (u32)reg->umax_value;
1320         }
1321
1322         /* Intersecting with the old var_off might have improved our bounds
1323          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1324          * then new var_off is (0; 0x7f...fc) which improves our umax.
1325          */
1326         __reg_deduce_bounds(reg);
1327         __reg_bound_offset(reg);
1328         __update_reg_bounds(reg);
1329 }
1330
1331 /* Mark a register as having a completely unknown (scalar) value. */
1332 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1333                                struct bpf_reg_state *reg)
1334 {
1335         /*
1336          * Clear type, id, off, and union(map_ptr, range) and
1337          * padding between 'type' and union
1338          */
1339         memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1340         reg->type = SCALAR_VALUE;
1341         reg->var_off = tnum_unknown;
1342         reg->frameno = 0;
1343         reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1344         __mark_reg_unbounded(reg);
1345 }
1346
1347 static void mark_reg_unknown(struct bpf_verifier_env *env,
1348                              struct bpf_reg_state *regs, u32 regno)
1349 {
1350         if (WARN_ON(regno >= MAX_BPF_REG)) {
1351                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1352                 /* Something bad happened, let's kill all regs except FP */
1353                 for (regno = 0; regno < BPF_REG_FP; regno++)
1354                         __mark_reg_not_init(env, regs + regno);
1355                 return;
1356         }
1357         __mark_reg_unknown(env, regs + regno);
1358 }
1359
1360 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1361                                 struct bpf_reg_state *reg)
1362 {
1363         __mark_reg_unknown(env, reg);
1364         reg->type = NOT_INIT;
1365 }
1366
1367 static void mark_reg_not_init(struct bpf_verifier_env *env,
1368                               struct bpf_reg_state *regs, u32 regno)
1369 {
1370         if (WARN_ON(regno >= MAX_BPF_REG)) {
1371                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1372                 /* Something bad happened, let's kill all regs except FP */
1373                 for (regno = 0; regno < BPF_REG_FP; regno++)
1374                         __mark_reg_not_init(env, regs + regno);
1375                 return;
1376         }
1377         __mark_reg_not_init(env, regs + regno);
1378 }
1379
1380 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1381                             struct bpf_reg_state *regs, u32 regno,
1382                             enum bpf_reg_type reg_type, u32 btf_id)
1383 {
1384         if (reg_type == SCALAR_VALUE) {
1385                 mark_reg_unknown(env, regs, regno);
1386                 return;
1387         }
1388         mark_reg_known_zero(env, regs, regno);
1389         regs[regno].type = PTR_TO_BTF_ID;
1390         regs[regno].btf_id = btf_id;
1391 }
1392
1393 #define DEF_NOT_SUBREG  (0)
1394 static void init_reg_state(struct bpf_verifier_env *env,
1395                            struct bpf_func_state *state)
1396 {
1397         struct bpf_reg_state *regs = state->regs;
1398         int i;
1399
1400         for (i = 0; i < MAX_BPF_REG; i++) {
1401                 mark_reg_not_init(env, regs, i);
1402                 regs[i].live = REG_LIVE_NONE;
1403                 regs[i].parent = NULL;
1404                 regs[i].subreg_def = DEF_NOT_SUBREG;
1405         }
1406
1407         /* frame pointer */
1408         regs[BPF_REG_FP].type = PTR_TO_STACK;
1409         mark_reg_known_zero(env, regs, BPF_REG_FP);
1410         regs[BPF_REG_FP].frameno = state->frameno;
1411 }
1412
1413 #define BPF_MAIN_FUNC (-1)
1414 static void init_func_state(struct bpf_verifier_env *env,
1415                             struct bpf_func_state *state,
1416                             int callsite, int frameno, int subprogno)
1417 {
1418         state->callsite = callsite;
1419         state->frameno = frameno;
1420         state->subprogno = subprogno;
1421         init_reg_state(env, state);
1422 }
1423
1424 enum reg_arg_type {
1425         SRC_OP,         /* register is used as source operand */
1426         DST_OP,         /* register is used as destination operand */
1427         DST_OP_NO_MARK  /* same as above, check only, don't mark */
1428 };
1429
1430 static int cmp_subprogs(const void *a, const void *b)
1431 {
1432         return ((struct bpf_subprog_info *)a)->start -
1433                ((struct bpf_subprog_info *)b)->start;
1434 }
1435
1436 static int find_subprog(struct bpf_verifier_env *env, int off)
1437 {
1438         struct bpf_subprog_info *p;
1439
1440         p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1441                     sizeof(env->subprog_info[0]), cmp_subprogs);
1442         if (!p)
1443                 return -ENOENT;
1444         return p - env->subprog_info;
1445
1446 }
1447
1448 static int add_subprog(struct bpf_verifier_env *env, int off)
1449 {
1450         int insn_cnt = env->prog->len;
1451         int ret;
1452
1453         if (off >= insn_cnt || off < 0) {
1454                 verbose(env, "call to invalid destination\n");
1455                 return -EINVAL;
1456         }
1457         ret = find_subprog(env, off);
1458         if (ret >= 0)
1459                 return 0;
1460         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1461                 verbose(env, "too many subprograms\n");
1462                 return -E2BIG;
1463         }
1464         env->subprog_info[env->subprog_cnt++].start = off;
1465         sort(env->subprog_info, env->subprog_cnt,
1466              sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1467         return 0;
1468 }
1469
1470 static int check_subprogs(struct bpf_verifier_env *env)
1471 {
1472         int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1473         struct bpf_subprog_info *subprog = env->subprog_info;
1474         struct bpf_insn *insn = env->prog->insnsi;
1475         int insn_cnt = env->prog->len;
1476
1477         /* Add entry function. */
1478         ret = add_subprog(env, 0);
1479         if (ret < 0)
1480                 return ret;
1481
1482         /* determine subprog starts. The end is one before the next starts */
1483         for (i = 0; i < insn_cnt; i++) {
1484                 if (insn[i].code != (BPF_JMP | BPF_CALL))
1485                         continue;
1486                 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1487                         continue;
1488                 if (!env->bpf_capable) {
1489                         verbose(env,
1490                                 "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1491                         return -EPERM;
1492                 }
1493                 ret = add_subprog(env, i + insn[i].imm + 1);
1494                 if (ret < 0)
1495                         return ret;
1496         }
1497
1498         /* Add a fake 'exit' subprog which could simplify subprog iteration
1499          * logic. 'subprog_cnt' should not be increased.
1500          */
1501         subprog[env->subprog_cnt].start = insn_cnt;
1502
1503         if (env->log.level & BPF_LOG_LEVEL2)
1504                 for (i = 0; i < env->subprog_cnt; i++)
1505                         verbose(env, "func#%d @%d\n", i, subprog[i].start);
1506
1507         /* now check that all jumps are within the same subprog */
1508         subprog_start = subprog[cur_subprog].start;
1509         subprog_end = subprog[cur_subprog + 1].start;
1510         for (i = 0; i < insn_cnt; i++) {
1511                 u8 code = insn[i].code;
1512
1513                 if (code == (BPF_JMP | BPF_CALL) &&
1514                     insn[i].imm == BPF_FUNC_tail_call &&
1515                     insn[i].src_reg != BPF_PSEUDO_CALL)
1516                         subprog[cur_subprog].has_tail_call = true;
1517                 if (BPF_CLASS(code) == BPF_LD &&
1518                     (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
1519                         subprog[cur_subprog].has_ld_abs = true;
1520                 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1521                         goto next;
1522                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1523                         goto next;
1524                 off = i + insn[i].off + 1;
1525                 if (off < subprog_start || off >= subprog_end) {
1526                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
1527                         return -EINVAL;
1528                 }
1529 next:
1530                 if (i == subprog_end - 1) {
1531                         /* to avoid fall-through from one subprog into another
1532                          * the last insn of the subprog should be either exit
1533                          * or unconditional jump back
1534                          */
1535                         if (code != (BPF_JMP | BPF_EXIT) &&
1536                             code != (BPF_JMP | BPF_JA)) {
1537                                 verbose(env, "last insn is not an exit or jmp\n");
1538                                 return -EINVAL;
1539                         }
1540                         subprog_start = subprog_end;
1541                         cur_subprog++;
1542                         if (cur_subprog < env->subprog_cnt)
1543                                 subprog_end = subprog[cur_subprog + 1].start;
1544                 }
1545         }
1546         return 0;
1547 }
1548
1549 /* Parentage chain of this register (or stack slot) should take care of all
1550  * issues like callee-saved registers, stack slot allocation time, etc.
1551  */
1552 static int mark_reg_read(struct bpf_verifier_env *env,
1553                          const struct bpf_reg_state *state,
1554                          struct bpf_reg_state *parent, u8 flag)
1555 {
1556         bool writes = parent == state->parent; /* Observe write marks */
1557         int cnt = 0;
1558
1559         while (parent) {
1560                 /* if read wasn't screened by an earlier write ... */
1561                 if (writes && state->live & REG_LIVE_WRITTEN)
1562                         break;
1563                 if (parent->live & REG_LIVE_DONE) {
1564                         verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1565                                 reg_type_str[parent->type],
1566                                 parent->var_off.value, parent->off);
1567                         return -EFAULT;
1568                 }
1569                 /* The first condition is more likely to be true than the
1570                  * second, checked it first.
1571                  */
1572                 if ((parent->live & REG_LIVE_READ) == flag ||
1573                     parent->live & REG_LIVE_READ64)
1574                         /* The parentage chain never changes and
1575                          * this parent was already marked as LIVE_READ.
1576                          * There is no need to keep walking the chain again and
1577                          * keep re-marking all parents as LIVE_READ.
1578                          * This case happens when the same register is read
1579                          * multiple times without writes into it in-between.
1580                          * Also, if parent has the stronger REG_LIVE_READ64 set,
1581                          * then no need to set the weak REG_LIVE_READ32.
1582                          */
1583                         break;
1584                 /* ... then we depend on parent's value */
1585                 parent->live |= flag;
1586                 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1587                 if (flag == REG_LIVE_READ64)
1588                         parent->live &= ~REG_LIVE_READ32;
1589                 state = parent;
1590                 parent = state->parent;
1591                 writes = true;
1592                 cnt++;
1593         }
1594
1595         if (env->longest_mark_read_walk < cnt)
1596                 env->longest_mark_read_walk = cnt;
1597         return 0;
1598 }
1599
1600 /* This function is supposed to be used by the following 32-bit optimization
1601  * code only. It returns TRUE if the source or destination register operates
1602  * on 64-bit, otherwise return FALSE.
1603  */
1604 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1605                      u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1606 {
1607         u8 code, class, op;
1608
1609         code = insn->code;
1610         class = BPF_CLASS(code);
1611         op = BPF_OP(code);
1612         if (class == BPF_JMP) {
1613                 /* BPF_EXIT for "main" will reach here. Return TRUE
1614                  * conservatively.
1615                  */
1616                 if (op == BPF_EXIT)
1617                         return true;
1618                 if (op == BPF_CALL) {
1619                         /* BPF to BPF call will reach here because of marking
1620                          * caller saved clobber with DST_OP_NO_MARK for which we
1621                          * don't care the register def because they are anyway
1622                          * marked as NOT_INIT already.
1623                          */
1624                         if (insn->src_reg == BPF_PSEUDO_CALL)
1625                                 return false;
1626                         /* Helper call will reach here because of arg type
1627                          * check, conservatively return TRUE.
1628                          */
1629                         if (t == SRC_OP)
1630                                 return true;
1631
1632                         return false;
1633                 }
1634         }
1635
1636         if (class == BPF_ALU64 || class == BPF_JMP ||
1637             /* BPF_END always use BPF_ALU class. */
1638             (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1639                 return true;
1640
1641         if (class == BPF_ALU || class == BPF_JMP32)
1642                 return false;
1643
1644         if (class == BPF_LDX) {
1645                 if (t != SRC_OP)
1646                         return BPF_SIZE(code) == BPF_DW;
1647                 /* LDX source must be ptr. */
1648                 return true;
1649         }
1650
1651         if (class == BPF_STX) {
1652                 if (reg->type != SCALAR_VALUE)
1653                         return true;
1654                 return BPF_SIZE(code) == BPF_DW;
1655         }
1656
1657         if (class == BPF_LD) {
1658                 u8 mode = BPF_MODE(code);
1659
1660                 /* LD_IMM64 */
1661                 if (mode == BPF_IMM)
1662                         return true;
1663
1664                 /* Both LD_IND and LD_ABS return 32-bit data. */
1665                 if (t != SRC_OP)
1666                         return  false;
1667
1668                 /* Implicit ctx ptr. */
1669                 if (regno == BPF_REG_6)
1670                         return true;
1671
1672                 /* Explicit source could be any width. */
1673                 return true;
1674         }
1675
1676         if (class == BPF_ST)
1677                 /* The only source register for BPF_ST is a ptr. */
1678                 return true;
1679
1680         /* Conservatively return true at default. */
1681         return true;
1682 }
1683
1684 /* Return TRUE if INSN doesn't have explicit value define. */
1685 static bool insn_no_def(struct bpf_insn *insn)
1686 {
1687         u8 class = BPF_CLASS(insn->code);
1688
1689         return (class == BPF_JMP || class == BPF_JMP32 ||
1690                 class == BPF_STX || class == BPF_ST);
1691 }
1692
1693 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
1694 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1695 {
1696         if (insn_no_def(insn))
1697                 return false;
1698
1699         return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1700 }
1701
1702 static void mark_insn_zext(struct bpf_verifier_env *env,
1703                            struct bpf_reg_state *reg)
1704 {
1705         s32 def_idx = reg->subreg_def;
1706
1707         if (def_idx == DEF_NOT_SUBREG)
1708                 return;
1709
1710         env->insn_aux_data[def_idx - 1].zext_dst = true;
1711         /* The dst will be zero extended, so won't be sub-register anymore. */
1712         reg->subreg_def = DEF_NOT_SUBREG;
1713 }
1714
1715 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1716                          enum reg_arg_type t)
1717 {
1718         struct bpf_verifier_state *vstate = env->cur_state;
1719         struct bpf_func_state *state = vstate->frame[vstate->curframe];
1720         struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1721         struct bpf_reg_state *reg, *regs = state->regs;
1722         bool rw64;
1723
1724         if (regno >= MAX_BPF_REG) {
1725                 verbose(env, "R%d is invalid\n", regno);
1726                 return -EINVAL;
1727         }
1728
1729         reg = &regs[regno];
1730         rw64 = is_reg64(env, insn, regno, reg, t);
1731         if (t == SRC_OP) {
1732                 /* check whether register used as source operand can be read */
1733                 if (reg->type == NOT_INIT) {
1734                         verbose(env, "R%d !read_ok\n", regno);
1735                         return -EACCES;
1736                 }
1737                 /* We don't need to worry about FP liveness because it's read-only */
1738                 if (regno == BPF_REG_FP)
1739                         return 0;
1740
1741                 if (rw64)
1742                         mark_insn_zext(env, reg);
1743
1744                 return mark_reg_read(env, reg, reg->parent,
1745                                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1746         } else {
1747                 /* check whether register used as dest operand can be written to */
1748                 if (regno == BPF_REG_FP) {
1749                         verbose(env, "frame pointer is read only\n");
1750                         return -EACCES;
1751                 }
1752                 reg->live |= REG_LIVE_WRITTEN;
1753                 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1754                 if (t == DST_OP)
1755                         mark_reg_unknown(env, regs, regno);
1756         }
1757         return 0;
1758 }
1759
1760 /* for any branch, call, exit record the history of jmps in the given state */
1761 static int push_jmp_history(struct bpf_verifier_env *env,
1762                             struct bpf_verifier_state *cur)
1763 {
1764         u32 cnt = cur->jmp_history_cnt;
1765         struct bpf_idx_pair *p;
1766
1767         cnt++;
1768         p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1769         if (!p)
1770                 return -ENOMEM;
1771         p[cnt - 1].idx = env->insn_idx;
1772         p[cnt - 1].prev_idx = env->prev_insn_idx;
1773         cur->jmp_history = p;
1774         cur->jmp_history_cnt = cnt;
1775         return 0;
1776 }
1777
1778 /* Backtrack one insn at a time. If idx is not at the top of recorded
1779  * history then previous instruction came from straight line execution.
1780  */
1781 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1782                              u32 *history)
1783 {
1784         u32 cnt = *history;
1785
1786         if (cnt && st->jmp_history[cnt - 1].idx == i) {
1787                 i = st->jmp_history[cnt - 1].prev_idx;
1788                 (*history)--;
1789         } else {
1790                 i--;
1791         }
1792         return i;
1793 }
1794
1795 /* For given verifier state backtrack_insn() is called from the last insn to
1796  * the first insn. Its purpose is to compute a bitmask of registers and
1797  * stack slots that needs precision in the parent verifier state.
1798  */
1799 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1800                           u32 *reg_mask, u64 *stack_mask)
1801 {
1802         const struct bpf_insn_cbs cbs = {
1803                 .cb_print       = verbose,
1804                 .private_data   = env,
1805         };
1806         struct bpf_insn *insn = env->prog->insnsi + idx;
1807         u8 class = BPF_CLASS(insn->code);
1808         u8 opcode = BPF_OP(insn->code);
1809         u8 mode = BPF_MODE(insn->code);
1810         u32 dreg = 1u << insn->dst_reg;
1811         u32 sreg = 1u << insn->src_reg;
1812         u32 spi;
1813
1814         if (insn->code == 0)
1815                 return 0;
1816         if (env->log.level & BPF_LOG_LEVEL) {
1817                 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1818                 verbose(env, "%d: ", idx);
1819                 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1820         }
1821
1822         if (class == BPF_ALU || class == BPF_ALU64) {
1823                 if (!(*reg_mask & dreg))
1824                         return 0;
1825                 if (opcode == BPF_MOV) {
1826                         if (BPF_SRC(insn->code) == BPF_X) {
1827                                 /* dreg = sreg
1828                                  * dreg needs precision after this insn
1829                                  * sreg needs precision before this insn
1830                                  */
1831                                 *reg_mask &= ~dreg;
1832                                 *reg_mask |= sreg;
1833                         } else {
1834                                 /* dreg = K
1835                                  * dreg needs precision after this insn.
1836                                  * Corresponding register is already marked
1837                                  * as precise=true in this verifier state.
1838                                  * No further markings in parent are necessary
1839                                  */
1840                                 *reg_mask &= ~dreg;
1841                         }
1842                 } else {
1843                         if (BPF_SRC(insn->code) == BPF_X) {
1844                                 /* dreg += sreg
1845                                  * both dreg and sreg need precision
1846                                  * before this insn
1847                                  */
1848                                 *reg_mask |= sreg;
1849                         } /* else dreg += K
1850                            * dreg still needs precision before this insn
1851                            */
1852                 }
1853         } else if (class == BPF_LDX) {
1854                 if (!(*reg_mask & dreg))
1855                         return 0;
1856                 *reg_mask &= ~dreg;
1857
1858                 /* scalars can only be spilled into stack w/o losing precision.
1859                  * Load from any other memory can be zero extended.
1860                  * The desire to keep that precision is already indicated
1861                  * by 'precise' mark in corresponding register of this state.
1862                  * No further tracking necessary.
1863                  */
1864                 if (insn->src_reg != BPF_REG_FP)
1865                         return 0;
1866                 if (BPF_SIZE(insn->code) != BPF_DW)
1867                         return 0;
1868
1869                 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1870                  * that [fp - off] slot contains scalar that needs to be
1871                  * tracked with precision
1872                  */
1873                 spi = (-insn->off - 1) / BPF_REG_SIZE;
1874                 if (spi >= 64) {
1875                         verbose(env, "BUG spi %d\n", spi);
1876                         WARN_ONCE(1, "verifier backtracking bug");
1877                         return -EFAULT;
1878                 }
1879                 *stack_mask |= 1ull << spi;
1880         } else if (class == BPF_STX || class == BPF_ST) {
1881                 if (*reg_mask & dreg)
1882                         /* stx & st shouldn't be using _scalar_ dst_reg
1883                          * to access memory. It means backtracking
1884                          * encountered a case of pointer subtraction.
1885                          */
1886                         return -ENOTSUPP;
1887                 /* scalars can only be spilled into stack */
1888                 if (insn->dst_reg != BPF_REG_FP)
1889                         return 0;
1890                 if (BPF_SIZE(insn->code) != BPF_DW)
1891                         return 0;
1892                 spi = (-insn->off - 1) / BPF_REG_SIZE;
1893                 if (spi >= 64) {
1894                         verbose(env, "BUG spi %d\n", spi);
1895                         WARN_ONCE(1, "verifier backtracking bug");
1896                         return -EFAULT;
1897                 }
1898                 if (!(*stack_mask & (1ull << spi)))
1899                         return 0;
1900                 *stack_mask &= ~(1ull << spi);
1901                 if (class == BPF_STX)
1902                         *reg_mask |= sreg;
1903         } else if (class == BPF_JMP || class == BPF_JMP32) {
1904                 if (opcode == BPF_CALL) {
1905                         if (insn->src_reg == BPF_PSEUDO_CALL)
1906                                 return -ENOTSUPP;
1907                         /* regular helper call sets R0 */
1908                         *reg_mask &= ~1;
1909                         if (*reg_mask & 0x3f) {
1910                                 /* if backtracing was looking for registers R1-R5
1911                                  * they should have been found already.
1912                                  */
1913                                 verbose(env, "BUG regs %x\n", *reg_mask);
1914                                 WARN_ONCE(1, "verifier backtracking bug");
1915                                 return -EFAULT;
1916                         }
1917                 } else if (opcode == BPF_EXIT) {
1918                         return -ENOTSUPP;
1919                 }
1920         } else if (class == BPF_LD) {
1921                 if (!(*reg_mask & dreg))
1922                         return 0;
1923                 *reg_mask &= ~dreg;
1924                 /* It's ld_imm64 or ld_abs or ld_ind.
1925                  * For ld_imm64 no further tracking of precision
1926                  * into parent is necessary
1927                  */
1928                 if (mode == BPF_IND || mode == BPF_ABS)
1929                         /* to be analyzed */
1930                         return -ENOTSUPP;
1931         }
1932         return 0;
1933 }
1934
1935 /* the scalar precision tracking algorithm:
1936  * . at the start all registers have precise=false.
1937  * . scalar ranges are tracked as normal through alu and jmp insns.
1938  * . once precise value of the scalar register is used in:
1939  *   .  ptr + scalar alu
1940  *   . if (scalar cond K|scalar)
1941  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
1942  *   backtrack through the verifier states and mark all registers and
1943  *   stack slots with spilled constants that these scalar regisers
1944  *   should be precise.
1945  * . during state pruning two registers (or spilled stack slots)
1946  *   are equivalent if both are not precise.
1947  *
1948  * Note the verifier cannot simply walk register parentage chain,
1949  * since many different registers and stack slots could have been
1950  * used to compute single precise scalar.
1951  *
1952  * The approach of starting with precise=true for all registers and then
1953  * backtrack to mark a register as not precise when the verifier detects
1954  * that program doesn't care about specific value (e.g., when helper
1955  * takes register as ARG_ANYTHING parameter) is not safe.
1956  *
1957  * It's ok to walk single parentage chain of the verifier states.
1958  * It's possible that this backtracking will go all the way till 1st insn.
1959  * All other branches will be explored for needing precision later.
1960  *
1961  * The backtracking needs to deal with cases like:
1962  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1963  * r9 -= r8
1964  * r5 = r9
1965  * if r5 > 0x79f goto pc+7
1966  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1967  * r5 += 1
1968  * ...
1969  * call bpf_perf_event_output#25
1970  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1971  *
1972  * and this case:
1973  * r6 = 1
1974  * call foo // uses callee's r6 inside to compute r0
1975  * r0 += r6
1976  * if r0 == 0 goto
1977  *
1978  * to track above reg_mask/stack_mask needs to be independent for each frame.
1979  *
1980  * Also if parent's curframe > frame where backtracking started,
1981  * the verifier need to mark registers in both frames, otherwise callees
1982  * may incorrectly prune callers. This is similar to
1983  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1984  *
1985  * For now backtracking falls back into conservative marking.
1986  */
1987 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1988                                      struct bpf_verifier_state *st)
1989 {
1990         struct bpf_func_state *func;
1991         struct bpf_reg_state *reg;
1992         int i, j;
1993
1994         /* big hammer: mark all scalars precise in this path.
1995          * pop_stack may still get !precise scalars.
1996          */
1997         for (; st; st = st->parent)
1998                 for (i = 0; i <= st->curframe; i++) {
1999                         func = st->frame[i];
2000                         for (j = 0; j < BPF_REG_FP; j++) {
2001                                 reg = &func->regs[j];
2002                                 if (reg->type != SCALAR_VALUE)
2003                                         continue;
2004                                 reg->precise = true;
2005                         }
2006                         for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2007                                 if (func->stack[j].slot_type[0] != STACK_SPILL)
2008                                         continue;
2009                                 reg = &func->stack[j].spilled_ptr;
2010                                 if (reg->type != SCALAR_VALUE)
2011                                         continue;
2012                                 reg->precise = true;
2013                         }
2014                 }
2015 }
2016
2017 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2018                                   int spi)
2019 {
2020         struct bpf_verifier_state *st = env->cur_state;
2021         int first_idx = st->first_insn_idx;
2022         int last_idx = env->insn_idx;
2023         struct bpf_func_state *func;
2024         struct bpf_reg_state *reg;
2025         u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2026         u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2027         bool skip_first = true;
2028         bool new_marks = false;
2029         int i, err;
2030
2031         if (!env->bpf_capable)
2032                 return 0;
2033
2034         func = st->frame[st->curframe];
2035         if (regno >= 0) {
2036                 reg = &func->regs[regno];
2037                 if (reg->type != SCALAR_VALUE) {
2038                         WARN_ONCE(1, "backtracing misuse");
2039                         return -EFAULT;
2040                 }
2041                 if (!reg->precise)
2042                         new_marks = true;
2043                 else
2044                         reg_mask = 0;
2045                 reg->precise = true;
2046         }
2047
2048         while (spi >= 0) {
2049                 if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2050                         stack_mask = 0;
2051                         break;
2052                 }
2053                 reg = &func->stack[spi].spilled_ptr;
2054                 if (reg->type != SCALAR_VALUE) {
2055                         stack_mask = 0;
2056                         break;
2057                 }
2058                 if (!reg->precise)
2059                         new_marks = true;
2060                 else
2061                         stack_mask = 0;
2062                 reg->precise = true;
2063                 break;
2064         }
2065
2066         if (!new_marks)
2067                 return 0;
2068         if (!reg_mask && !stack_mask)
2069                 return 0;
2070         for (;;) {
2071                 DECLARE_BITMAP(mask, 64);
2072                 u32 history = st->jmp_history_cnt;
2073
2074                 if (env->log.level & BPF_LOG_LEVEL)
2075                         verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2076                 for (i = last_idx;;) {
2077                         if (skip_first) {
2078                                 err = 0;
2079                                 skip_first = false;
2080                         } else {
2081                                 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2082                         }
2083                         if (err == -ENOTSUPP) {
2084                                 mark_all_scalars_precise(env, st);
2085                                 return 0;
2086                         } else if (err) {
2087                                 return err;
2088                         }
2089                         if (!reg_mask && !stack_mask)
2090                                 /* Found assignment(s) into tracked register in this state.
2091                                  * Since this state is already marked, just return.
2092                                  * Nothing to be tracked further in the parent state.
2093                                  */
2094                                 return 0;
2095                         if (i == first_idx)
2096                                 break;
2097                         i = get_prev_insn_idx(st, i, &history);
2098                         if (i >= env->prog->len) {
2099                                 /* This can happen if backtracking reached insn 0
2100                                  * and there are still reg_mask or stack_mask
2101                                  * to backtrack.
2102                                  * It means the backtracking missed the spot where
2103                                  * particular register was initialized with a constant.
2104                                  */
2105                                 verbose(env, "BUG backtracking idx %d\n", i);
2106                                 WARN_ONCE(1, "verifier backtracking bug");
2107                                 return -EFAULT;
2108                         }
2109                 }
2110                 st = st->parent;
2111                 if (!st)
2112                         break;
2113
2114                 new_marks = false;
2115                 func = st->frame[st->curframe];
2116                 bitmap_from_u64(mask, reg_mask);
2117                 for_each_set_bit(i, mask, 32) {
2118                         reg = &func->regs[i];
2119                         if (reg->type != SCALAR_VALUE) {
2120                                 reg_mask &= ~(1u << i);
2121                                 continue;
2122                         }
2123                         if (!reg->precise)
2124                                 new_marks = true;
2125                         reg->precise = true;
2126                 }
2127
2128                 bitmap_from_u64(mask, stack_mask);
2129                 for_each_set_bit(i, mask, 64) {
2130                         if (i >= func->allocated_stack / BPF_REG_SIZE) {
2131                                 /* the sequence of instructions:
2132                                  * 2: (bf) r3 = r10
2133                                  * 3: (7b) *(u64 *)(r3 -8) = r0
2134                                  * 4: (79) r4 = *(u64 *)(r10 -8)
2135                                  * doesn't contain jmps. It's backtracked
2136                                  * as a single block.
2137                                  * During backtracking insn 3 is not recognized as
2138                                  * stack access, so at the end of backtracking
2139                                  * stack slot fp-8 is still marked in stack_mask.
2140                                  * However the parent state may not have accessed
2141                                  * fp-8 and it's "unallocated" stack space.
2142                                  * In such case fallback to conservative.
2143                                  */
2144                                 mark_all_scalars_precise(env, st);
2145                                 return 0;
2146                         }
2147
2148                         if (func->stack[i].slot_type[0] != STACK_SPILL) {
2149                                 stack_mask &= ~(1ull << i);
2150                                 continue;
2151                         }
2152                         reg = &func->stack[i].spilled_ptr;
2153                         if (reg->type != SCALAR_VALUE) {
2154                                 stack_mask &= ~(1ull << i);
2155                                 continue;
2156                         }
2157                         if (!reg->precise)
2158                                 new_marks = true;
2159                         reg->precise = true;
2160                 }
2161                 if (env->log.level & BPF_LOG_LEVEL) {
2162                         print_verifier_state(env, func);
2163                         verbose(env, "parent %s regs=%x stack=%llx marks\n",
2164                                 new_marks ? "didn't have" : "already had",
2165                                 reg_mask, stack_mask);
2166                 }
2167
2168                 if (!reg_mask && !stack_mask)
2169                         break;
2170                 if (!new_marks)
2171                         break;
2172
2173                 last_idx = st->last_insn_idx;
2174                 first_idx = st->first_insn_idx;
2175         }
2176         return 0;
2177 }
2178
2179 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2180 {
2181         return __mark_chain_precision(env, regno, -1);
2182 }
2183
2184 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2185 {
2186         return __mark_chain_precision(env, -1, spi);
2187 }
2188
2189 static bool is_spillable_regtype(enum bpf_reg_type type)
2190 {
2191         switch (type) {
2192         case PTR_TO_MAP_VALUE:
2193         case PTR_TO_MAP_VALUE_OR_NULL:
2194         case PTR_TO_STACK:
2195         case PTR_TO_CTX:
2196         case PTR_TO_PACKET:
2197         case PTR_TO_PACKET_META:
2198         case PTR_TO_PACKET_END:
2199         case PTR_TO_FLOW_KEYS:
2200         case CONST_PTR_TO_MAP:
2201         case PTR_TO_SOCKET:
2202         case PTR_TO_SOCKET_OR_NULL:
2203         case PTR_TO_SOCK_COMMON:
2204         case PTR_TO_SOCK_COMMON_OR_NULL:
2205         case PTR_TO_TCP_SOCK:
2206         case PTR_TO_TCP_SOCK_OR_NULL:
2207         case PTR_TO_XDP_SOCK:
2208         case PTR_TO_BTF_ID:
2209         case PTR_TO_BTF_ID_OR_NULL:
2210         case PTR_TO_RDONLY_BUF:
2211         case PTR_TO_RDONLY_BUF_OR_NULL:
2212         case PTR_TO_RDWR_BUF:
2213         case PTR_TO_RDWR_BUF_OR_NULL:
2214         case PTR_TO_PERCPU_BTF_ID:
2215         case PTR_TO_MEM:
2216         case PTR_TO_MEM_OR_NULL:
2217                 return true;
2218         default:
2219                 return false;
2220         }
2221 }
2222
2223 /* Does this register contain a constant zero? */
2224 static bool register_is_null(struct bpf_reg_state *reg)
2225 {
2226         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2227 }
2228
2229 static bool register_is_const(struct bpf_reg_state *reg)
2230 {
2231         return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2232 }
2233
2234 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2235 {
2236         return tnum_is_unknown(reg->var_off) &&
2237                reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2238                reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2239                reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2240                reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2241 }
2242
2243 static bool register_is_bounded(struct bpf_reg_state *reg)
2244 {
2245         return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2246 }
2247
2248 static bool __is_pointer_value(bool allow_ptr_leaks,
2249                                const struct bpf_reg_state *reg)
2250 {
2251         if (allow_ptr_leaks)
2252                 return false;
2253
2254         return reg->type != SCALAR_VALUE;
2255 }
2256
2257 static void save_register_state(struct bpf_func_state *state,
2258                                 int spi, struct bpf_reg_state *reg)
2259 {
2260         int i;
2261
2262         state->stack[spi].spilled_ptr = *reg;
2263         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2264
2265         for (i = 0; i < BPF_REG_SIZE; i++)
2266                 state->stack[spi].slot_type[i] = STACK_SPILL;
2267 }
2268
2269 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2270  * stack boundary and alignment are checked in check_mem_access()
2271  */
2272 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2273                                        /* stack frame we're writing to */
2274                                        struct bpf_func_state *state,
2275                                        int off, int size, int value_regno,
2276                                        int insn_idx)
2277 {
2278         struct bpf_func_state *cur; /* state of the current function */
2279         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2280         u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2281         struct bpf_reg_state *reg = NULL;
2282
2283         err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
2284                                  state->acquired_refs, true);
2285         if (err)
2286                 return err;
2287         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2288          * so it's aligned access and [off, off + size) are within stack limits
2289          */
2290         if (!env->allow_ptr_leaks &&
2291             state->stack[spi].slot_type[0] == STACK_SPILL &&
2292             size != BPF_REG_SIZE) {
2293                 verbose(env, "attempt to corrupt spilled pointer on stack\n");
2294                 return -EACCES;
2295         }
2296
2297         cur = env->cur_state->frame[env->cur_state->curframe];
2298         if (value_regno >= 0)
2299                 reg = &cur->regs[value_regno];
2300         if (!env->bypass_spec_v4) {
2301                 bool sanitize = reg && is_spillable_regtype(reg->type);
2302
2303                 for (i = 0; i < size; i++) {
2304                         if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2305                                 sanitize = true;
2306                                 break;
2307                         }
2308                 }
2309
2310                 if (sanitize)
2311                         env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2312         }
2313
2314         if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
2315             !register_is_null(reg) && env->bpf_capable) {
2316                 if (dst_reg != BPF_REG_FP) {
2317                         /* The backtracking logic can only recognize explicit
2318                          * stack slot address like [fp - 8]. Other spill of
2319                          * scalar via different register has to be conervative.
2320                          * Backtrack from here and mark all registers as precise
2321                          * that contributed into 'reg' being a constant.
2322                          */
2323                         err = mark_chain_precision(env, value_regno);
2324                         if (err)
2325                                 return err;
2326                 }
2327                 save_register_state(state, spi, reg);
2328         } else if (reg && is_spillable_regtype(reg->type)) {
2329                 /* register containing pointer is being spilled into stack */
2330                 if (size != BPF_REG_SIZE) {
2331                         verbose_linfo(env, insn_idx, "; ");
2332                         verbose(env, "invalid size of register spill\n");
2333                         return -EACCES;
2334                 }
2335                 if (state != cur && reg->type == PTR_TO_STACK) {
2336                         verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2337                         return -EINVAL;
2338                 }
2339                 save_register_state(state, spi, reg);
2340         } else {
2341                 u8 type = STACK_MISC;
2342
2343                 /* regular write of data into stack destroys any spilled ptr */
2344                 state->stack[spi].spilled_ptr.type = NOT_INIT;
2345                 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2346                 if (state->stack[spi].slot_type[0] == STACK_SPILL)
2347                         for (i = 0; i < BPF_REG_SIZE; i++)
2348                                 state->stack[spi].slot_type[i] = STACK_MISC;
2349
2350                 /* only mark the slot as written if all 8 bytes were written
2351                  * otherwise read propagation may incorrectly stop too soon
2352                  * when stack slots are partially written.
2353                  * This heuristic means that read propagation will be
2354                  * conservative, since it will add reg_live_read marks
2355                  * to stack slots all the way to first state when programs
2356                  * writes+reads less than 8 bytes
2357                  */
2358                 if (size == BPF_REG_SIZE)
2359                         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2360
2361                 /* when we zero initialize stack slots mark them as such */
2362                 if (reg && register_is_null(reg)) {
2363                         /* backtracking doesn't work for STACK_ZERO yet. */
2364                         err = mark_chain_precision(env, value_regno);
2365                         if (err)
2366                                 return err;
2367                         type = STACK_ZERO;
2368                 }
2369
2370                 /* Mark slots affected by this stack write. */
2371                 for (i = 0; i < size; i++)
2372                         state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2373                                 type;
2374         }
2375         return 0;
2376 }
2377
2378 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2379  * known to contain a variable offset.
2380  * This function checks whether the write is permitted and conservatively
2381  * tracks the effects of the write, considering that each stack slot in the
2382  * dynamic range is potentially written to.
2383  *
2384  * 'off' includes 'regno->off'.
2385  * 'value_regno' can be -1, meaning that an unknown value is being written to
2386  * the stack.
2387  *
2388  * Spilled pointers in range are not marked as written because we don't know
2389  * what's going to be actually written. This means that read propagation for
2390  * future reads cannot be terminated by this write.
2391  *
2392  * For privileged programs, uninitialized stack slots are considered
2393  * initialized by this write (even though we don't know exactly what offsets
2394  * are going to be written to). The idea is that we don't want the verifier to
2395  * reject future reads that access slots written to through variable offsets.
2396  */
2397 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2398                                      /* func where register points to */
2399                                      struct bpf_func_state *state,
2400                                      int ptr_regno, int off, int size,
2401                                      int value_regno, int insn_idx)
2402 {
2403         struct bpf_func_state *cur; /* state of the current function */
2404         int min_off, max_off;
2405         int i, err;
2406         struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2407         bool writing_zero = false;
2408         /* set if the fact that we're writing a zero is used to let any
2409          * stack slots remain STACK_ZERO
2410          */
2411         bool zero_used = false;
2412
2413         cur = env->cur_state->frame[env->cur_state->curframe];
2414         ptr_reg = &cur->regs[ptr_regno];
2415         min_off = ptr_reg->smin_value + off;
2416         max_off = ptr_reg->smax_value + off + size;
2417         if (value_regno >= 0)
2418                 value_reg = &cur->regs[value_regno];
2419         if (value_reg && register_is_null(value_reg))
2420                 writing_zero = true;
2421
2422         err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE),
2423                                  state->acquired_refs, true);
2424         if (err)
2425                 return err;
2426
2427
2428         /* Variable offset writes destroy any spilled pointers in range. */
2429         for (i = min_off; i < max_off; i++) {
2430                 u8 new_type, *stype;
2431                 int slot, spi;
2432
2433                 slot = -i - 1;
2434                 spi = slot / BPF_REG_SIZE;
2435                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2436
2437                 if (!env->allow_ptr_leaks
2438                                 && *stype != NOT_INIT
2439                                 && *stype != SCALAR_VALUE) {
2440                         /* Reject the write if there's are spilled pointers in
2441                          * range. If we didn't reject here, the ptr status
2442                          * would be erased below (even though not all slots are
2443                          * actually overwritten), possibly opening the door to
2444                          * leaks.
2445                          */
2446                         verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
2447                                 insn_idx, i);
2448                         return -EINVAL;
2449                 }
2450
2451                 /* Erase all spilled pointers. */
2452                 state->stack[spi].spilled_ptr.type = NOT_INIT;
2453
2454                 /* Update the slot type. */
2455                 new_type = STACK_MISC;
2456                 if (writing_zero && *stype == STACK_ZERO) {
2457                         new_type = STACK_ZERO;
2458                         zero_used = true;
2459                 }
2460                 /* If the slot is STACK_INVALID, we check whether it's OK to
2461                  * pretend that it will be initialized by this write. The slot
2462                  * might not actually be written to, and so if we mark it as
2463                  * initialized future reads might leak uninitialized memory.
2464                  * For privileged programs, we will accept such reads to slots
2465                  * that may or may not be written because, if we're reject
2466                  * them, the error would be too confusing.
2467                  */
2468                 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2469                         verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
2470                                         insn_idx, i);
2471                         return -EINVAL;
2472                 }
2473                 *stype = new_type;
2474         }
2475         if (zero_used) {
2476                 /* backtracking doesn't work for STACK_ZERO yet. */
2477                 err = mark_chain_precision(env, value_regno);
2478                 if (err)
2479                         return err;
2480         }
2481         return 0;
2482 }
2483
2484 /* When register 'dst_regno' is assigned some values from stack[min_off,
2485  * max_off), we set the register's type according to the types of the
2486  * respective stack slots. If all the stack values are known to be zeros, then
2487  * so is the destination reg. Otherwise, the register is considered to be
2488  * SCALAR. This function does not deal with register filling; the caller must
2489  * ensure that all spilled registers in the stack range have been marked as
2490  * read.
2491  */
2492 static void mark_reg_stack_read(struct bpf_verifier_env *env,
2493                                 /* func where src register points to */
2494                                 struct bpf_func_state *ptr_state,
2495                                 int min_off, int max_off, int dst_regno)
2496 {
2497         struct bpf_verifier_state *vstate = env->cur_state;
2498         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2499         int i, slot, spi;
2500         u8 *stype;
2501         int zeros = 0;
2502
2503         for (i = min_off; i < max_off; i++) {
2504                 slot = -i - 1;
2505                 spi = slot / BPF_REG_SIZE;
2506                 stype = ptr_state->stack[spi].slot_type;
2507                 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
2508                         break;
2509                 zeros++;
2510         }
2511         if (zeros == max_off - min_off) {
2512                 /* any access_size read into register is zero extended,
2513                  * so the whole register == const_zero
2514                  */
2515                 __mark_reg_const_zero(&state->regs[dst_regno]);
2516                 /* backtracking doesn't support STACK_ZERO yet,
2517                  * so mark it precise here, so that later
2518                  * backtracking can stop here.
2519                  * Backtracking may not need this if this register
2520                  * doesn't participate in pointer adjustment.
2521                  * Forward propagation of precise flag is not
2522                  * necessary either. This mark is only to stop
2523                  * backtracking. Any register that contributed
2524                  * to const 0 was marked precise before spill.
2525                  */
2526                 state->regs[dst_regno].precise = true;
2527         } else {
2528                 /* have read misc data from the stack */
2529                 mark_reg_unknown(env, state->regs, dst_regno);
2530         }
2531         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2532 }
2533
2534 /* Read the stack at 'off' and put the results into the register indicated by
2535  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
2536  * spilled reg.
2537  *
2538  * 'dst_regno' can be -1, meaning that the read value is not going to a
2539  * register.
2540  *
2541  * The access is assumed to be within the current stack bounds.
2542  */
2543 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
2544                                       /* func where src register points to */
2545                                       struct bpf_func_state *reg_state,
2546                                       int off, int size, int dst_regno)
2547 {
2548         struct bpf_verifier_state *vstate = env->cur_state;
2549         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2550         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2551         struct bpf_reg_state *reg;
2552         u8 *stype;
2553
2554         stype = reg_state->stack[spi].slot_type;
2555         reg = &reg_state->stack[spi].spilled_ptr;
2556
2557         if (stype[0] == STACK_SPILL) {
2558                 if (size != BPF_REG_SIZE) {
2559                         if (reg->type != SCALAR_VALUE) {
2560                                 verbose_linfo(env, env->insn_idx, "; ");
2561                                 verbose(env, "invalid size of register fill\n");
2562                                 return -EACCES;
2563                         }
2564                         if (dst_regno >= 0) {
2565                                 mark_reg_unknown(env, state->regs, dst_regno);
2566                                 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2567                         }
2568                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2569                         return 0;
2570                 }
2571                 for (i = 1; i < BPF_REG_SIZE; i++) {
2572                         if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2573                                 verbose(env, "corrupted spill memory\n");
2574                                 return -EACCES;
2575                         }
2576                 }
2577
2578                 if (dst_regno >= 0) {
2579                         /* restore register state from stack */
2580                         state->regs[dst_regno] = *reg;
2581                         /* mark reg as written since spilled pointer state likely
2582                          * has its liveness marks cleared by is_state_visited()
2583                          * which resets stack/reg liveness for state transitions
2584                          */
2585                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2586                 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2587                         /* If dst_regno==-1, the caller is asking us whether
2588                          * it is acceptable to use this value as a SCALAR_VALUE
2589                          * (e.g. for XADD).
2590                          * We must not allow unprivileged callers to do that
2591                          * with spilled pointers.
2592                          */
2593                         verbose(env, "leaking pointer from stack off %d\n",
2594                                 off);
2595                         return -EACCES;
2596                 }
2597                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2598         } else {
2599                 u8 type;
2600
2601                 for (i = 0; i < size; i++) {
2602                         type = stype[(slot - i) % BPF_REG_SIZE];
2603                         if (type == STACK_MISC)
2604                                 continue;
2605                         if (type == STACK_ZERO)
2606                                 continue;
2607                         verbose(env, "invalid read from stack off %d+%d size %d\n",
2608                                 off, i, size);
2609                         return -EACCES;
2610                 }
2611                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2612                 if (dst_regno >= 0)
2613                         mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
2614         }
2615         return 0;
2616 }
2617
2618 enum stack_access_src {
2619         ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
2620         ACCESS_HELPER = 2,  /* the access is performed by a helper */
2621 };
2622
2623 static int check_stack_range_initialized(struct bpf_verifier_env *env,
2624                                          int regno, int off, int access_size,
2625                                          bool zero_size_allowed,
2626                                          enum stack_access_src type,
2627                                          struct bpf_call_arg_meta *meta);
2628
2629 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2630 {
2631         return cur_regs(env) + regno;
2632 }
2633
2634 /* Read the stack at 'ptr_regno + off' and put the result into the register
2635  * 'dst_regno'.
2636  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
2637  * but not its variable offset.
2638  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
2639  *
2640  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
2641  * filling registers (i.e. reads of spilled register cannot be detected when
2642  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
2643  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
2644  * offset; for a fixed offset check_stack_read_fixed_off should be used
2645  * instead.
2646  */
2647 static int check_stack_read_var_off(struct bpf_verifier_env *env,
2648                                     int ptr_regno, int off, int size, int dst_regno)
2649 {
2650         /* The state of the source register. */
2651         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2652         struct bpf_func_state *ptr_state = func(env, reg);
2653         int err;
2654         int min_off, max_off;
2655
2656         /* Note that we pass a NULL meta, so raw access will not be permitted.
2657          */
2658         err = check_stack_range_initialized(env, ptr_regno, off, size,
2659                                             false, ACCESS_DIRECT, NULL);
2660         if (err)
2661                 return err;
2662
2663         min_off = reg->smin_value + off;
2664         max_off = reg->smax_value + off;
2665         mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
2666         return 0;
2667 }
2668
2669 /* check_stack_read dispatches to check_stack_read_fixed_off or
2670  * check_stack_read_var_off.
2671  *
2672  * The caller must ensure that the offset falls within the allocated stack
2673  * bounds.
2674  *
2675  * 'dst_regno' is a register which will receive the value from the stack. It
2676  * can be -1, meaning that the read value is not going to a register.
2677  */
2678 static int check_stack_read(struct bpf_verifier_env *env,
2679                             int ptr_regno, int off, int size,
2680                             int dst_regno)
2681 {
2682         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2683         struct bpf_func_state *state = func(env, reg);
2684         int err;
2685         /* Some accesses are only permitted with a static offset. */
2686         bool var_off = !tnum_is_const(reg->var_off);
2687
2688         /* The offset is required to be static when reads don't go to a
2689          * register, in order to not leak pointers (see
2690          * check_stack_read_fixed_off).
2691          */
2692         if (dst_regno < 0 && var_off) {
2693                 char tn_buf[48];
2694
2695                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2696                 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
2697                         tn_buf, off, size);
2698                 return -EACCES;
2699         }
2700         /* Variable offset is prohibited for unprivileged mode for simplicity
2701          * since it requires corresponding support in Spectre masking for stack
2702          * ALU. See also retrieve_ptr_limit().
2703          */
2704         if (!env->bypass_spec_v1 && var_off) {
2705                 char tn_buf[48];
2706
2707                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2708                 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
2709                                 ptr_regno, tn_buf);
2710                 return -EACCES;
2711         }
2712
2713         if (!var_off) {
2714                 off += reg->var_off.value;
2715                 err = check_stack_read_fixed_off(env, state, off, size,
2716                                                  dst_regno);
2717         } else {
2718                 /* Variable offset stack reads need more conservative handling
2719                  * than fixed offset ones. Note that dst_regno >= 0 on this
2720                  * branch.
2721                  */
2722                 err = check_stack_read_var_off(env, ptr_regno, off, size,
2723                                                dst_regno);
2724         }
2725         return err;
2726 }
2727
2728
2729 /* check_stack_write dispatches to check_stack_write_fixed_off or
2730  * check_stack_write_var_off.
2731  *
2732  * 'ptr_regno' is the register used as a pointer into the stack.
2733  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
2734  * 'value_regno' is the register whose value we're writing to the stack. It can
2735  * be -1, meaning that we're not writing from a register.
2736  *
2737  * The caller must ensure that the offset falls within the maximum stack size.
2738  */
2739 static int check_stack_write(struct bpf_verifier_env *env,
2740                              int ptr_regno, int off, int size,
2741                              int value_regno, int insn_idx)
2742 {
2743         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2744         struct bpf_func_state *state = func(env, reg);
2745         int err;
2746
2747         if (tnum_is_const(reg->var_off)) {
2748                 off += reg->var_off.value;
2749                 err = check_stack_write_fixed_off(env, state, off, size,
2750                                                   value_regno, insn_idx);
2751         } else {
2752                 /* Variable offset stack reads need more conservative handling
2753                  * than fixed offset ones.
2754                  */
2755                 err = check_stack_write_var_off(env, state,
2756                                                 ptr_regno, off, size,
2757                                                 value_regno, insn_idx);
2758         }
2759         return err;
2760 }
2761
2762 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
2763                                  int off, int size, enum bpf_access_type type)
2764 {
2765         struct bpf_reg_state *regs = cur_regs(env);
2766         struct bpf_map *map = regs[regno].map_ptr;
2767         u32 cap = bpf_map_flags_to_cap(map);
2768
2769         if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2770                 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
2771                         map->value_size, off, size);
2772                 return -EACCES;
2773         }
2774
2775         if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2776                 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
2777                         map->value_size, off, size);
2778                 return -EACCES;
2779         }
2780
2781         return 0;
2782 }
2783
2784 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
2785 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
2786                               int off, int size, u32 mem_size,
2787                               bool zero_size_allowed)
2788 {
2789         bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
2790         struct bpf_reg_state *reg;
2791
2792         if (off >= 0 && size_ok && (u64)off + size <= mem_size)
2793                 return 0;
2794
2795         reg = &cur_regs(env)[regno];
2796         switch (reg->type) {
2797         case PTR_TO_MAP_VALUE:
2798                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
2799                         mem_size, off, size);
2800                 break;
2801         case PTR_TO_PACKET:
2802         case PTR_TO_PACKET_META:
2803         case PTR_TO_PACKET_END:
2804                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
2805                         off, size, regno, reg->id, off, mem_size);
2806                 break;
2807         case PTR_TO_MEM:
2808         default:
2809                 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
2810                         mem_size, off, size);
2811         }
2812
2813         return -EACCES;
2814 }
2815
2816 /* check read/write into a memory region with possible variable offset */
2817 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
2818                                    int off, int size, u32 mem_size,
2819                                    bool zero_size_allowed)
2820 {
2821         struct bpf_verifier_state *vstate = env->cur_state;
2822         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2823         struct bpf_reg_state *reg = &state->regs[regno];
2824         int err;
2825
2826         /* We may have adjusted the register pointing to memory region, so we
2827          * need to try adding each of min_value and max_value to off
2828          * to make sure our theoretical access will be safe.
2829          */
2830         if (env->log.level & BPF_LOG_LEVEL)
2831                 print_verifier_state(env, state);
2832
2833         /* The minimum value is only important with signed
2834          * comparisons where we can't assume the floor of a
2835          * value is 0.  If we are using signed variables for our
2836          * index'es we need to make sure that whatever we use
2837          * will have a set floor within our range.
2838          */
2839         if (reg->smin_value < 0 &&
2840             (reg->smin_value == S64_MIN ||
2841              (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2842               reg->smin_value + off < 0)) {
2843                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2844                         regno);
2845                 return -EACCES;
2846         }
2847         err = __check_mem_access(env, regno, reg->smin_value + off, size,
2848                                  mem_size, zero_size_allowed);
2849         if (err) {
2850                 verbose(env, "R%d min value is outside of the allowed memory range\n",
2851                         regno);
2852                 return err;
2853         }
2854
2855         /* If we haven't set a max value then we need to bail since we can't be
2856          * sure we won't do bad things.
2857          * If reg->umax_value + off could overflow, treat that as unbounded too.
2858          */
2859         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2860                 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
2861                         regno);
2862                 return -EACCES;
2863         }
2864         err = __check_mem_access(env, regno, reg->umax_value + off, size,
2865                                  mem_size, zero_size_allowed);
2866         if (err) {
2867                 verbose(env, "R%d max value is outside of the allowed memory range\n",
2868                         regno);
2869                 return err;
2870         }
2871
2872         return 0;
2873 }
2874
2875 /* check read/write into a map element with possible variable offset */
2876 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
2877                             int off, int size, bool zero_size_allowed)
2878 {
2879         struct bpf_verifier_state *vstate = env->cur_state;
2880         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2881         struct bpf_reg_state *reg = &state->regs[regno];
2882         struct bpf_map *map = reg->map_ptr;
2883         int err;
2884
2885         err = check_mem_region_access(env, regno, off, size, map->value_size,
2886                                       zero_size_allowed);
2887         if (err)
2888                 return err;
2889
2890         if (map_value_has_spin_lock(map)) {
2891                 u32 lock = map->spin_lock_off;
2892
2893                 /* if any part of struct bpf_spin_lock can be touched by
2894                  * load/store reject this program.
2895                  * To check that [x1, x2) overlaps with [y1, y2)
2896                  * it is sufficient to check x1 < y2 && y1 < x2.
2897                  */
2898                 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2899                      lock < reg->umax_value + off + size) {
2900                         verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2901                         return -EACCES;
2902                 }
2903         }
2904         return err;
2905 }
2906
2907 #define MAX_PACKET_OFF 0xffff
2908
2909 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
2910 {
2911         return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
2912 }
2913
2914 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
2915                                        const struct bpf_call_arg_meta *meta,
2916                                        enum bpf_access_type t)
2917 {
2918         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
2919
2920         switch (prog_type) {
2921         /* Program types only with direct read access go here! */
2922         case BPF_PROG_TYPE_LWT_IN:
2923         case BPF_PROG_TYPE_LWT_OUT:
2924         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2925         case BPF_PROG_TYPE_SK_REUSEPORT:
2926         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2927         case BPF_PROG_TYPE_CGROUP_SKB:
2928                 if (t == BPF_WRITE)
2929                         return false;
2930                 fallthrough;
2931
2932         /* Program types with direct read + write access go here! */
2933         case BPF_PROG_TYPE_SCHED_CLS:
2934         case BPF_PROG_TYPE_SCHED_ACT:
2935         case BPF_PROG_TYPE_XDP:
2936         case BPF_PROG_TYPE_LWT_XMIT:
2937         case BPF_PROG_TYPE_SK_SKB:
2938         case BPF_PROG_TYPE_SK_MSG:
2939                 if (meta)
2940                         return meta->pkt_access;
2941
2942                 env->seen_direct_write = true;
2943                 return true;
2944
2945         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2946                 if (t == BPF_WRITE)
2947                         env->seen_direct_write = true;
2948
2949                 return true;
2950
2951         default:
2952                 return false;
2953         }
2954 }
2955
2956 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
2957                                int size, bool zero_size_allowed)
2958 {
2959         struct bpf_reg_state *regs = cur_regs(env);
2960         struct bpf_reg_state *reg = &regs[regno];
2961         int err;
2962
2963         /* We may have added a variable offset to the packet pointer; but any
2964          * reg->range we have comes after that.  We are only checking the fixed
2965          * offset.
2966          */
2967
2968         /* We don't allow negative numbers, because we aren't tracking enough
2969          * detail to prove they're safe.
2970          */
2971         if (reg->smin_value < 0) {
2972                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2973                         regno);
2974                 return -EACCES;
2975         }
2976         err = __check_mem_access(env, regno, off, size, reg->range,
2977                                  zero_size_allowed);
2978         if (err) {
2979                 verbose(env, "R%d offset is outside of the packet\n", regno);
2980                 return err;
2981         }
2982
2983         /* __check_mem_access has made sure "off + size - 1" is within u16.
2984          * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2985          * otherwise find_good_pkt_pointers would have refused to set range info
2986          * that __check_mem_access would have rejected this pkt access.
2987          * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2988          */
2989         env->prog->aux->max_pkt_offset =
2990                 max_t(u32, env->prog->aux->max_pkt_offset,
2991                       off + reg->umax_value + size - 1);
2992
2993         return err;
2994 }
2995
2996 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
2997 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
2998                             enum bpf_access_type t, enum bpf_reg_type *reg_type,
2999                             u32 *btf_id)
3000 {
3001         struct bpf_insn_access_aux info = {
3002                 .reg_type = *reg_type,
3003                 .log = &env->log,
3004         };
3005
3006         if (env->ops->is_valid_access &&
3007             env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3008                 /* A non zero info.ctx_field_size indicates that this field is a
3009                  * candidate for later verifier transformation to load the whole
3010                  * field and then apply a mask when accessed with a narrower
3011                  * access than actual ctx access size. A zero info.ctx_field_size
3012                  * will only allow for whole field access and rejects any other
3013                  * type of narrower access.
3014                  */
3015                 *reg_type = info.reg_type;
3016
3017                 if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
3018                         *btf_id = info.btf_id;
3019                 else
3020                         env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3021                 /* remember the offset of last byte accessed in ctx */
3022                 if (env->prog->aux->max_ctx_offset < off + size)
3023                         env->prog->aux->max_ctx_offset = off + size;
3024                 return 0;
3025         }
3026
3027         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3028         return -EACCES;
3029 }
3030
3031 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3032                                   int size)
3033 {
3034         if (size < 0 || off < 0 ||
3035             (u64)off + size > sizeof(struct bpf_flow_keys)) {
3036                 verbose(env, "invalid access to flow keys off=%d size=%d\n",
3037                         off, size);
3038                 return -EACCES;
3039         }
3040         return 0;
3041 }
3042
3043 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3044                              u32 regno, int off, int size,
3045                              enum bpf_access_type t)
3046 {
3047         struct bpf_reg_state *regs = cur_regs(env);
3048         struct bpf_reg_state *reg = &regs[regno];
3049         struct bpf_insn_access_aux info = {};
3050         bool valid;
3051
3052         if (reg->smin_value < 0) {
3053                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3054                         regno);
3055                 return -EACCES;
3056         }
3057
3058         switch (reg->type) {
3059         case PTR_TO_SOCK_COMMON:
3060                 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3061                 break;
3062         case PTR_TO_SOCKET:
3063                 valid = bpf_sock_is_valid_access(off, size, t, &info);
3064                 break;
3065         case PTR_TO_TCP_SOCK:
3066                 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3067                 break;
3068         case PTR_TO_XDP_SOCK:
3069                 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3070                 break;
3071         default:
3072                 valid = false;
3073         }
3074
3075
3076         if (valid) {
3077                 env->insn_aux_data[insn_idx].ctx_field_size =
3078                         info.ctx_field_size;
3079                 return 0;
3080         }
3081
3082         verbose(env, "R%d invalid %s access off=%d size=%d\n",
3083                 regno, reg_type_str[reg->type], off, size);
3084
3085         return -EACCES;
3086 }
3087
3088 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3089 {
3090         return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3091 }
3092
3093 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3094 {
3095         const struct bpf_reg_state *reg = reg_state(env, regno);
3096
3097         return reg->type == PTR_TO_CTX;
3098 }
3099
3100 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3101 {
3102         const struct bpf_reg_state *reg = reg_state(env, regno);
3103
3104         return type_is_sk_pointer(reg->type);
3105 }
3106
3107 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3108 {
3109         const struct bpf_reg_state *reg = reg_state(env, regno);
3110
3111         return type_is_pkt_pointer(reg->type);
3112 }
3113
3114 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3115 {
3116         const struct bpf_reg_state *reg = reg_state(env, regno);
3117
3118         /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3119         return reg->type == PTR_TO_FLOW_KEYS;
3120 }
3121
3122 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3123                                    const struct bpf_reg_state *reg,
3124                                    int off, int size, bool strict)
3125 {
3126         struct tnum reg_off;
3127         int ip_align;
3128
3129         /* Byte size accesses are always allowed. */
3130         if (!strict || size == 1)
3131                 return 0;
3132
3133         /* For platforms that do not have a Kconfig enabling
3134          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3135          * NET_IP_ALIGN is universally set to '2'.  And on platforms
3136          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3137          * to this code only in strict mode where we want to emulate
3138          * the NET_IP_ALIGN==2 checking.  Therefore use an
3139          * unconditional IP align value of '2'.
3140          */
3141         ip_align = 2;
3142
3143         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3144         if (!tnum_is_aligned(reg_off, size)) {
3145                 char tn_buf[48];
3146
3147                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3148                 verbose(env,
3149                         "misaligned packet access off %d+%s+%d+%d size %d\n",
3150                         ip_align, tn_buf, reg->off, off, size);
3151                 return -EACCES;
3152         }
3153
3154         return 0;
3155 }
3156
3157 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3158                                        const struct bpf_reg_state *reg,
3159                                        const char *pointer_desc,
3160                                        int off, int size, bool strict)
3161 {
3162         struct tnum reg_off;
3163
3164         /* Byte size accesses are always allowed. */
3165         if (!strict || size == 1)
3166                 return 0;
3167
3168         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3169         if (!tnum_is_aligned(reg_off, size)) {
3170                 char tn_buf[48];
3171
3172                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3173                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3174                         pointer_desc, tn_buf, reg->off, off, size);
3175                 return -EACCES;
3176         }
3177
3178         return 0;
3179 }
3180
3181 static int check_ptr_alignment(struct bpf_verifier_env *env,
3182                                const struct bpf_reg_state *reg, int off,
3183                                int size, bool strict_alignment_once)
3184 {
3185         bool strict = env->strict_alignment || strict_alignment_once;
3186         const char *pointer_desc = "";
3187
3188         switch (reg->type) {
3189         case PTR_TO_PACKET:
3190         case PTR_TO_PACKET_META:
3191                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
3192                  * right in front, treat it the very same way.
3193                  */
3194                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
3195         case PTR_TO_FLOW_KEYS:
3196                 pointer_desc = "flow keys ";
3197                 break;
3198         case PTR_TO_MAP_VALUE:
3199                 pointer_desc = "value ";
3200                 break;
3201         case PTR_TO_CTX:
3202                 pointer_desc = "context ";
3203                 break;
3204         case PTR_TO_STACK:
3205                 pointer_desc = "stack ";
3206                 /* The stack spill tracking logic in check_stack_write_fixed_off()
3207                  * and check_stack_read_fixed_off() relies on stack accesses being
3208                  * aligned.
3209                  */
3210                 strict = true;
3211                 break;
3212         case PTR_TO_SOCKET:
3213                 pointer_desc = "sock ";
3214                 break;
3215         case PTR_TO_SOCK_COMMON:
3216                 pointer_desc = "sock_common ";
3217                 break;
3218         case PTR_TO_TCP_SOCK:
3219                 pointer_desc = "tcp_sock ";
3220                 break;
3221         case PTR_TO_XDP_SOCK:
3222                 pointer_desc = "xdp_sock ";
3223                 break;
3224         default:
3225                 break;
3226         }
3227         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3228                                            strict);
3229 }
3230
3231 static int update_stack_depth(struct bpf_verifier_env *env,
3232                               const struct bpf_func_state *func,
3233                               int off)
3234 {
3235         u16 stack = env->subprog_info[func->subprogno].stack_depth;
3236
3237         if (stack >= -off)
3238                 return 0;
3239
3240         /* update known max for given subprogram */
3241         env->subprog_info[func->subprogno].stack_depth = -off;
3242         return 0;
3243 }
3244
3245 /* starting from main bpf function walk all instructions of the function
3246  * and recursively walk all callees that given function can call.
3247  * Ignore jump and exit insns.
3248  * Since recursion is prevented by check_cfg() this algorithm
3249  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3250  */
3251 static int check_max_stack_depth(struct bpf_verifier_env *env)
3252 {
3253         int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3254         struct bpf_subprog_info *subprog = env->subprog_info;
3255         struct bpf_insn *insn = env->prog->insnsi;
3256         bool tail_call_reachable = false;
3257         int ret_insn[MAX_CALL_FRAMES];
3258         int ret_prog[MAX_CALL_FRAMES];
3259         int j;
3260
3261 process_func:
3262         /* protect against potential stack overflow that might happen when
3263          * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3264          * depth for such case down to 256 so that the worst case scenario
3265          * would result in 8k stack size (32 which is tailcall limit * 256 =
3266          * 8k).
3267          *
3268          * To get the idea what might happen, see an example:
3269          * func1 -> sub rsp, 128
3270          *  subfunc1 -> sub rsp, 256
3271          *  tailcall1 -> add rsp, 256
3272          *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3273          *   subfunc2 -> sub rsp, 64
3274          *   subfunc22 -> sub rsp, 128
3275          *   tailcall2 -> add rsp, 128
3276          *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3277          *
3278          * tailcall will unwind the current stack frame but it will not get rid
3279          * of caller's stack as shown on the example above.
3280          */
3281         if (idx && subprog[idx].has_tail_call && depth >= 256) {
3282                 verbose(env,
3283                         "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3284                         depth);
3285                 return -EACCES;
3286         }
3287         /* round up to 32-bytes, since this is granularity
3288          * of interpreter stack size
3289          */
3290         depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3291         if (depth > MAX_BPF_STACK) {
3292                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
3293                         frame + 1, depth);
3294                 return -EACCES;
3295         }
3296 continue_func:
3297         subprog_end = subprog[idx + 1].start;
3298         for (; i < subprog_end; i++) {
3299                 if (insn[i].code != (BPF_JMP | BPF_CALL))
3300                         continue;
3301                 if (insn[i].src_reg != BPF_PSEUDO_CALL)
3302                         continue;
3303                 /* remember insn and function to return to */
3304                 ret_insn[frame] = i + 1;
3305                 ret_prog[frame] = idx;
3306
3307                 /* find the callee */
3308                 i = i + insn[i].imm + 1;
3309                 idx = find_subprog(env, i);
3310                 if (idx < 0) {
3311                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3312                                   i);
3313                         return -EFAULT;
3314                 }
3315
3316                 if (subprog[idx].has_tail_call)
3317                         tail_call_reachable = true;
3318
3319                 frame++;
3320                 if (frame >= MAX_CALL_FRAMES) {
3321                         verbose(env, "the call stack of %d frames is too deep !\n",
3322                                 frame);
3323                         return -E2BIG;
3324                 }
3325                 goto process_func;
3326         }
3327         /* if tail call got detected across bpf2bpf calls then mark each of the
3328          * currently present subprog frames as tail call reachable subprogs;
3329          * this info will be utilized by JIT so that we will be preserving the
3330          * tail call counter throughout bpf2bpf calls combined with tailcalls
3331          */
3332         if (tail_call_reachable)
3333                 for (j = 0; j < frame; j++)
3334                         subprog[ret_prog[j]].tail_call_reachable = true;
3335         if (subprog[0].tail_call_reachable)
3336                 env->prog->aux->tail_call_reachable = true;
3337
3338         /* end of for() loop means the last insn of the 'subprog'
3339          * was reached. Doesn't matter whether it was JA or EXIT
3340          */
3341         if (frame == 0)
3342                 return 0;
3343         depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3344         frame--;
3345         i = ret_insn[frame];
3346         idx = ret_prog[frame];
3347         goto continue_func;
3348 }
3349
3350 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3351 static int get_callee_stack_depth(struct bpf_verifier_env *env,
3352                                   const struct bpf_insn *insn, int idx)
3353 {
3354         int start = idx + insn->imm + 1, subprog;
3355
3356         subprog = find_subprog(env, start);
3357         if (subprog < 0) {
3358                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3359                           start);
3360                 return -EFAULT;
3361         }
3362         return env->subprog_info[subprog].stack_depth;
3363 }
3364 #endif
3365
3366 int check_ctx_reg(struct bpf_verifier_env *env,
3367                   const struct bpf_reg_state *reg, int regno)
3368 {
3369         /* Access to ctx or passing it to a helper is only allowed in
3370          * its original, unmodified form.
3371          */
3372
3373         if (reg->off) {
3374                 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
3375                         regno, reg->off);
3376                 return -EACCES;
3377         }
3378
3379         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3380                 char tn_buf[48];
3381
3382                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3383                 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
3384                 return -EACCES;
3385         }
3386
3387         return 0;
3388 }
3389
3390 static int __check_buffer_access(struct bpf_verifier_env *env,
3391                                  const char *buf_info,
3392                                  const struct bpf_reg_state *reg,
3393                                  int regno, int off, int size)
3394 {
3395         if (off < 0) {
3396                 verbose(env,
3397                         "R%d invalid %s buffer access: off=%d, size=%d\n",
3398                         regno, buf_info, off, size);
3399                 return -EACCES;
3400         }
3401         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3402                 char tn_buf[48];
3403
3404                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3405                 verbose(env,
3406                         "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
3407                         regno, off, tn_buf);
3408                 return -EACCES;
3409         }
3410
3411         return 0;
3412 }
3413
3414 static int check_tp_buffer_access(struct bpf_verifier_env *env,
3415                                   const struct bpf_reg_state *reg,
3416                                   int regno, int off, int size)
3417 {
3418         int err;
3419
3420         err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3421         if (err)
3422                 return err;
3423
3424         if (off + size > env->prog->aux->max_tp_access)
3425                 env->prog->aux->max_tp_access = off + size;
3426
3427         return 0;
3428 }
3429
3430 static int check_buffer_access(struct bpf_verifier_env *env,
3431                                const struct bpf_reg_state *reg,
3432                                int regno, int off, int size,
3433                                bool zero_size_allowed,
3434                                const char *buf_info,
3435                                u32 *max_access)
3436 {
3437         int err;
3438
3439         err = __check_buffer_access(env, buf_info, reg, regno, off, size);
3440         if (err)
3441                 return err;
3442
3443         if (off + size > *max_access)
3444                 *max_access = off + size;
3445
3446         return 0;
3447 }
3448
3449 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
3450 static void zext_32_to_64(struct bpf_reg_state *reg)
3451 {
3452         reg->var_off = tnum_subreg(reg->var_off);
3453         __reg_assign_32_into_64(reg);
3454 }
3455
3456 /* truncate register to smaller size (in bytes)
3457  * must be called with size < BPF_REG_SIZE
3458  */
3459 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
3460 {
3461         u64 mask;
3462
3463         /* clear high bits in bit representation */
3464         reg->var_off = tnum_cast(reg->var_off, size);
3465
3466         /* fix arithmetic bounds */
3467         mask = ((u64)1 << (size * 8)) - 1;
3468         if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
3469                 reg->umin_value &= mask;
3470                 reg->umax_value &= mask;
3471         } else {
3472                 reg->umin_value = 0;
3473                 reg->umax_value = mask;
3474         }
3475         reg->smin_value = reg->umin_value;
3476         reg->smax_value = reg->umax_value;
3477
3478         /* If size is smaller than 32bit register the 32bit register
3479          * values are also truncated so we push 64-bit bounds into
3480          * 32-bit bounds. Above were truncated < 32-bits already.
3481          */
3482         if (size >= 4)
3483                 return;
3484         __reg_combine_64_into_32(reg);
3485 }
3486
3487 static bool bpf_map_is_rdonly(const struct bpf_map *map)
3488 {
3489         /* A map is considered read-only if the following condition are true:
3490          *
3491          * 1) BPF program side cannot change any of the map content. The
3492          *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
3493          *    and was set at map creation time.
3494          * 2) The map value(s) have been initialized from user space by a
3495          *    loader and then "frozen", such that no new map update/delete
3496          *    operations from syscall side are possible for the rest of
3497          *    the map's lifetime from that point onwards.
3498          * 3) Any parallel/pending map update/delete operations from syscall
3499          *    side have been completed. Only after that point, it's safe to
3500          *    assume that map value(s) are immutable.
3501          */
3502         return (map->map_flags & BPF_F_RDONLY_PROG) &&
3503                READ_ONCE(map->frozen) &&
3504                !bpf_map_write_active(map);
3505 }
3506
3507 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3508 {
3509         void *ptr;
3510         u64 addr;
3511         int err;
3512
3513         err = map->ops->map_direct_value_addr(map, &addr, off);
3514         if (err)
3515                 return err;
3516         ptr = (void *)(long)addr + off;
3517
3518         switch (size) {
3519         case sizeof(u8):
3520                 *val = (u64)*(u8 *)ptr;
3521                 break;
3522         case sizeof(u16):
3523                 *val = (u64)*(u16 *)ptr;
3524                 break;
3525         case sizeof(u32):
3526                 *val = (u64)*(u32 *)ptr;
3527                 break;
3528         case sizeof(u64):
3529                 *val = *(u64 *)ptr;
3530                 break;
3531         default:
3532                 return -EINVAL;
3533         }
3534         return 0;
3535 }
3536
3537 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
3538                                    struct bpf_reg_state *regs,
3539                                    int regno, int off, int size,
3540                                    enum bpf_access_type atype,
3541                                    int value_regno)
3542 {
3543         struct bpf_reg_state *reg = regs + regno;
3544         const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3545         const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3546         u32 btf_id;
3547         int ret;
3548
3549         if (off < 0) {
3550                 verbose(env,
3551                         "R%d is ptr_%s invalid negative access: off=%d\n",
3552                         regno, tname, off);
3553                 return -EACCES;
3554         }
3555         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3556                 char tn_buf[48];
3557
3558                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3559                 verbose(env,
3560                         "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
3561                         regno, tname, off, tn_buf);
3562                 return -EACCES;
3563         }
3564
3565         if (env->ops->btf_struct_access) {
3566                 ret = env->ops->btf_struct_access(&env->log, t, off, size,
3567                                                   atype, &btf_id);
3568         } else {
3569                 if (atype != BPF_READ) {
3570                         verbose(env, "only read is supported\n");
3571                         return -EACCES;
3572                 }
3573
3574                 ret = btf_struct_access(&env->log, t, off, size, atype,
3575                                         &btf_id);
3576         }
3577
3578         if (ret < 0)
3579                 return ret;
3580
3581         if (atype == BPF_READ && value_regno >= 0)
3582                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3583
3584         return 0;
3585 }
3586
3587 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
3588                                    struct bpf_reg_state *regs,
3589                                    int regno, int off, int size,
3590                                    enum bpf_access_type atype,
3591                                    int value_regno)
3592 {
3593         struct bpf_reg_state *reg = regs + regno;
3594         struct bpf_map *map = reg->map_ptr;
3595         const struct btf_type *t;
3596         const char *tname;
3597         u32 btf_id;
3598         int ret;
3599
3600         if (!btf_vmlinux) {
3601                 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
3602                 return -ENOTSUPP;
3603         }
3604
3605         if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
3606                 verbose(env, "map_ptr access not supported for map type %d\n",
3607                         map->map_type);
3608                 return -ENOTSUPP;
3609         }
3610
3611         t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
3612         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3613
3614         if (!env->allow_ptr_to_map_access) {
3615                 verbose(env,
3616                         "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
3617                         tname);
3618                 return -EPERM;
3619         }
3620
3621         if (off < 0) {
3622                 verbose(env, "R%d is %s invalid negative access: off=%d\n",
3623                         regno, tname, off);
3624                 return -EACCES;
3625         }
3626
3627         if (atype != BPF_READ) {
3628                 verbose(env, "only read from %s is supported\n", tname);
3629                 return -EACCES;
3630         }
3631
3632         ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3633         if (ret < 0)
3634                 return ret;
3635
3636         if (value_regno >= 0)
3637                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3638
3639         return 0;
3640 }
3641
3642 /* Check that the stack access at the given offset is within bounds. The
3643  * maximum valid offset is -1.
3644  *
3645  * The minimum valid offset is -MAX_BPF_STACK for writes, and
3646  * -state->allocated_stack for reads.
3647  */
3648 static int check_stack_slot_within_bounds(int off,
3649                                           struct bpf_func_state *state,
3650                                           enum bpf_access_type t)
3651 {
3652         int min_valid_off;
3653
3654         if (t == BPF_WRITE)
3655                 min_valid_off = -MAX_BPF_STACK;
3656         else
3657                 min_valid_off = -state->allocated_stack;
3658
3659         if (off < min_valid_off || off > -1)
3660                 return -EACCES;
3661         return 0;
3662 }
3663
3664 /* Check that the stack access at 'regno + off' falls within the maximum stack
3665  * bounds.
3666  *
3667  * 'off' includes `regno->offset`, but not its dynamic part (if any).
3668  */
3669 static int check_stack_access_within_bounds(
3670                 struct bpf_verifier_env *env,
3671                 int regno, int off, int access_size,
3672                 enum stack_access_src src, enum bpf_access_type type)
3673 {
3674         struct bpf_reg_state *regs = cur_regs(env);
3675         struct bpf_reg_state *reg = regs + regno;
3676         struct bpf_func_state *state = func(env, reg);
3677         int min_off, max_off;
3678         int err;
3679         char *err_extra;
3680
3681         if (src == ACCESS_HELPER)
3682                 /* We don't know if helpers are reading or writing (or both). */
3683                 err_extra = " indirect access to";
3684         else if (type == BPF_READ)
3685                 err_extra = " read from";
3686         else
3687                 err_extra = " write to";
3688
3689         if (tnum_is_const(reg->var_off)) {
3690                 min_off = reg->var_off.value + off;
3691                 if (access_size > 0)
3692                         max_off = min_off + access_size - 1;
3693                 else
3694                         max_off = min_off;
3695         } else {
3696                 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
3697                     reg->smin_value <= -BPF_MAX_VAR_OFF) {
3698                         verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
3699                                 err_extra, regno);
3700                         return -EACCES;
3701                 }
3702                 min_off = reg->smin_value + off;
3703                 if (access_size > 0)
3704                         max_off = reg->smax_value + off + access_size - 1;
3705                 else
3706                         max_off = min_off;
3707         }
3708
3709         err = check_stack_slot_within_bounds(min_off, state, type);
3710         if (!err)
3711                 err = check_stack_slot_within_bounds(max_off, state, type);
3712
3713         if (err) {
3714                 if (tnum_is_const(reg->var_off)) {
3715                         verbose(env, "invalid%s stack R%d off=%d size=%d\n",
3716                                 err_extra, regno, off, access_size);
3717                 } else {
3718                         char tn_buf[48];
3719
3720                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3721                         verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
3722                                 err_extra, regno, tn_buf, access_size);
3723                 }
3724         }
3725         return err;
3726 }
3727
3728 /* check whether memory at (regno + off) is accessible for t = (read | write)
3729  * if t==write, value_regno is a register which value is stored into memory
3730  * if t==read, value_regno is a register which will receive the value from memory
3731  * if t==write && value_regno==-1, some unknown value is stored into memory
3732  * if t==read && value_regno==-1, don't care what we read from memory
3733  */
3734 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
3735                             int off, int bpf_size, enum bpf_access_type t,
3736                             int value_regno, bool strict_alignment_once)
3737 {
3738         struct bpf_reg_state *regs = cur_regs(env);
3739         struct bpf_reg_state *reg = regs + regno;
3740         struct bpf_func_state *state;
3741         int size, err = 0;
3742
3743         size = bpf_size_to_bytes(bpf_size);
3744         if (size < 0)
3745                 return size;
3746
3747         /* alignment checks will add in reg->off themselves */
3748         err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
3749         if (err)
3750                 return err;
3751
3752         /* for access checks, reg->off is just part of off */
3753         off += reg->off;
3754
3755         if (reg->type == PTR_TO_MAP_VALUE) {
3756                 if (t == BPF_WRITE && value_regno >= 0 &&
3757                     is_pointer_value(env, value_regno)) {
3758                         verbose(env, "R%d leaks addr into map\n", value_regno);
3759                         return -EACCES;
3760                 }
3761                 err = check_map_access_type(env, regno, off, size, t);
3762                 if (err)
3763                         return err;
3764                 err = check_map_access(env, regno, off, size, false);
3765                 if (!err && t == BPF_READ && value_regno >= 0) {
3766                         struct bpf_map *map = reg->map_ptr;
3767
3768                         /* if map is read-only, track its contents as scalars */
3769                         if (tnum_is_const(reg->var_off) &&
3770                             bpf_map_is_rdonly(map) &&
3771                             map->ops->map_direct_value_addr) {
3772                                 int map_off = off + reg->var_off.value;
3773                                 u64 val = 0;
3774
3775                                 err = bpf_map_direct_read(map, map_off, size,
3776                                                           &val);
3777                                 if (err)
3778                                         return err;
3779
3780                                 regs[value_regno].type = SCALAR_VALUE;
3781                                 __mark_reg_known(&regs[value_regno], val);
3782                         } else {
3783                                 mark_reg_unknown(env, regs, value_regno);
3784                         }
3785                 }
3786         } else if (reg->type == PTR_TO_MEM) {
3787                 if (t == BPF_WRITE && value_regno >= 0 &&
3788                     is_pointer_value(env, value_regno)) {
3789                         verbose(env, "R%d leaks addr into mem\n", value_regno);
3790                         return -EACCES;
3791                 }
3792                 err = check_mem_region_access(env, regno, off, size,
3793                                               reg->mem_size, false);
3794                 if (!err && t == BPF_READ && value_regno >= 0)
3795                         mark_reg_unknown(env, regs, value_regno);
3796         } else if (reg->type == PTR_TO_CTX) {
3797                 enum bpf_reg_type reg_type = SCALAR_VALUE;
3798                 u32 btf_id = 0;
3799
3800                 if (t == BPF_WRITE && value_regno >= 0 &&
3801                     is_pointer_value(env, value_regno)) {
3802                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
3803                         return -EACCES;
3804                 }
3805
3806                 err = check_ctx_reg(env, reg, regno);
3807                 if (err < 0)
3808                         return err;
3809
3810                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3811                 if (err)
3812                         verbose_linfo(env, insn_idx, "; ");
3813                 if (!err && t == BPF_READ && value_regno >= 0) {
3814                         /* ctx access returns either a scalar, or a
3815                          * PTR_TO_PACKET[_META,_END]. In the latter
3816                          * case, we know the offset is zero.
3817                          */
3818                         if (reg_type == SCALAR_VALUE) {
3819                                 mark_reg_unknown(env, regs, value_regno);
3820                         } else {
3821                                 mark_reg_known_zero(env, regs,
3822                                                     value_regno);
3823                                 if (reg_type_may_be_null(reg_type))
3824                                         regs[value_regno].id = ++env->id_gen;
3825                                 /* A load of ctx field could have different
3826                                  * actual load size with the one encoded in the
3827                                  * insn. When the dst is PTR, it is for sure not
3828                                  * a sub-register.
3829                                  */
3830                                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
3831                                 if (reg_type == PTR_TO_BTF_ID ||
3832                                     reg_type == PTR_TO_BTF_ID_OR_NULL)
3833                                         regs[value_regno].btf_id = btf_id;
3834                         }
3835                         regs[value_regno].type = reg_type;
3836                 }
3837
3838         } else if (reg->type == PTR_TO_STACK) {
3839                 /* Basic bounds checks. */
3840                 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
3841                 if (err)
3842                         return err;
3843
3844                 state = func(env, reg);
3845                 err = update_stack_depth(env, state, off);
3846                 if (err)
3847                         return err;
3848
3849                 if (t == BPF_READ)
3850                         err = check_stack_read(env, regno, off, size,
3851                                                value_regno);
3852                 else
3853                         err = check_stack_write(env, regno, off, size,
3854                                                 value_regno, insn_idx);
3855         } else if (reg_is_pkt_pointer(reg)) {
3856                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
3857                         verbose(env, "cannot write into packet\n");
3858                         return -EACCES;
3859                 }
3860                 if (t == BPF_WRITE && value_regno >= 0 &&
3861                     is_pointer_value(env, value_regno)) {
3862                         verbose(env, "R%d leaks addr into packet\n",
3863                                 value_regno);
3864                         return -EACCES;
3865                 }
3866                 err = check_packet_access(env, regno, off, size, false);
3867                 if (!err && t == BPF_READ && value_regno >= 0)
3868                         mark_reg_unknown(env, regs, value_regno);
3869         } else if (reg->type == PTR_TO_FLOW_KEYS) {
3870                 if (t == BPF_WRITE && value_regno >= 0 &&
3871                     is_pointer_value(env, value_regno)) {
3872                         verbose(env, "R%d leaks addr into flow keys\n",
3873                                 value_regno);
3874                         return -EACCES;
3875                 }
3876
3877                 err = check_flow_keys_access(env, off, size);
3878                 if (!err && t == BPF_READ && value_regno >= 0)
3879                         mark_reg_unknown(env, regs, value_regno);
3880         } else if (type_is_sk_pointer(reg->type)) {
3881                 if (t == BPF_WRITE) {
3882                         verbose(env, "R%d cannot write into %s\n",
3883                                 regno, reg_type_str[reg->type]);
3884                         return -EACCES;
3885                 }
3886                 err = check_sock_access(env, insn_idx, regno, off, size, t);
3887                 if (!err && value_regno >= 0)
3888                         mark_reg_unknown(env, regs, value_regno);
3889         } else if (reg->type == PTR_TO_TP_BUFFER) {
3890                 err = check_tp_buffer_access(env, reg, regno, off, size);
3891                 if (!err && t == BPF_READ && value_regno >= 0)
3892                         mark_reg_unknown(env, regs, value_regno);
3893         } else if (reg->type == PTR_TO_BTF_ID) {
3894                 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
3895                                               value_regno);
3896         } else if (reg->type == CONST_PTR_TO_MAP) {
3897                 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
3898                                               value_regno);
3899         } else if (reg->type == PTR_TO_RDONLY_BUF) {
3900                 if (t == BPF_WRITE) {
3901                         verbose(env, "R%d cannot write into %s\n",
3902                                 regno, reg_type_str[reg->type]);
3903                         return -EACCES;
3904                 }
3905                 err = check_buffer_access(env, reg, regno, off, size, false,
3906                                           "rdonly",
3907                                           &env->prog->aux->max_rdonly_access);
3908                 if (!err && value_regno >= 0)
3909                         mark_reg_unknown(env, regs, value_regno);
3910         } else if (reg->type == PTR_TO_RDWR_BUF) {
3911                 err = check_buffer_access(env, reg, regno, off, size, false,
3912                                           "rdwr",
3913                                           &env->prog->aux->max_rdwr_access);
3914                 if (!err && t == BPF_READ && value_regno >= 0)
3915                         mark_reg_unknown(env, regs, value_regno);
3916         } else {
3917                 verbose(env, "R%d invalid mem access '%s'\n", regno,
3918                         reg_type_str[reg->type]);
3919                 return -EACCES;
3920         }
3921
3922         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
3923             regs[value_regno].type == SCALAR_VALUE) {
3924                 /* b/h/w load zero-extends, mark upper bits as known 0 */
3925                 coerce_reg_to_size(&regs[value_regno], size);
3926         }
3927         return err;
3928 }
3929
3930 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
3931 {
3932         int err;
3933
3934         if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
3935             insn->imm != 0) {
3936                 verbose(env, "BPF_XADD uses reserved fields\n");
3937                 return -EINVAL;
3938         }
3939
3940         /* check src1 operand */
3941         err = check_reg_arg(env, insn->src_reg, SRC_OP);
3942         if (err)
3943                 return err;
3944
3945         /* check src2 operand */
3946         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3947         if (err)
3948                 return err;
3949
3950         if (is_pointer_value(env, insn->src_reg)) {
3951                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
3952                 return -EACCES;
3953         }
3954
3955         if (is_ctx_reg(env, insn->dst_reg) ||
3956             is_pkt_reg(env, insn->dst_reg) ||
3957             is_flow_key_reg(env, insn->dst_reg) ||
3958             is_sk_reg(env, insn->dst_reg)) {
3959                 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
3960                         insn->dst_reg,
3961                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
3962                 return -EACCES;
3963         }
3964
3965         /* check whether atomic_add can read the memory */
3966         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3967                                BPF_SIZE(insn->code), BPF_READ, -1, true);
3968         if (err)
3969                 return err;
3970
3971         /* check whether atomic_add can write into the same memory */
3972         return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3973                                 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3974 }
3975
3976 /* When register 'regno' is used to read the stack (either directly or through
3977  * a helper function) make sure that it's within stack boundary and, depending
3978  * on the access type, that all elements of the stack are initialized.
3979  *
3980  * 'off' includes 'regno->off', but not its dynamic part (if any).
3981  *
3982  * All registers that have been spilled on the stack in the slots within the
3983  * read offsets are marked as read.
3984  */
3985 static int check_stack_range_initialized(
3986                 struct bpf_verifier_env *env, int regno, int off,
3987                 int access_size, bool zero_size_allowed,
3988                 enum stack_access_src type, struct bpf_call_arg_meta *meta)
3989 {
3990         struct bpf_reg_state *reg = reg_state(env, regno);
3991         struct bpf_func_state *state = func(env, reg);
3992         int err, min_off, max_off, i, j, slot, spi;
3993         char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
3994         enum bpf_access_type bounds_check_type;
3995         /* Some accesses can write anything into the stack, others are
3996          * read-only.
3997          */
3998         bool clobber = false;
3999
4000         if (access_size == 0 && !zero_size_allowed) {
4001                 verbose(env, "invalid zero-sized read\n");
4002                 return -EACCES;
4003         }
4004
4005         if (type == ACCESS_HELPER) {
4006                 /* The bounds checks for writes are more permissive than for
4007                  * reads. However, if raw_mode is not set, we'll do extra
4008                  * checks below.
4009                  */
4010                 bounds_check_type = BPF_WRITE;
4011                 clobber = true;
4012         } else {
4013                 bounds_check_type = BPF_READ;
4014         }
4015         err = check_stack_access_within_bounds(env, regno, off, access_size,
4016                                                type, bounds_check_type);
4017         if (err)
4018                 return err;
4019
4020
4021         if (tnum_is_const(reg->var_off)) {
4022                 min_off = max_off = reg->var_off.value + off;
4023         } else {
4024                 /* Variable offset is prohibited for unprivileged mode for
4025                  * simplicity since it requires corresponding support in
4026                  * Spectre masking for stack ALU.
4027                  * See also retrieve_ptr_limit().
4028                  */
4029                 if (!env->bypass_spec_v1) {
4030                         char tn_buf[48];
4031
4032                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4033                         verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4034                                 regno, err_extra, tn_buf);
4035                         return -EACCES;
4036                 }
4037                 /* Only initialized buffer on stack is allowed to be accessed
4038                  * with variable offset. With uninitialized buffer it's hard to
4039                  * guarantee that whole memory is marked as initialized on
4040                  * helper return since specific bounds are unknown what may
4041                  * cause uninitialized stack leaking.
4042                  */
4043                 if (meta && meta->raw_mode)
4044                         meta = NULL;
4045
4046                 min_off = reg->smin_value + off;
4047                 max_off = reg->smax_value + off;
4048         }
4049
4050         if (meta && meta->raw_mode) {
4051                 meta->access_size = access_size;
4052                 meta->regno = regno;
4053                 return 0;
4054         }
4055
4056         for (i = min_off; i < max_off + access_size; i++) {
4057                 u8 *stype;
4058
4059                 slot = -i - 1;
4060                 spi = slot / BPF_REG_SIZE;
4061                 if (state->allocated_stack <= slot)
4062                         goto err;
4063                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4064                 if (*stype == STACK_MISC)
4065                         goto mark;
4066                 if (*stype == STACK_ZERO) {
4067                         if (clobber) {
4068                                 /* helper can write anything into the stack */
4069                                 *stype = STACK_MISC;
4070                         }
4071                         goto mark;
4072                 }
4073
4074                 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4075                     state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4076                         goto mark;
4077
4078                 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4079                     (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4080                      env->allow_ptr_leaks)) {
4081                         if (clobber) {
4082                                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4083                                 for (j = 0; j < BPF_REG_SIZE; j++)
4084                                         state->stack[spi].slot_type[j] = STACK_MISC;
4085                         }
4086                         goto mark;
4087                 }
4088
4089 err:
4090                 if (tnum_is_const(reg->var_off)) {
4091                         verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4092                                 err_extra, regno, min_off, i - min_off, access_size);
4093                 } else {
4094                         char tn_buf[48];
4095
4096                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4097                         verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4098                                 err_extra, regno, tn_buf, i - min_off, access_size);
4099                 }
4100                 return -EACCES;
4101 mark:
4102                 /* reading any byte out of 8-byte 'spill_slot' will cause
4103                  * the whole slot to be marked as 'read'
4104                  */
4105                 mark_reg_read(env, &state->stack[spi].spilled_ptr,
4106                               state->stack[spi].spilled_ptr.parent,
4107                               REG_LIVE_READ64);
4108         }
4109         return update_stack_depth(env, state, min_off);
4110 }
4111
4112 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4113                                    int access_size, bool zero_size_allowed,
4114                                    struct bpf_call_arg_meta *meta)
4115 {
4116         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4117
4118         switch (reg->type) {
4119         case PTR_TO_PACKET:
4120         case PTR_TO_PACKET_META:
4121                 return check_packet_access(env, regno, reg->off, access_size,
4122                                            zero_size_allowed);
4123         case PTR_TO_MAP_VALUE:
4124                 if (check_map_access_type(env, regno, reg->off, access_size,
4125                                           meta && meta->raw_mode ? BPF_WRITE :
4126                                           BPF_READ))
4127                         return -EACCES;
4128                 return check_map_access(env, regno, reg->off, access_size,
4129                                         zero_size_allowed);
4130         case PTR_TO_MEM:
4131                 return check_mem_region_access(env, regno, reg->off,
4132                                                access_size, reg->mem_size,
4133                                                zero_size_allowed);
4134         case PTR_TO_RDONLY_BUF:
4135                 if (meta && meta->raw_mode)
4136                         return -EACCES;
4137                 return check_buffer_access(env, reg, regno, reg->off,
4138                                            access_size, zero_size_allowed,
4139                                            "rdonly",
4140                                            &env->prog->aux->max_rdonly_access);
4141         case PTR_TO_RDWR_BUF:
4142                 return check_buffer_access(env, reg, regno, reg->off,
4143                                            access_size, zero_size_allowed,
4144                                            "rdwr",
4145                                            &env->prog->aux->max_rdwr_access);
4146         case PTR_TO_STACK:
4147                 return check_stack_range_initialized(
4148                                 env,
4149                                 regno, reg->off, access_size,
4150                                 zero_size_allowed, ACCESS_HELPER, meta);
4151         default: /* scalar_value or invalid ptr */
4152                 /* Allow zero-byte read from NULL, regardless of pointer type */
4153                 if (zero_size_allowed && access_size == 0 &&
4154                     register_is_null(reg))
4155                         return 0;
4156
4157                 verbose(env, "R%d type=%s expected=%s\n", regno,
4158                         reg_type_str[reg->type],
4159                         reg_type_str[PTR_TO_STACK]);
4160                 return -EACCES;
4161         }
4162 }
4163
4164 /* Implementation details:
4165  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4166  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4167  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4168  * value_or_null->value transition, since the verifier only cares about
4169  * the range of access to valid map value pointer and doesn't care about actual
4170  * address of the map element.
4171  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4172  * reg->id > 0 after value_or_null->value transition. By doing so
4173  * two bpf_map_lookups will be considered two different pointers that
4174  * point to different bpf_spin_locks.
4175  * The verifier allows taking only one bpf_spin_lock at a time to avoid
4176  * dead-locks.
4177  * Since only one bpf_spin_lock is allowed the checks are simpler than
4178  * reg_is_refcounted() logic. The verifier needs to remember only
4179  * one spin_lock instead of array of acquired_refs.
4180  * cur_state->active_spin_lock remembers which map value element got locked
4181  * and clears it after bpf_spin_unlock.
4182  */
4183 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4184                              bool is_lock)
4185 {
4186         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4187         struct bpf_verifier_state *cur = env->cur_state;
4188         bool is_const = tnum_is_const(reg->var_off);
4189         struct bpf_map *map = reg->map_ptr;
4190         u64 val = reg->var_off.value;
4191
4192         if (!is_const) {
4193                 verbose(env,
4194                         "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
4195                         regno);
4196                 return -EINVAL;
4197         }
4198         if (!map->btf) {
4199                 verbose(env,
4200                         "map '%s' has to have BTF in order to use bpf_spin_lock\n",
4201                         map->name);
4202                 return -EINVAL;
4203         }
4204         if (!map_value_has_spin_lock(map)) {
4205                 if (map->spin_lock_off == -E2BIG)
4206                         verbose(env,
4207                                 "map '%s' has more than one 'struct bpf_spin_lock'\n",
4208                                 map->name);
4209                 else if (map->spin_lock_off == -ENOENT)
4210                         verbose(env,
4211                                 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
4212                                 map->name);
4213                 else
4214                         verbose(env,
4215                                 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
4216                                 map->name);
4217                 return -EINVAL;
4218         }
4219         if (map->spin_lock_off != val + reg->off) {
4220                 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
4221                         val + reg->off);
4222                 return -EINVAL;
4223         }
4224         if (is_lock) {
4225                 if (cur->active_spin_lock) {
4226                         verbose(env,
4227                                 "Locking two bpf_spin_locks are not allowed\n");
4228                         return -EINVAL;
4229                 }
4230                 cur->active_spin_lock = reg->id;
4231         } else {
4232                 if (!cur->active_spin_lock) {
4233                         verbose(env, "bpf_spin_unlock without taking a lock\n");
4234                         return -EINVAL;
4235                 }
4236                 if (cur->active_spin_lock != reg->id) {
4237                         verbose(env, "bpf_spin_unlock of different lock\n");
4238                         return -EINVAL;
4239                 }
4240                 cur->active_spin_lock = 0;
4241         }
4242         return 0;
4243 }
4244
4245 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4246 {
4247         return type == ARG_PTR_TO_MEM ||
4248                type == ARG_PTR_TO_MEM_OR_NULL ||
4249                type == ARG_PTR_TO_UNINIT_MEM;
4250 }
4251
4252 static bool arg_type_is_mem_size(enum bpf_arg_type type)
4253 {
4254         return type == ARG_CONST_SIZE ||
4255                type == ARG_CONST_SIZE_OR_ZERO;
4256 }
4257
4258 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4259 {
4260         return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4261 }
4262
4263 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4264 {
4265         return type == ARG_PTR_TO_INT ||
4266                type == ARG_PTR_TO_LONG;
4267 }
4268
4269 static int int_ptr_type_to_size(enum bpf_arg_type type)
4270 {
4271         if (type == ARG_PTR_TO_INT)
4272                 return sizeof(u32);
4273         else if (type == ARG_PTR_TO_LONG)
4274                 return sizeof(u64);
4275
4276         return -EINVAL;
4277 }
4278
4279 static int resolve_map_arg_type(struct bpf_verifier_env *env,
4280                                  const struct bpf_call_arg_meta *meta,
4281                                  enum bpf_arg_type *arg_type)
4282 {
4283         if (!meta->map_ptr) {
4284                 /* kernel subsystem misconfigured verifier */
4285                 verbose(env, "invalid map_ptr to access map->type\n");
4286                 return -EACCES;
4287         }
4288
4289         switch (meta->map_ptr->map_type) {
4290         case BPF_MAP_TYPE_SOCKMAP:
4291         case BPF_MAP_TYPE_SOCKHASH:
4292                 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4293                         *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4294                 } else {
4295                         verbose(env, "invalid arg_type for sockmap/sockhash\n");
4296                         return -EINVAL;
4297                 }
4298                 break;
4299
4300         default:
4301                 break;
4302         }
4303         return 0;
4304 }
4305
4306 struct bpf_reg_types {
4307         const enum bpf_reg_type types[10];
4308         u32 *btf_id;
4309 };
4310
4311 static const struct bpf_reg_types map_key_value_types = {
4312         .types = {
4313                 PTR_TO_STACK,
4314                 PTR_TO_PACKET,
4315                 PTR_TO_PACKET_META,
4316                 PTR_TO_MAP_VALUE,
4317         },
4318 };
4319
4320 static const struct bpf_reg_types sock_types = {
4321         .types = {
4322                 PTR_TO_SOCK_COMMON,
4323                 PTR_TO_SOCKET,
4324                 PTR_TO_TCP_SOCK,
4325                 PTR_TO_XDP_SOCK,
4326         },
4327 };
4328
4329 #ifdef CONFIG_NET
4330 static const struct bpf_reg_types btf_id_sock_common_types = {
4331         .types = {
4332                 PTR_TO_SOCK_COMMON,
4333                 PTR_TO_SOCKET,
4334                 PTR_TO_TCP_SOCK,
4335                 PTR_TO_XDP_SOCK,
4336                 PTR_TO_BTF_ID,
4337         },
4338         .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4339 };
4340 #endif
4341
4342 static const struct bpf_reg_types mem_types = {
4343         .types = {
4344                 PTR_TO_STACK,
4345                 PTR_TO_PACKET,
4346                 PTR_TO_PACKET_META,
4347                 PTR_TO_MAP_VALUE,
4348                 PTR_TO_MEM,
4349                 PTR_TO_RDONLY_BUF,
4350                 PTR_TO_RDWR_BUF,
4351         },
4352 };
4353
4354 static const struct bpf_reg_types int_ptr_types = {
4355         .types = {
4356                 PTR_TO_STACK,
4357                 PTR_TO_PACKET,
4358                 PTR_TO_PACKET_META,
4359                 PTR_TO_MAP_VALUE,
4360         },
4361 };
4362
4363 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
4364 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
4365 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
4366 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
4367 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
4368 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
4369 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
4370 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
4371
4372 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
4373         [ARG_PTR_TO_MAP_KEY]            = &map_key_value_types,
4374         [ARG_PTR_TO_MAP_VALUE]          = &map_key_value_types,
4375         [ARG_PTR_TO_UNINIT_MAP_VALUE]   = &map_key_value_types,
4376         [ARG_PTR_TO_MAP_VALUE_OR_NULL]  = &map_key_value_types,
4377         [ARG_CONST_SIZE]                = &scalar_types,
4378         [ARG_CONST_SIZE_OR_ZERO]        = &scalar_types,
4379         [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
4380         [ARG_CONST_MAP_PTR]             = &const_map_ptr_types,
4381         [ARG_PTR_TO_CTX]                = &context_types,
4382         [ARG_PTR_TO_CTX_OR_NULL]        = &context_types,
4383         [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
4384 #ifdef CONFIG_NET
4385         [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
4386 #endif
4387         [ARG_PTR_TO_SOCKET]             = &fullsock_types,
4388         [ARG_PTR_TO_SOCKET_OR_NULL]     = &fullsock_types,
4389         [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
4390         [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
4391         [ARG_PTR_TO_MEM]                = &mem_types,
4392         [ARG_PTR_TO_MEM_OR_NULL]        = &mem_types,
4393         [ARG_PTR_TO_UNINIT_MEM]         = &mem_types,
4394         [ARG_PTR_TO_ALLOC_MEM]          = &alloc_mem_types,
4395         [ARG_PTR_TO_ALLOC_MEM_OR_NULL]  = &alloc_mem_types,
4396         [ARG_PTR_TO_INT]                = &int_ptr_types,
4397         [ARG_PTR_TO_LONG]               = &int_ptr_types,
4398         [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
4399 };
4400
4401 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
4402                           enum bpf_arg_type arg_type,
4403                           const u32 *arg_btf_id)
4404 {
4405         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4406         enum bpf_reg_type expected, type = reg->type;
4407         const struct bpf_reg_types *compatible;
4408         int i, j;
4409
4410         compatible = compatible_reg_types[arg_type];
4411         if (!compatible) {
4412                 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
4413                 return -EFAULT;
4414         }
4415
4416         for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
4417                 expected = compatible->types[i];
4418                 if (expected == NOT_INIT)
4419                         break;
4420
4421                 if (type == expected)
4422                         goto found;
4423         }
4424
4425         verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
4426         for (j = 0; j + 1 < i; j++)
4427                 verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
4428         verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
4429         return -EACCES;
4430
4431 found:
4432         if (type == PTR_TO_BTF_ID) {
4433                 if (!arg_btf_id) {
4434                         if (!compatible->btf_id) {
4435                                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
4436                                 return -EFAULT;
4437                         }
4438                         arg_btf_id = compatible->btf_id;
4439                 }
4440
4441                 if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
4442                                           *arg_btf_id)) {
4443                         verbose(env, "R%d is of type %s but %s is expected\n",
4444                                 regno, kernel_type_name(reg->btf_id),
4445                                 kernel_type_name(*arg_btf_id));
4446                         return -EACCES;
4447                 }
4448
4449                 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4450                         verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
4451                                 regno);
4452                         return -EACCES;
4453                 }
4454         }
4455
4456         return 0;
4457 }
4458
4459 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
4460                           struct bpf_call_arg_meta *meta,
4461                           const struct bpf_func_proto *fn)
4462 {
4463         u32 regno = BPF_REG_1 + arg;
4464         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4465         enum bpf_arg_type arg_type = fn->arg_type[arg];
4466         enum bpf_reg_type type = reg->type;
4467         int err = 0;
4468
4469         if (arg_type == ARG_DONTCARE)
4470                 return 0;
4471
4472         err = check_reg_arg(env, regno, SRC_OP);
4473         if (err)
4474                 return err;
4475
4476         if (arg_type == ARG_ANYTHING) {
4477                 if (is_pointer_value(env, regno)) {
4478                         verbose(env, "R%d leaks addr into helper function\n",
4479                                 regno);
4480                         return -EACCES;
4481                 }
4482                 return 0;
4483         }
4484
4485         if (type_is_pkt_pointer(type) &&
4486             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
4487                 verbose(env, "helper access to the packet is not allowed\n");
4488                 return -EACCES;
4489         }
4490
4491         if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4492             arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
4493             arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
4494                 err = resolve_map_arg_type(env, meta, &arg_type);
4495                 if (err)
4496                         return err;
4497         }
4498
4499         if (register_is_null(reg) && arg_type_may_be_null(arg_type))
4500                 /* A NULL register has a SCALAR_VALUE type, so skip
4501                  * type checking.
4502                  */
4503                 goto skip_type_check;
4504
4505         err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
4506         if (err)
4507                 return err;
4508
4509         if (type == PTR_TO_CTX) {
4510                 err = check_ctx_reg(env, reg, regno);
4511                 if (err < 0)
4512                         return err;
4513         }
4514
4515 skip_type_check:
4516         if (reg->ref_obj_id) {
4517                 if (meta->ref_obj_id) {
4518                         verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
4519                                 regno, reg->ref_obj_id,
4520                                 meta->ref_obj_id);
4521                         return -EFAULT;
4522                 }
4523                 meta->ref_obj_id = reg->ref_obj_id;
4524         }
4525
4526         if (arg_type == ARG_CONST_MAP_PTR) {
4527                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
4528                 meta->map_ptr = reg->map_ptr;
4529         } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
4530                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
4531                  * check that [key, key + map->key_size) are within
4532                  * stack limits and initialized
4533                  */
4534                 if (!meta->map_ptr) {
4535                         /* in function declaration map_ptr must come before
4536                          * map_key, so that it's verified and known before
4537                          * we have to check map_key here. Otherwise it means
4538                          * that kernel subsystem misconfigured verifier
4539                          */
4540                         verbose(env, "invalid map_ptr to access map->key\n");
4541                         return -EACCES;
4542                 }
4543                 err = check_helper_mem_access(env, regno,
4544                                               meta->map_ptr->key_size, false,
4545                                               NULL);
4546         } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4547                    (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
4548                     !register_is_null(reg)) ||
4549                    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4550                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
4551                  * check [value, value + map->value_size) validity
4552                  */
4553                 if (!meta->map_ptr) {
4554                         /* kernel subsystem misconfigured verifier */
4555                         verbose(env, "invalid map_ptr to access map->value\n");
4556                         return -EACCES;
4557                 }
4558                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
4559                 err = check_helper_mem_access(env, regno,
4560                                               meta->map_ptr->value_size, false,
4561                                               meta);
4562         } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4563                 if (!reg->btf_id) {
4564                         verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4565                         return -EACCES;
4566                 }
4567                 meta->ret_btf_id = reg->btf_id;
4568         } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
4569                 if (meta->func_id == BPF_FUNC_spin_lock) {
4570                         if (process_spin_lock(env, regno, true))
4571                                 return -EACCES;
4572                 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
4573                         if (process_spin_lock(env, regno, false))
4574                                 return -EACCES;
4575                 } else {
4576                         verbose(env, "verifier internal error\n");
4577                         return -EFAULT;
4578                 }
4579         } else if (arg_type_is_mem_ptr(arg_type)) {
4580                 /* The access to this pointer is only checked when we hit the
4581                  * next is_mem_size argument below.
4582                  */
4583                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
4584         } else if (arg_type_is_mem_size(arg_type)) {
4585                 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
4586
4587                 /* This is used to refine r0 return value bounds for helpers
4588                  * that enforce this value as an upper bound on return values.
4589                  * See do_refine_retval_range() for helpers that can refine
4590                  * the return value. C type of helper is u32 so we pull register
4591                  * bound from umax_value however, if negative verifier errors
4592                  * out. Only upper bounds can be learned because retval is an
4593                  * int type and negative retvals are allowed.
4594                  */
4595                 meta->msize_max_value = reg->umax_value;
4596
4597                 /* The register is SCALAR_VALUE; the access check
4598                  * happens using its boundaries.
4599                  */
4600                 if (!tnum_is_const(reg->var_off))
4601                         /* For unprivileged variable accesses, disable raw
4602                          * mode so that the program is required to
4603                          * initialize all the memory that the helper could
4604                          * just partially fill up.
4605                          */
4606                         meta = NULL;
4607
4608                 if (reg->smin_value < 0) {
4609                         verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
4610                                 regno);
4611                         return -EACCES;
4612                 }
4613
4614                 if (reg->umin_value == 0) {
4615                         err = check_helper_mem_access(env, regno - 1, 0,
4616                                                       zero_size_allowed,
4617                                                       meta);
4618                         if (err)
4619                                 return err;
4620                 }
4621
4622                 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4623                         verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
4624                                 regno);
4625                         return -EACCES;
4626                 }
4627                 err = check_helper_mem_access(env, regno - 1,
4628                                               reg->umax_value,
4629                                               zero_size_allowed, meta);
4630                 if (!err)
4631                         err = mark_chain_precision(env, regno);
4632         } else if (arg_type_is_alloc_size(arg_type)) {
4633                 if (!tnum_is_const(reg->var_off)) {
4634                         verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
4635                                 regno);
4636                         return -EACCES;
4637                 }
4638                 meta->mem_size = reg->var_off.value;
4639         } else if (arg_type_is_int_ptr(arg_type)) {
4640                 int size = int_ptr_type_to_size(arg_type);
4641
4642                 err = check_helper_mem_access(env, regno, size, false, meta);
4643                 if (err)
4644                         return err;
4645                 err = check_ptr_alignment(env, reg, 0, size, true);
4646         }
4647
4648         return err;
4649 }
4650
4651 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
4652 {
4653         enum bpf_attach_type eatype = env->prog->expected_attach_type;
4654         enum bpf_prog_type type = resolve_prog_type(env->prog);
4655
4656         if (func_id != BPF_FUNC_map_update_elem)
4657                 return false;
4658
4659         /* It's not possible to get access to a locked struct sock in these
4660          * contexts, so updating is safe.
4661          */
4662         switch (type) {
4663         case BPF_PROG_TYPE_TRACING:
4664                 if (eatype == BPF_TRACE_ITER)
4665                         return true;
4666                 break;
4667         case BPF_PROG_TYPE_SOCKET_FILTER:
4668         case BPF_PROG_TYPE_SCHED_CLS:
4669         case BPF_PROG_TYPE_SCHED_ACT:
4670         case BPF_PROG_TYPE_XDP:
4671         case BPF_PROG_TYPE_SK_REUSEPORT:
4672         case BPF_PROG_TYPE_FLOW_DISSECTOR:
4673         case BPF_PROG_TYPE_SK_LOOKUP:
4674                 return true;
4675         default:
4676                 break;
4677         }
4678
4679         verbose(env, "cannot update sockmap in this context\n");
4680         return false;
4681 }
4682
4683 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
4684 {
4685         return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
4686 }
4687
4688 static int check_map_func_compatibility(struct bpf_verifier_env *env,
4689                                         struct bpf_map *map, int func_id)
4690 {
4691         if (!map)
4692                 return 0;
4693
4694         /* We need a two way check, first is from map perspective ... */
4695         switch (map->map_type) {
4696         case BPF_MAP_TYPE_PROG_ARRAY:
4697                 if (func_id != BPF_FUNC_tail_call)
4698                         goto error;
4699                 break;
4700         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4701                 if (func_id != BPF_FUNC_perf_event_read &&
4702                     func_id != BPF_FUNC_perf_event_output &&
4703                     func_id != BPF_FUNC_skb_output &&
4704                     func_id != BPF_FUNC_perf_event_read_value &&
4705                     func_id != BPF_FUNC_xdp_output)
4706                         goto error;
4707                 break;
4708         case BPF_MAP_TYPE_RINGBUF:
4709                 if (func_id != BPF_FUNC_ringbuf_output &&
4710                     func_id != BPF_FUNC_ringbuf_reserve &&
4711                     func_id != BPF_FUNC_ringbuf_query)
4712                         goto error;
4713                 break;
4714         case BPF_MAP_TYPE_STACK_TRACE:
4715                 if (func_id != BPF_FUNC_get_stackid)
4716                         goto error;
4717                 break;
4718         case BPF_MAP_TYPE_CGROUP_ARRAY:
4719                 if (func_id != BPF_FUNC_skb_under_cgroup &&
4720                     func_id != BPF_FUNC_current_task_under_cgroup)
4721                         goto error;
4722                 break;
4723         case BPF_MAP_TYPE_CGROUP_STORAGE:
4724         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
4725                 if (func_id != BPF_FUNC_get_local_storage)
4726                         goto error;
4727                 break;
4728         case BPF_MAP_TYPE_DEVMAP:
4729         case BPF_MAP_TYPE_DEVMAP_HASH:
4730                 if (func_id != BPF_FUNC_redirect_map &&
4731                     func_id != BPF_FUNC_map_lookup_elem)
4732                         goto error;
4733                 break;
4734         /* Restrict bpf side of cpumap and xskmap, open when use-cases
4735          * appear.
4736          */
4737         case BPF_MAP_TYPE_CPUMAP:
4738                 if (func_id != BPF_FUNC_redirect_map)
4739                         goto error;
4740                 break;
4741         case BPF_MAP_TYPE_XSKMAP:
4742                 if (func_id != BPF_FUNC_redirect_map &&
4743                     func_id != BPF_FUNC_map_lookup_elem)
4744                         goto error;
4745                 break;
4746         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4747         case BPF_MAP_TYPE_HASH_OF_MAPS:
4748                 if (func_id != BPF_FUNC_map_lookup_elem)
4749                         goto error;
4750                 break;
4751         case BPF_MAP_TYPE_SOCKMAP:
4752                 if (func_id != BPF_FUNC_sk_redirect_map &&
4753                     func_id != BPF_FUNC_sock_map_update &&
4754                     func_id != BPF_FUNC_map_delete_elem &&
4755                     func_id != BPF_FUNC_msg_redirect_map &&
4756                     func_id != BPF_FUNC_sk_select_reuseport &&
4757                     func_id != BPF_FUNC_map_lookup_elem &&
4758                     !may_update_sockmap(env, func_id))
4759                         goto error;
4760                 break;
4761         case BPF_MAP_TYPE_SOCKHASH:
4762                 if (func_id != BPF_FUNC_sk_redirect_hash &&
4763                     func_id != BPF_FUNC_sock_hash_update &&
4764                     func_id != BPF_FUNC_map_delete_elem &&
4765                     func_id != BPF_FUNC_msg_redirect_hash &&
4766                     func_id != BPF_FUNC_sk_select_reuseport &&
4767                     func_id != BPF_FUNC_map_lookup_elem &&
4768                     !may_update_sockmap(env, func_id))
4769                         goto error;
4770                 break;
4771         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
4772                 if (func_id != BPF_FUNC_sk_select_reuseport)
4773                         goto error;
4774                 break;
4775         case BPF_MAP_TYPE_QUEUE:
4776         case BPF_MAP_TYPE_STACK:
4777                 if (func_id != BPF_FUNC_map_peek_elem &&
4778                     func_id != BPF_FUNC_map_pop_elem &&
4779                     func_id != BPF_FUNC_map_push_elem)
4780                         goto error;
4781                 break;
4782         case BPF_MAP_TYPE_SK_STORAGE:
4783                 if (func_id != BPF_FUNC_sk_storage_get &&
4784                     func_id != BPF_FUNC_sk_storage_delete)
4785                         goto error;
4786                 break;
4787         case BPF_MAP_TYPE_INODE_STORAGE:
4788                 if (func_id != BPF_FUNC_inode_storage_get &&
4789                     func_id != BPF_FUNC_inode_storage_delete)
4790                         goto error;
4791                 break;
4792         default:
4793                 break;
4794         }
4795
4796         /* ... and second from the function itself. */
4797         switch (func_id) {
4798         case BPF_FUNC_tail_call:
4799                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
4800                         goto error;
4801                 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
4802                         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
4803                         return -EINVAL;
4804                 }
4805                 break;
4806         case BPF_FUNC_perf_event_read:
4807         case BPF_FUNC_perf_event_output:
4808         case BPF_FUNC_perf_event_read_value:
4809         case BPF_FUNC_skb_output:
4810         case BPF_FUNC_xdp_output:
4811                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
4812                         goto error;
4813                 break;
4814         case BPF_FUNC_ringbuf_output:
4815         case BPF_FUNC_ringbuf_reserve:
4816         case BPF_FUNC_ringbuf_query:
4817                 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
4818                         goto error;
4819                 break;
4820         case BPF_FUNC_get_stackid:
4821                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
4822                         goto error;
4823                 break;
4824         case BPF_FUNC_current_task_under_cgroup:
4825         case BPF_FUNC_skb_under_cgroup:
4826                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
4827                         goto error;
4828                 break;
4829         case BPF_FUNC_redirect_map:
4830                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
4831                     map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
4832                     map->map_type != BPF_MAP_TYPE_CPUMAP &&
4833                     map->map_type != BPF_MAP_TYPE_XSKMAP)
4834                         goto error;
4835                 break;
4836         case BPF_FUNC_sk_redirect_map:
4837         case BPF_FUNC_msg_redirect_map:
4838         case BPF_FUNC_sock_map_update:
4839                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
4840                         goto error;
4841                 break;
4842         case BPF_FUNC_sk_redirect_hash:
4843         case BPF_FUNC_msg_redirect_hash:
4844         case BPF_FUNC_sock_hash_update:
4845                 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
4846                         goto error;
4847                 break;
4848         case BPF_FUNC_get_local_storage:
4849                 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
4850                     map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
4851                         goto error;
4852                 break;
4853         case BPF_FUNC_sk_select_reuseport:
4854                 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
4855                     map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4856                     map->map_type != BPF_MAP_TYPE_SOCKHASH)
4857                         goto error;
4858                 break;
4859         case BPF_FUNC_map_peek_elem:
4860         case BPF_FUNC_map_pop_elem:
4861         case BPF_FUNC_map_push_elem:
4862                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
4863                     map->map_type != BPF_MAP_TYPE_STACK)
4864                         goto error;
4865                 break;
4866         case BPF_FUNC_sk_storage_get:
4867         case BPF_FUNC_sk_storage_delete:
4868                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
4869                         goto error;
4870                 break;
4871         case BPF_FUNC_inode_storage_get:
4872         case BPF_FUNC_inode_storage_delete:
4873                 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
4874                         goto error;
4875                 break;
4876         default:
4877                 break;
4878         }
4879
4880         return 0;
4881 error:
4882         verbose(env, "cannot pass map_type %d into func %s#%d\n",
4883                 map->map_type, func_id_name(func_id), func_id);
4884         return -EINVAL;
4885 }
4886
4887 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
4888 {
4889         int count = 0;
4890
4891         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
4892                 count++;
4893         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
4894                 count++;
4895         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
4896                 count++;
4897         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
4898                 count++;
4899         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
4900                 count++;
4901
4902         /* We only support one arg being in raw mode at the moment,
4903          * which is sufficient for the helper functions we have
4904          * right now.
4905          */
4906         return count <= 1;
4907 }
4908
4909 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
4910                                     enum bpf_arg_type arg_next)
4911 {
4912         return (arg_type_is_mem_ptr(arg_curr) &&
4913                 !arg_type_is_mem_size(arg_next)) ||
4914                (!arg_type_is_mem_ptr(arg_curr) &&
4915                 arg_type_is_mem_size(arg_next));
4916 }
4917
4918 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
4919 {
4920         /* bpf_xxx(..., buf, len) call will access 'len'
4921          * bytes from memory 'buf'. Both arg types need
4922          * to be paired, so make sure there's no buggy
4923          * helper function specification.
4924          */
4925         if (arg_type_is_mem_size(fn->arg1_type) ||
4926             arg_type_is_mem_ptr(fn->arg5_type)  ||
4927             check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
4928             check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
4929             check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
4930             check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
4931                 return false;
4932
4933         return true;
4934 }
4935
4936 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
4937 {
4938         int count = 0;
4939
4940         if (arg_type_may_be_refcounted(fn->arg1_type))
4941                 count++;
4942         if (arg_type_may_be_refcounted(fn->arg2_type))
4943                 count++;
4944         if (arg_type_may_be_refcounted(fn->arg3_type))
4945                 count++;
4946         if (arg_type_may_be_refcounted(fn->arg4_type))
4947                 count++;
4948         if (arg_type_may_be_refcounted(fn->arg5_type))
4949                 count++;
4950
4951         /* A reference acquiring function cannot acquire
4952          * another refcounted ptr.
4953          */
4954         if (may_be_acquire_function(func_id) && count)
4955                 return false;
4956
4957         /* We only support one arg being unreferenced at the moment,
4958          * which is sufficient for the helper functions we have right now.
4959          */
4960         return count <= 1;
4961 }
4962
4963 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
4964 {
4965         int i;
4966
4967         for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4968                 if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
4969                         return false;
4970
4971                 if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
4972                         return false;
4973         }
4974
4975         return true;
4976 }
4977
4978 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
4979 {
4980         return check_raw_mode_ok(fn) &&
4981                check_arg_pair_ok(fn) &&
4982                check_btf_id_ok(fn) &&
4983                check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
4984 }
4985
4986 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
4987  * are now invalid, so turn them into unknown SCALAR_VALUE.
4988  */
4989 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
4990                                      struct bpf_func_state *state)
4991 {
4992         struct bpf_reg_state *regs = state->regs, *reg;
4993         int i;
4994
4995         for (i = 0; i < MAX_BPF_REG; i++)
4996                 if (reg_is_pkt_pointer_any(&regs[i]))
4997                         mark_reg_unknown(env, regs, i);
4998
4999         bpf_for_each_spilled_reg(i, state, reg) {
5000                 if (!reg)
5001                         continue;
5002                 if (reg_is_pkt_pointer_any(reg))
5003                         __mark_reg_unknown(env, reg);
5004         }
5005 }
5006
5007 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
5008 {
5009         struct bpf_verifier_state *vstate = env->cur_state;
5010         int i;
5011
5012         for (i = 0; i <= vstate->curframe; i++)
5013                 __clear_all_pkt_pointers(env, vstate->frame[i]);
5014 }
5015
5016 static void release_reg_references(struct bpf_verifier_env *env,
5017                                    struct bpf_func_state *state,
5018                                    int ref_obj_id)
5019 {
5020         struct bpf_reg_state *regs = state->regs, *reg;
5021         int i;
5022
5023         for (i = 0; i < MAX_BPF_REG; i++)
5024                 if (regs[i].ref_obj_id == ref_obj_id)
5025                         mark_reg_unknown(env, regs, i);
5026
5027         bpf_for_each_spilled_reg(i, state, reg) {
5028                 if (!reg)
5029                         continue;
5030                 if (reg->ref_obj_id == ref_obj_id)
5031                         __mark_reg_unknown(env, reg);
5032         }
5033 }
5034
5035 /* The pointer with the specified id has released its reference to kernel
5036  * resources. Identify all copies of the same pointer and clear the reference.
5037  */
5038 static int release_reference(struct bpf_verifier_env *env,
5039                              int ref_obj_id)
5040 {
5041         struct bpf_verifier_state *vstate = env->cur_state;
5042         int err;
5043         int i;
5044
5045         err = release_reference_state(cur_func(env), ref_obj_id);
5046         if (err)
5047                 return err;
5048
5049         for (i = 0; i <= vstate->curframe; i++)
5050                 release_reg_references(env, vstate->frame[i], ref_obj_id);
5051
5052         return 0;
5053 }
5054
5055 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
5056                                     struct bpf_reg_state *regs)
5057 {
5058         int i;
5059
5060         /* after the call registers r0 - r5 were scratched */
5061         for (i = 0; i < CALLER_SAVED_REGS; i++) {
5062                 mark_reg_not_init(env, regs, caller_saved[i]);
5063                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5064         }
5065 }
5066
5067 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
5068                            int *insn_idx)
5069 {
5070         struct bpf_verifier_state *state = env->cur_state;
5071         struct bpf_func_info_aux *func_info_aux;
5072         struct bpf_func_state *caller, *callee;
5073         int i, err, subprog, target_insn;
5074         bool is_global = false;
5075
5076         if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5077                 verbose(env, "the call stack of %d frames is too deep\n",
5078                         state->curframe + 2);
5079                 return -E2BIG;
5080         }
5081
5082         target_insn = *insn_idx + insn->imm;
5083         subprog = find_subprog(env, target_insn + 1);
5084         if (subprog < 0) {
5085                 verbose(env, "verifier bug. No program starts at insn %d\n",
5086                         target_insn + 1);
5087                 return -EFAULT;
5088         }
5089
5090         caller = state->frame[state->curframe];
5091         if (state->frame[state->curframe + 1]) {
5092                 verbose(env, "verifier bug. Frame %d already allocated\n",
5093                         state->curframe + 1);
5094                 return -EFAULT;
5095         }
5096
5097         func_info_aux = env->prog->aux->func_info_aux;
5098         if (func_info_aux)
5099                 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5100         err = btf_check_func_arg_match(env, subprog, caller->regs);
5101         if (err == -EFAULT)
5102                 return err;
5103         if (is_global) {
5104                 if (err) {
5105                         verbose(env, "Caller passes invalid args into func#%d\n",
5106                                 subprog);
5107                         return err;
5108                 } else {
5109                         if (env->log.level & BPF_LOG_LEVEL)
5110                                 verbose(env,
5111                                         "Func#%d is global and valid. Skipping.\n",
5112                                         subprog);
5113                         clear_caller_saved_regs(env, caller->regs);
5114
5115                         /* All global functions return a 64-bit SCALAR_VALUE */
5116                         mark_reg_unknown(env, caller->regs, BPF_REG_0);
5117                         caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5118
5119                         /* continue with next insn after call */
5120                         return 0;
5121                 }
5122         }
5123
5124         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5125         if (!callee)
5126                 return -ENOMEM;
5127         state->frame[state->curframe + 1] = callee;
5128
5129         /* callee cannot access r0, r6 - r9 for reading and has to write
5130          * into its own stack before reading from it.
5131          * callee can read/write into caller's stack
5132          */
5133         init_func_state(env, callee,
5134                         /* remember the callsite, it will be used by bpf_exit */
5135                         *insn_idx /* callsite */,
5136                         state->curframe + 1 /* frameno within this callchain */,
5137                         subprog /* subprog number within this prog */);
5138
5139         /* Transfer references to the callee */
5140         err = transfer_reference_state(callee, caller);
5141         if (err)
5142                 return err;
5143
5144         /* copy r1 - r5 args that callee can access.  The copy includes parent
5145          * pointers, which connects us up to the liveness chain
5146          */
5147         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
5148                 callee->regs[i] = caller->regs[i];
5149
5150         clear_caller_saved_regs(env, caller->regs);
5151
5152         /* only increment it after check_reg_arg() finished */
5153         state->curframe++;
5154
5155         /* and go analyze first insn of the callee */
5156         *insn_idx = target_insn;
5157
5158         if (env->log.level & BPF_LOG_LEVEL) {
5159                 verbose(env, "caller:\n");
5160                 print_verifier_state(env, caller);
5161                 verbose(env, "callee:\n");
5162                 print_verifier_state(env, callee);
5163         }
5164         return 0;
5165 }
5166
5167 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
5168 {
5169         struct bpf_verifier_state *state = env->cur_state;
5170         struct bpf_func_state *caller, *callee;
5171         struct bpf_reg_state *r0;
5172         int err;
5173
5174         callee = state->frame[state->curframe];
5175         r0 = &callee->regs[BPF_REG_0];
5176         if (r0->type == PTR_TO_STACK) {
5177                 /* technically it's ok to return caller's stack pointer
5178                  * (or caller's caller's pointer) back to the caller,
5179                  * since these pointers are valid. Only current stack
5180                  * pointer will be invalid as soon as function exits,
5181                  * but let's be conservative
5182                  */
5183                 verbose(env, "cannot return stack pointer to the caller\n");
5184                 return -EINVAL;
5185         }
5186
5187         state->curframe--;
5188         caller = state->frame[state->curframe];
5189         /* return to the caller whatever r0 had in the callee */
5190         caller->regs[BPF_REG_0] = *r0;
5191
5192         /* Transfer references to the caller */
5193         err = transfer_reference_state(caller, callee);
5194         if (err)
5195                 return err;
5196
5197         *insn_idx = callee->callsite + 1;
5198         if (env->log.level & BPF_LOG_LEVEL) {
5199                 verbose(env, "returning from callee:\n");
5200                 print_verifier_state(env, callee);
5201                 verbose(env, "to caller at %d:\n", *insn_idx);
5202                 print_verifier_state(env, caller);
5203         }
5204         /* clear everything in the callee */
5205         free_func_state(callee);
5206         state->frame[state->curframe + 1] = NULL;
5207         return 0;
5208 }
5209
5210 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
5211                                    int func_id,
5212                                    struct bpf_call_arg_meta *meta)
5213 {
5214         struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
5215
5216         if (ret_type != RET_INTEGER ||
5217             (func_id != BPF_FUNC_get_stack &&
5218              func_id != BPF_FUNC_probe_read_str &&
5219              func_id != BPF_FUNC_probe_read_kernel_str &&
5220              func_id != BPF_FUNC_probe_read_user_str))
5221                 return;
5222
5223         ret_reg->smax_value = meta->msize_max_value;
5224         ret_reg->s32_max_value = meta->msize_max_value;
5225         ret_reg->smin_value = -MAX_ERRNO;
5226         ret_reg->s32_min_value = -MAX_ERRNO;
5227         __reg_deduce_bounds(ret_reg);
5228         __reg_bound_offset(ret_reg);
5229         __update_reg_bounds(ret_reg);
5230 }
5231
5232 static int
5233 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
5234                 int func_id, int insn_idx)
5235 {
5236         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5237         struct bpf_map *map = meta->map_ptr;
5238
5239         if (func_id != BPF_FUNC_tail_call &&
5240             func_id != BPF_FUNC_map_lookup_elem &&
5241             func_id != BPF_FUNC_map_update_elem &&
5242             func_id != BPF_FUNC_map_delete_elem &&
5243             func_id != BPF_FUNC_map_push_elem &&
5244             func_id != BPF_FUNC_map_pop_elem &&
5245             func_id != BPF_FUNC_map_peek_elem)
5246                 return 0;
5247
5248         if (map == NULL) {
5249                 verbose(env, "kernel subsystem misconfigured verifier\n");
5250                 return -EINVAL;
5251         }
5252
5253         /* In case of read-only, some additional restrictions
5254          * need to be applied in order to prevent altering the
5255          * state of the map from program side.
5256          */
5257         if ((map->map_flags & BPF_F_RDONLY_PROG) &&
5258             (func_id == BPF_FUNC_map_delete_elem ||
5259              func_id == BPF_FUNC_map_update_elem ||
5260              func_id == BPF_FUNC_map_push_elem ||
5261              func_id == BPF_FUNC_map_pop_elem)) {
5262                 verbose(env, "write into map forbidden\n");
5263                 return -EACCES;
5264         }
5265
5266         if (!BPF_MAP_PTR(aux->map_ptr_state))
5267                 bpf_map_ptr_store(aux, meta->map_ptr,
5268                                   !meta->map_ptr->bypass_spec_v1);
5269         else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
5270                 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
5271                                   !meta->map_ptr->bypass_spec_v1);
5272         return 0;
5273 }
5274
5275 static int
5276 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
5277                 int func_id, int insn_idx)
5278 {
5279         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5280         struct bpf_reg_state *regs = cur_regs(env), *reg;
5281         struct bpf_map *map = meta->map_ptr;
5282         struct tnum range;
5283         u64 val;
5284         int err;
5285
5286         if (func_id != BPF_FUNC_tail_call)
5287                 return 0;
5288         if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
5289                 verbose(env, "kernel subsystem misconfigured verifier\n");
5290                 return -EINVAL;
5291         }
5292
5293         range = tnum_range(0, map->max_entries - 1);
5294         reg = &regs[BPF_REG_3];
5295
5296         if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
5297                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5298                 return 0;
5299         }
5300
5301         err = mark_chain_precision(env, BPF_REG_3);
5302         if (err)
5303                 return err;
5304
5305         val = reg->var_off.value;
5306         if (bpf_map_key_unseen(aux))
5307                 bpf_map_key_store(aux, val);
5308         else if (!bpf_map_key_poisoned(aux) &&
5309                   bpf_map_key_immediate(aux) != val)
5310                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5311         return 0;
5312 }
5313
5314 static int check_reference_leak(struct bpf_verifier_env *env)
5315 {
5316         struct bpf_func_state *state = cur_func(env);
5317         int i;
5318
5319         for (i = 0; i < state->acquired_refs; i++) {
5320                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
5321                         state->refs[i].id, state->refs[i].insn_idx);
5322         }
5323         return state->acquired_refs ? -EINVAL : 0;
5324 }
5325
5326 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
5327 {
5328         const struct bpf_func_proto *fn = NULL;
5329         struct bpf_reg_state *regs;
5330         struct bpf_call_arg_meta meta;
5331         bool changes_data;
5332         int i, err;
5333
5334         /* find function prototype */
5335         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
5336                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
5337                         func_id);
5338                 return -EINVAL;
5339         }
5340
5341         if (env->ops->get_func_proto)
5342                 fn = env->ops->get_func_proto(func_id, env->prog);
5343         if (!fn) {
5344                 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
5345                         func_id);
5346                 return -EINVAL;
5347         }
5348
5349         /* eBPF programs must be GPL compatible to use GPL-ed functions */
5350         if (!env->prog->gpl_compatible && fn->gpl_only) {
5351                 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
5352                 return -EINVAL;
5353         }
5354
5355         if (fn->allowed && !fn->allowed(env->prog)) {
5356                 verbose(env, "helper call is not allowed in probe\n");
5357                 return -EINVAL;
5358         }
5359
5360         /* With LD_ABS/IND some JITs save/restore skb from r1. */
5361         changes_data = bpf_helper_changes_pkt_data(fn->func);
5362         if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
5363                 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
5364                         func_id_name(func_id), func_id);
5365                 return -EINVAL;
5366         }
5367
5368         memset(&meta, 0, sizeof(meta));
5369         meta.pkt_access = fn->pkt_access;
5370
5371         err = check_func_proto(fn, func_id);
5372         if (err) {
5373                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
5374                         func_id_name(func_id), func_id);
5375                 return err;
5376         }
5377
5378         meta.func_id = func_id;
5379         /* check args */
5380         for (i = 0; i < 5; i++) {
5381                 err = check_func_arg(env, i, &meta, fn);
5382                 if (err)
5383                         return err;
5384         }
5385
5386         err = record_func_map(env, &meta, func_id, insn_idx);
5387         if (err)
5388                 return err;
5389
5390         err = record_func_key(env, &meta, func_id, insn_idx);
5391         if (err)
5392                 return err;
5393
5394         /* Mark slots with STACK_MISC in case of raw mode, stack offset
5395          * is inferred from register state.
5396          */
5397         for (i = 0; i < meta.access_size; i++) {
5398                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
5399                                        BPF_WRITE, -1, false);
5400                 if (err)
5401                         return err;
5402         }
5403
5404         if (func_id == BPF_FUNC_tail_call) {
5405                 err = check_reference_leak(env);
5406                 if (err) {
5407                         verbose(env, "tail_call would lead to reference leak\n");
5408                         return err;
5409                 }
5410         } else if (is_release_function(func_id)) {
5411                 err = release_reference(env, meta.ref_obj_id);
5412                 if (err) {
5413                         verbose(env, "func %s#%d reference has not been acquired before\n",
5414                                 func_id_name(func_id), func_id);
5415                         return err;
5416                 }
5417         }
5418
5419         regs = cur_regs(env);
5420
5421         /* check that flags argument in get_local_storage(map, flags) is 0,
5422          * this is required because get_local_storage() can't return an error.
5423          */
5424         if (func_id == BPF_FUNC_get_local_storage &&
5425             !register_is_null(&regs[BPF_REG_2])) {
5426                 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
5427                 return -EINVAL;
5428         }
5429
5430         /* reset caller saved regs */
5431         for (i = 0; i < CALLER_SAVED_REGS; i++) {
5432                 mark_reg_not_init(env, regs, caller_saved[i]);
5433                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5434         }
5435
5436         /* helper call returns 64-bit value. */
5437         regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5438
5439         /* update return register (already marked as written above) */
5440         if (fn->ret_type == RET_INTEGER) {
5441                 /* sets type to SCALAR_VALUE */
5442                 mark_reg_unknown(env, regs, BPF_REG_0);
5443         } else if (fn->ret_type == RET_VOID) {
5444                 regs[BPF_REG_0].type = NOT_INIT;
5445         } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
5446                    fn->ret_type == RET_PTR_TO_MAP_VALUE) {
5447                 /* There is no offset yet applied, variable or fixed */
5448                 mark_reg_known_zero(env, regs, BPF_REG_0);
5449                 /* remember map_ptr, so that check_map_access()
5450                  * can check 'value_size' boundary of memory access
5451                  * to map element returned from bpf_map_lookup_elem()
5452                  */
5453                 if (meta.map_ptr == NULL) {
5454                         verbose(env,
5455                                 "kernel subsystem misconfigured verifier\n");
5456                         return -EINVAL;
5457                 }
5458                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
5459                 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
5460                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
5461                         if (map_value_has_spin_lock(meta.map_ptr))
5462                                 regs[BPF_REG_0].id = ++env->id_gen;
5463                 } else {
5464                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
5465                 }
5466         } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
5467                 mark_reg_known_zero(env, regs, BPF_REG_0);
5468                 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
5469         } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
5470                 mark_reg_known_zero(env, regs, BPF_REG_0);
5471                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
5472         } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
5473                 mark_reg_known_zero(env, regs, BPF_REG_0);
5474                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
5475         } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
5476                 mark_reg_known_zero(env, regs, BPF_REG_0);
5477                 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
5478                 regs[BPF_REG_0].mem_size = meta.mem_size;
5479         } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
5480                    fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
5481                 const struct btf_type *t;
5482
5483                 mark_reg_known_zero(env, regs, BPF_REG_0);
5484                 t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5485                 if (!btf_type_is_struct(t)) {
5486                         u32 tsize;
5487                         const struct btf_type *ret;
5488                         const char *tname;
5489
5490                         /* resolve the type size of ksym. */
5491                         ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5492                         if (IS_ERR(ret)) {
5493                                 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5494                                 verbose(env, "unable to resolve the size of type '%s': %ld\n",
5495                                         tname, PTR_ERR(ret));
5496                                 return -EINVAL;
5497                         }
5498                         regs[BPF_REG_0].type =
5499                                 fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5500                                 PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
5501                         regs[BPF_REG_0].mem_size = tsize;
5502                 } else {
5503                         regs[BPF_REG_0].type =
5504                                 fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5505                                 PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
5506                         regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5507                 }
5508         } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
5509                 int ret_btf_id;
5510
5511                 mark_reg_known_zero(env, regs, BPF_REG_0);
5512                 regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
5513                 ret_btf_id = *fn->ret_btf_id;
5514                 if (ret_btf_id == 0) {
5515                         verbose(env, "invalid return type %d of func %s#%d\n",
5516                                 fn->ret_type, func_id_name(func_id), func_id);
5517                         return -EINVAL;
5518                 }
5519                 regs[BPF_REG_0].btf_id = ret_btf_id;
5520         } else {
5521                 verbose(env, "unknown return type %d of func %s#%d\n",
5522                         fn->ret_type, func_id_name(func_id), func_id);
5523                 return -EINVAL;
5524         }
5525
5526         if (reg_type_may_be_null(regs[BPF_REG_0].type))
5527                 regs[BPF_REG_0].id = ++env->id_gen;
5528
5529         if (is_ptr_cast_function(func_id)) {
5530                 /* For release_reference() */
5531                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
5532         } else if (is_acquire_function(func_id, meta.map_ptr)) {
5533                 int id = acquire_reference_state(env, insn_idx);
5534
5535                 if (id < 0)
5536                         return id;
5537                 /* For mark_ptr_or_null_reg() */
5538                 regs[BPF_REG_0].id = id;
5539                 /* For release_reference() */
5540                 regs[BPF_REG_0].ref_obj_id = id;
5541         }
5542
5543         do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
5544
5545         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
5546         if (err)
5547                 return err;
5548
5549         if ((func_id == BPF_FUNC_get_stack ||
5550              func_id == BPF_FUNC_get_task_stack) &&
5551             !env->prog->has_callchain_buf) {
5552                 const char *err_str;
5553
5554 #ifdef CONFIG_PERF_EVENTS
5555                 err = get_callchain_buffers(sysctl_perf_event_max_stack);
5556                 err_str = "cannot get callchain buffer for func %s#%d\n";
5557 #else
5558                 err = -ENOTSUPP;
5559                 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
5560 #endif
5561                 if (err) {
5562                         verbose(env, err_str, func_id_name(func_id), func_id);
5563                         return err;
5564                 }
5565
5566                 env->prog->has_callchain_buf = true;
5567         }
5568
5569         if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
5570                 env->prog->call_get_stack = true;
5571
5572         if (changes_data)
5573                 clear_all_pkt_pointers(env);
5574         return 0;
5575 }
5576
5577 static bool signed_add_overflows(s64 a, s64 b)
5578 {
5579         /* Do the add in u64, where overflow is well-defined */
5580         s64 res = (s64)((u64)a + (u64)b);
5581
5582         if (b < 0)
5583                 return res > a;
5584         return res < a;
5585 }
5586
5587 static bool signed_add32_overflows(s32 a, s32 b)
5588 {
5589         /* Do the add in u32, where overflow is well-defined */
5590         s32 res = (s32)((u32)a + (u32)b);
5591
5592         if (b < 0)
5593                 return res > a;
5594         return res < a;
5595 }
5596
5597 static bool signed_sub_overflows(s64 a, s64 b)
5598 {
5599         /* Do the sub in u64, where overflow is well-defined */
5600         s64 res = (s64)((u64)a - (u64)b);
5601
5602         if (b < 0)
5603                 return res < a;
5604         return res > a;
5605 }
5606
5607 static bool signed_sub32_overflows(s32 a, s32 b)
5608 {
5609         /* Do the sub in u32, where overflow is well-defined */
5610         s32 res = (s32)((u32)a - (u32)b);
5611
5612         if (b < 0)
5613                 return res < a;
5614         return res > a;
5615 }
5616
5617 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
5618                                   const struct bpf_reg_state *reg,
5619                                   enum bpf_reg_type type)
5620 {
5621         bool known = tnum_is_const(reg->var_off);
5622         s64 val = reg->var_off.value;
5623         s64 smin = reg->smin_value;
5624
5625         if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
5626                 verbose(env, "math between %s pointer and %lld is not allowed\n",
5627                         reg_type_str[type], val);
5628                 return false;
5629         }
5630
5631         if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
5632                 verbose(env, "%s pointer offset %d is not allowed\n",
5633                         reg_type_str[type], reg->off);
5634                 return false;
5635         }
5636
5637         if (smin == S64_MIN) {
5638                 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
5639                         reg_type_str[type]);
5640                 return false;
5641         }
5642
5643         if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
5644                 verbose(env, "value %lld makes %s pointer be out of bounds\n",
5645                         smin, reg_type_str[type]);
5646                 return false;
5647         }
5648
5649         return true;
5650 }
5651
5652 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
5653 {
5654         return &env->insn_aux_data[env->insn_idx];
5655 }
5656
5657 enum {
5658         REASON_BOUNDS   = -1,
5659         REASON_TYPE     = -2,
5660         REASON_PATHS    = -3,
5661         REASON_LIMIT    = -4,
5662         REASON_STACK    = -5,
5663 };
5664
5665 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
5666                               u32 *alu_limit, bool mask_to_left)
5667 {
5668         u32 max = 0, ptr_limit = 0;
5669
5670         switch (ptr_reg->type) {
5671         case PTR_TO_STACK:
5672                 /* Offset 0 is out-of-bounds, but acceptable start for the
5673                  * left direction, see BPF_REG_FP. Also, unknown scalar
5674                  * offset where we would need to deal with min/max bounds is
5675                  * currently prohibited for unprivileged.
5676                  */
5677                 max = MAX_BPF_STACK + mask_to_left;
5678                 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
5679                 break;
5680         case PTR_TO_MAP_VALUE:
5681                 max = ptr_reg->map_ptr->value_size;
5682                 ptr_limit = (mask_to_left ?
5683                              ptr_reg->smin_value :
5684                              ptr_reg->umax_value) + ptr_reg->off;
5685                 break;
5686         default:
5687                 return REASON_TYPE;
5688         }
5689
5690         if (ptr_limit >= max)
5691                 return REASON_LIMIT;
5692         *alu_limit = ptr_limit;
5693         return 0;
5694 }
5695
5696 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
5697                                     const struct bpf_insn *insn)
5698 {
5699         return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
5700 }
5701
5702 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
5703                                        u32 alu_state, u32 alu_limit)
5704 {
5705         /* If we arrived here from different branches with different
5706          * state or limits to sanitize, then this won't work.
5707          */
5708         if (aux->alu_state &&
5709             (aux->alu_state != alu_state ||
5710              aux->alu_limit != alu_limit))
5711                 return REASON_PATHS;
5712
5713         /* Corresponding fixup done in fixup_bpf_calls(). */
5714         aux->alu_state = alu_state;
5715         aux->alu_limit = alu_limit;
5716         return 0;
5717 }
5718
5719 static int sanitize_val_alu(struct bpf_verifier_env *env,
5720                             struct bpf_insn *insn)
5721 {
5722         struct bpf_insn_aux_data *aux = cur_aux(env);
5723
5724         if (can_skip_alu_sanitation(env, insn))
5725                 return 0;
5726
5727         return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
5728 }
5729
5730 static bool sanitize_needed(u8 opcode)
5731 {
5732         return opcode == BPF_ADD || opcode == BPF_SUB;
5733 }
5734
5735 struct bpf_sanitize_info {
5736         struct bpf_insn_aux_data aux;
5737         bool mask_to_left;
5738 };
5739
5740 static struct bpf_verifier_state *
5741 sanitize_speculative_path(struct bpf_verifier_env *env,
5742                           const struct bpf_insn *insn,
5743                           u32 next_idx, u32 curr_idx)
5744 {
5745         struct bpf_verifier_state *branch;
5746         struct bpf_reg_state *regs;
5747
5748         branch = push_stack(env, next_idx, curr_idx, true);
5749         if (branch && insn) {
5750                 regs = branch->frame[branch->curframe]->regs;
5751                 if (BPF_SRC(insn->code) == BPF_K) {
5752                         mark_reg_unknown(env, regs, insn->dst_reg);
5753                 } else if (BPF_SRC(insn->code) == BPF_X) {
5754                         mark_reg_unknown(env, regs, insn->dst_reg);
5755                         mark_reg_unknown(env, regs, insn->src_reg);
5756                 }
5757         }
5758         return branch;
5759 }
5760
5761 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
5762                             struct bpf_insn *insn,
5763                             const struct bpf_reg_state *ptr_reg,
5764                             const struct bpf_reg_state *off_reg,
5765                             struct bpf_reg_state *dst_reg,
5766                             struct bpf_sanitize_info *info,
5767                             const bool commit_window)
5768 {
5769         struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
5770         struct bpf_verifier_state *vstate = env->cur_state;
5771         bool off_is_imm = tnum_is_const(off_reg->var_off);
5772         bool off_is_neg = off_reg->smin_value < 0;
5773         bool ptr_is_dst_reg = ptr_reg == dst_reg;
5774         u8 opcode = BPF_OP(insn->code);
5775         u32 alu_state, alu_limit;
5776         struct bpf_reg_state tmp;
5777         bool ret;
5778         int err;
5779
5780         if (can_skip_alu_sanitation(env, insn))
5781                 return 0;
5782
5783         /* We already marked aux for masking from non-speculative
5784          * paths, thus we got here in the first place. We only care
5785          * to explore bad access from here.
5786          */
5787         if (vstate->speculative)
5788                 goto do_sim;
5789
5790         if (!commit_window) {
5791                 if (!tnum_is_const(off_reg->var_off) &&
5792                     (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
5793                         return REASON_BOUNDS;
5794
5795                 info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
5796                                      (opcode == BPF_SUB && !off_is_neg);
5797         }
5798
5799         err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
5800         if (err < 0)
5801                 return err;
5802
5803         if (commit_window) {
5804                 /* In commit phase we narrow the masking window based on
5805                  * the observed pointer move after the simulated operation.
5806                  */
5807                 alu_state = info->aux.alu_state;
5808                 alu_limit = abs(info->aux.alu_limit - alu_limit);
5809         } else {
5810                 alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
5811                 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
5812                 alu_state |= ptr_is_dst_reg ?
5813                              BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
5814
5815                 /* Limit pruning on unknown scalars to enable deep search for
5816                  * potential masking differences from other program paths.
5817                  */
5818                 if (!off_is_imm)
5819                         env->explore_alu_limits = true;
5820         }
5821
5822         err = update_alu_sanitation_state(aux, alu_state, alu_limit);
5823         if (err < 0)
5824                 return err;
5825 do_sim:
5826         /* If we're in commit phase, we're done here given we already
5827          * pushed the truncated dst_reg into the speculative verification
5828          * stack.
5829          *
5830          * Also, when register is a known constant, we rewrite register-based
5831          * operation to immediate-based, and thus do not need masking (and as
5832          * a consequence, do not need to simulate the zero-truncation either).
5833          */
5834         if (commit_window || off_is_imm)
5835                 return 0;
5836
5837         /* Simulate and find potential out-of-bounds access under
5838          * speculative execution from truncation as a result of
5839          * masking when off was not within expected range. If off
5840          * sits in dst, then we temporarily need to move ptr there
5841          * to simulate dst (== 0) +/-= ptr. Needed, for example,
5842          * for cases where we use K-based arithmetic in one direction
5843          * and truncated reg-based in the other in order to explore
5844          * bad access.
5845          */
5846         if (!ptr_is_dst_reg) {
5847                 tmp = *dst_reg;
5848                 *dst_reg = *ptr_reg;
5849         }
5850         ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
5851                                         env->insn_idx);
5852         if (!ptr_is_dst_reg && ret)
5853                 *dst_reg = tmp;
5854         return !ret ? REASON_STACK : 0;
5855 }
5856
5857 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
5858 {
5859         struct bpf_verifier_state *vstate = env->cur_state;
5860
5861         /* If we simulate paths under speculation, we don't update the
5862          * insn as 'seen' such that when we verify unreachable paths in
5863          * the non-speculative domain, sanitize_dead_code() can still
5864          * rewrite/sanitize them.
5865          */
5866         if (!vstate->speculative)
5867                 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
5868 }
5869
5870 static int sanitize_err(struct bpf_verifier_env *env,
5871                         const struct bpf_insn *insn, int reason,
5872                         const struct bpf_reg_state *off_reg,
5873                         const struct bpf_reg_state *dst_reg)
5874 {
5875         static const char *err = "pointer arithmetic with it prohibited for !root";
5876         const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
5877         u32 dst = insn->dst_reg, src = insn->src_reg;
5878
5879         switch (reason) {
5880         case REASON_BOUNDS:
5881                 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
5882                         off_reg == dst_reg ? dst : src, err);
5883                 break;
5884         case REASON_TYPE:
5885                 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
5886                         off_reg == dst_reg ? src : dst, err);
5887                 break;
5888         case REASON_PATHS:
5889                 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
5890                         dst, op, err);
5891                 break;
5892         case REASON_LIMIT:
5893                 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
5894                         dst, op, err);
5895                 break;
5896         case REASON_STACK:
5897                 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
5898                         dst, err);
5899                 break;
5900         default:
5901                 verbose(env, "verifier internal error: unknown reason (%d)\n",
5902                         reason);
5903                 break;
5904         }
5905
5906         return -EACCES;
5907 }
5908
5909 /* check that stack access falls within stack limits and that 'reg' doesn't
5910  * have a variable offset.
5911  *
5912  * Variable offset is prohibited for unprivileged mode for simplicity since it
5913  * requires corresponding support in Spectre masking for stack ALU.  See also
5914  * retrieve_ptr_limit().
5915  *
5916  *
5917  * 'off' includes 'reg->off'.
5918  */
5919 static int check_stack_access_for_ptr_arithmetic(
5920                                 struct bpf_verifier_env *env,
5921                                 int regno,
5922                                 const struct bpf_reg_state *reg,
5923                                 int off)
5924 {
5925         if (!tnum_is_const(reg->var_off)) {
5926                 char tn_buf[48];
5927
5928                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5929                 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
5930                         regno, tn_buf, off);
5931                 return -EACCES;
5932         }
5933
5934         if (off >= 0 || off < -MAX_BPF_STACK) {
5935                 verbose(env, "R%d stack pointer arithmetic goes out of range, "
5936                         "prohibited for !root; off=%d\n", regno, off);
5937                 return -EACCES;
5938         }
5939
5940         return 0;
5941 }
5942
5943 static int sanitize_check_bounds(struct bpf_verifier_env *env,
5944                                  const struct bpf_insn *insn,
5945                                  const struct bpf_reg_state *dst_reg)
5946 {
5947         u32 dst = insn->dst_reg;
5948
5949         /* For unprivileged we require that resulting offset must be in bounds
5950          * in order to be able to sanitize access later on.
5951          */
5952         if (env->bypass_spec_v1)
5953                 return 0;
5954
5955         switch (dst_reg->type) {
5956         case PTR_TO_STACK:
5957                 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
5958                                         dst_reg->off + dst_reg->var_off.value))
5959                         return -EACCES;
5960                 break;
5961         case PTR_TO_MAP_VALUE:
5962                 if (check_map_access(env, dst, dst_reg->off, 1, false)) {
5963                         verbose(env, "R%d pointer arithmetic of map value goes out of range, "
5964                                 "prohibited for !root\n", dst);
5965                         return -EACCES;
5966                 }
5967                 break;
5968         default:
5969                 break;
5970         }
5971
5972         return 0;
5973 }
5974
5975 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
5976  * Caller should also handle BPF_MOV case separately.
5977  * If we return -EACCES, caller may want to try again treating pointer as a
5978  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
5979  */
5980 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
5981                                    struct bpf_insn *insn,
5982                                    const struct bpf_reg_state *ptr_reg,
5983                                    const struct bpf_reg_state *off_reg)
5984 {
5985         struct bpf_verifier_state *vstate = env->cur_state;
5986         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5987         struct bpf_reg_state *regs = state->regs, *dst_reg;
5988         bool known = tnum_is_const(off_reg->var_off);
5989         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
5990             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
5991         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
5992             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
5993         struct bpf_sanitize_info info = {};
5994         u8 opcode = BPF_OP(insn->code);
5995         u32 dst = insn->dst_reg;
5996         int ret;
5997
5998         dst_reg = &regs[dst];
5999
6000         if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
6001             smin_val > smax_val || umin_val > umax_val) {
6002                 /* Taint dst register if offset had invalid bounds derived from
6003                  * e.g. dead branches.
6004                  */
6005                 __mark_reg_unknown(env, dst_reg);
6006                 return 0;
6007         }
6008
6009         if (BPF_CLASS(insn->code) != BPF_ALU64) {
6010                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
6011                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6012                         __mark_reg_unknown(env, dst_reg);
6013                         return 0;
6014                 }
6015
6016                 verbose(env,
6017                         "R%d 32-bit pointer arithmetic prohibited\n",
6018                         dst);
6019                 return -EACCES;
6020         }
6021
6022         switch (ptr_reg->type) {
6023         case PTR_TO_MAP_VALUE_OR_NULL:
6024                 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
6025                         dst, reg_type_str[ptr_reg->type]);
6026                 return -EACCES;
6027         case CONST_PTR_TO_MAP:
6028                 /* smin_val represents the known value */
6029                 if (known && smin_val == 0 && opcode == BPF_ADD)
6030                         break;
6031                 fallthrough;
6032         case PTR_TO_PACKET_END:
6033         case PTR_TO_SOCKET:
6034         case PTR_TO_SOCKET_OR_NULL:
6035         case PTR_TO_SOCK_COMMON:
6036         case PTR_TO_SOCK_COMMON_OR_NULL:
6037         case PTR_TO_TCP_SOCK:
6038         case PTR_TO_TCP_SOCK_OR_NULL:
6039         case PTR_TO_XDP_SOCK:
6040                 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
6041                         dst, reg_type_str[ptr_reg->type]);
6042                 return -EACCES;
6043         default:
6044                 break;
6045         }
6046
6047         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
6048          * The id may be overwritten later if we create a new variable offset.
6049          */
6050         dst_reg->type = ptr_reg->type;
6051         dst_reg->id = ptr_reg->id;
6052
6053         if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
6054             !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
6055                 return -EINVAL;
6056
6057         /* pointer types do not carry 32-bit bounds at the moment. */
6058         __mark_reg32_unbounded(dst_reg);
6059
6060         if (sanitize_needed(opcode)) {
6061                 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
6062                                        &info, false);
6063                 if (ret < 0)
6064                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
6065         }
6066
6067         switch (opcode) {
6068         case BPF_ADD:
6069                 /* We can take a fixed offset as long as it doesn't overflow
6070                  * the s32 'off' field
6071                  */
6072                 if (known && (ptr_reg->off + smin_val ==
6073                               (s64)(s32)(ptr_reg->off + smin_val))) {
6074                         /* pointer += K.  Accumulate it into fixed offset */
6075                         dst_reg->smin_value = smin_ptr;
6076                         dst_reg->smax_value = smax_ptr;
6077                         dst_reg->umin_value = umin_ptr;
6078                         dst_reg->umax_value = umax_ptr;
6079                         dst_reg->var_off = ptr_reg->var_off;
6080                         dst_reg->off = ptr_reg->off + smin_val;
6081                         dst_reg->raw = ptr_reg->raw;
6082                         break;
6083                 }
6084                 /* A new variable offset is created.  Note that off_reg->off
6085                  * == 0, since it's a scalar.
6086                  * dst_reg gets the pointer type and since some positive
6087                  * integer value was added to the pointer, give it a new 'id'
6088                  * if it's a PTR_TO_PACKET.
6089                  * this creates a new 'base' pointer, off_reg (variable) gets
6090                  * added into the variable offset, and we copy the fixed offset
6091                  * from ptr_reg.
6092                  */
6093                 if (signed_add_overflows(smin_ptr, smin_val) ||
6094                     signed_add_overflows(smax_ptr, smax_val)) {
6095                         dst_reg->smin_value = S64_MIN;
6096                         dst_reg->smax_value = S64_MAX;
6097                 } else {
6098                         dst_reg->smin_value = smin_ptr + smin_val;
6099                         dst_reg->smax_value = smax_ptr + smax_val;
6100                 }
6101                 if (umin_ptr + umin_val < umin_ptr ||
6102                     umax_ptr + umax_val < umax_ptr) {
6103                         dst_reg->umin_value = 0;
6104                         dst_reg->umax_value = U64_MAX;
6105                 } else {
6106                         dst_reg->umin_value = umin_ptr + umin_val;
6107                         dst_reg->umax_value = umax_ptr + umax_val;
6108                 }
6109                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
6110                 dst_reg->off = ptr_reg->off;
6111                 dst_reg->raw = ptr_reg->raw;
6112                 if (reg_is_pkt_pointer(ptr_reg)) {
6113                         dst_reg->id = ++env->id_gen;
6114                         /* something was added to pkt_ptr, set range to zero */
6115                         dst_reg->raw = 0;
6116                 }
6117                 break;
6118         case BPF_SUB:
6119                 if (dst_reg == off_reg) {
6120                         /* scalar -= pointer.  Creates an unknown scalar */
6121                         verbose(env, "R%d tried to subtract pointer from scalar\n",
6122                                 dst);
6123                         return -EACCES;
6124                 }
6125                 /* We don't allow subtraction from FP, because (according to
6126                  * test_verifier.c test "invalid fp arithmetic", JITs might not
6127                  * be able to deal with it.
6128                  */
6129                 if (ptr_reg->type == PTR_TO_STACK) {
6130                         verbose(env, "R%d subtraction from stack pointer prohibited\n",
6131                                 dst);
6132                         return -EACCES;
6133                 }
6134                 if (known && (ptr_reg->off - smin_val ==
6135                               (s64)(s32)(ptr_reg->off - smin_val))) {
6136                         /* pointer -= K.  Subtract it from fixed offset */
6137                         dst_reg->smin_value = smin_ptr;
6138                         dst_reg->smax_value = smax_ptr;
6139                         dst_reg->umin_value = umin_ptr;
6140                         dst_reg->umax_value = umax_ptr;
6141                         dst_reg->var_off = ptr_reg->var_off;
6142                         dst_reg->id = ptr_reg->id;
6143                         dst_reg->off = ptr_reg->off - smin_val;
6144                         dst_reg->raw = ptr_reg->raw;
6145                         break;
6146                 }
6147                 /* A new variable offset is created.  If the subtrahend is known
6148                  * nonnegative, then any reg->range we had before is still good.
6149                  */
6150                 if (signed_sub_overflows(smin_ptr, smax_val) ||
6151                     signed_sub_overflows(smax_ptr, smin_val)) {
6152                         /* Overflow possible, we know nothing */
6153                         dst_reg->smin_value = S64_MIN;
6154                         dst_reg->smax_value = S64_MAX;
6155                 } else {
6156                         dst_reg->smin_value = smin_ptr - smax_val;
6157                         dst_reg->smax_value = smax_ptr - smin_val;
6158                 }
6159                 if (umin_ptr < umax_val) {
6160                         /* Overflow possible, we know nothing */
6161                         dst_reg->umin_value = 0;
6162                         dst_reg->umax_value = U64_MAX;
6163                 } else {
6164                         /* Cannot overflow (as long as bounds are consistent) */
6165                         dst_reg->umin_value = umin_ptr - umax_val;
6166                         dst_reg->umax_value = umax_ptr - umin_val;
6167                 }
6168                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
6169                 dst_reg->off = ptr_reg->off;
6170                 dst_reg->raw = ptr_reg->raw;
6171                 if (reg_is_pkt_pointer(ptr_reg)) {
6172                         dst_reg->id = ++env->id_gen;
6173                         /* something was added to pkt_ptr, set range to zero */
6174                         if (smin_val < 0)
6175                                 dst_reg->raw = 0;
6176                 }
6177                 break;
6178         case BPF_AND:
6179         case BPF_OR:
6180         case BPF_XOR:
6181                 /* bitwise ops on pointers are troublesome, prohibit. */
6182                 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
6183                         dst, bpf_alu_string[opcode >> 4]);
6184                 return -EACCES;
6185         default:
6186                 /* other operators (e.g. MUL,LSH) produce non-pointer results */
6187                 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
6188                         dst, bpf_alu_string[opcode >> 4]);
6189                 return -EACCES;
6190         }
6191
6192         if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
6193                 return -EINVAL;
6194
6195         __update_reg_bounds(dst_reg);
6196         __reg_deduce_bounds(dst_reg);
6197         __reg_bound_offset(dst_reg);
6198
6199         if (sanitize_check_bounds(env, insn, dst_reg) < 0)
6200                 return -EACCES;
6201         if (sanitize_needed(opcode)) {
6202                 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
6203                                        &info, true);
6204                 if (ret < 0)
6205                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
6206         }
6207
6208         return 0;
6209 }
6210
6211 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
6212                                  struct bpf_reg_state *src_reg)
6213 {
6214         s32 smin_val = src_reg->s32_min_value;
6215         s32 smax_val = src_reg->s32_max_value;
6216         u32 umin_val = src_reg->u32_min_value;
6217         u32 umax_val = src_reg->u32_max_value;
6218
6219         if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
6220             signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
6221                 dst_reg->s32_min_value = S32_MIN;
6222                 dst_reg->s32_max_value = S32_MAX;
6223         } else {
6224                 dst_reg->s32_min_value += smin_val;
6225                 dst_reg->s32_max_value += smax_val;
6226         }
6227         if (dst_reg->u32_min_value + umin_val < umin_val ||
6228             dst_reg->u32_max_value + umax_val < umax_val) {
6229                 dst_reg->u32_min_value = 0;
6230                 dst_reg->u32_max_value = U32_MAX;
6231         } else {
6232                 dst_reg->u32_min_value += umin_val;
6233                 dst_reg->u32_max_value += umax_val;
6234         }
6235 }
6236
6237 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
6238                                struct bpf_reg_state *src_reg)
6239 {
6240         s64 smin_val = src_reg->smin_value;
6241         s64 smax_val = src_reg->smax_value;
6242         u64 umin_val = src_reg->umin_value;
6243         u64 umax_val = src_reg->umax_value;
6244
6245         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
6246             signed_add_overflows(dst_reg->smax_value, smax_val)) {
6247                 dst_reg->smin_value = S64_MIN;
6248                 dst_reg->smax_value = S64_MAX;
6249         } else {
6250                 dst_reg->smin_value += smin_val;
6251                 dst_reg->smax_value += smax_val;
6252         }
6253         if (dst_reg->umin_value + umin_val < umin_val ||
6254             dst_reg->umax_value + umax_val < umax_val) {
6255                 dst_reg->umin_value = 0;
6256                 dst_reg->umax_value = U64_MAX;
6257         } else {
6258                 dst_reg->umin_value += umin_val;
6259                 dst_reg->umax_value += umax_val;
6260         }
6261 }
6262
6263 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
6264                                  struct bpf_reg_state *src_reg)
6265 {
6266         s32 smin_val = src_reg->s32_min_value;
6267         s32 smax_val = src_reg->s32_max_value;
6268         u32 umin_val = src_reg->u32_min_value;
6269         u32 umax_val = src_reg->u32_max_value;
6270
6271         if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
6272             signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
6273                 /* Overflow possible, we know nothing */
6274                 dst_reg->s32_min_value = S32_MIN;
6275                 dst_reg->s32_max_value = S32_MAX;
6276         } else {
6277                 dst_reg->s32_min_value -= smax_val;
6278                 dst_reg->s32_max_value -= smin_val;
6279         }
6280         if (dst_reg->u32_min_value < umax_val) {
6281                 /* Overflow possible, we know nothing */
6282                 dst_reg->u32_min_value = 0;
6283                 dst_reg->u32_max_value = U32_MAX;
6284         } else {
6285                 /* Cannot overflow (as long as bounds are consistent) */
6286                 dst_reg->u32_min_value -= umax_val;
6287                 dst_reg->u32_max_value -= umin_val;
6288         }
6289 }
6290
6291 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
6292                                struct bpf_reg_state *src_reg)
6293 {
6294         s64 smin_val = src_reg->smin_value;
6295         s64 smax_val = src_reg->smax_value;
6296         u64 umin_val = src_reg->umin_value;
6297         u64 umax_val = src_reg->umax_value;
6298
6299         if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
6300             signed_sub_overflows(dst_reg->smax_value, smin_val)) {
6301                 /* Overflow possible, we know nothing */
6302                 dst_reg->smin_value = S64_MIN;
6303                 dst_reg->smax_value = S64_MAX;
6304         } else {
6305                 dst_reg->smin_value -= smax_val;
6306                 dst_reg->smax_value -= smin_val;
6307         }
6308         if (dst_reg->umin_value < umax_val) {
6309                 /* Overflow possible, we know nothing */
6310                 dst_reg->umin_value = 0;
6311                 dst_reg->umax_value = U64_MAX;
6312         } else {
6313                 /* Cannot overflow (as long as bounds are consistent) */
6314                 dst_reg->umin_value -= umax_val;
6315                 dst_reg->umax_value -= umin_val;
6316         }
6317 }
6318
6319 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
6320                                  struct bpf_reg_state *src_reg)
6321 {
6322         s32 smin_val = src_reg->s32_min_value;
6323         u32 umin_val = src_reg->u32_min_value;
6324         u32 umax_val = src_reg->u32_max_value;
6325
6326         if (smin_val < 0 || dst_reg->s32_min_value < 0) {
6327                 /* Ain't nobody got time to multiply that sign */
6328                 __mark_reg32_unbounded(dst_reg);
6329                 return;
6330         }
6331         /* Both values are positive, so we can work with unsigned and
6332          * copy the result to signed (unless it exceeds S32_MAX).
6333          */
6334         if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
6335                 /* Potential overflow, we know nothing */
6336                 __mark_reg32_unbounded(dst_reg);
6337                 return;
6338         }
6339         dst_reg->u32_min_value *= umin_val;
6340         dst_reg->u32_max_value *= umax_val;
6341         if (dst_reg->u32_max_value > S32_MAX) {
6342                 /* Overflow possible, we know nothing */
6343                 dst_reg->s32_min_value = S32_MIN;
6344                 dst_reg->s32_max_value = S32_MAX;
6345         } else {
6346                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6347                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6348         }
6349 }
6350
6351 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
6352                                struct bpf_reg_state *src_reg)
6353 {
6354         s64 smin_val = src_reg->smin_value;
6355         u64 umin_val = src_reg->umin_value;
6356         u64 umax_val = src_reg->umax_value;
6357
6358         if (smin_val < 0 || dst_reg->smin_value < 0) {
6359                 /* Ain't nobody got time to multiply that sign */
6360                 __mark_reg64_unbounded(dst_reg);
6361                 return;
6362         }
6363         /* Both values are positive, so we can work with unsigned and
6364          * copy the result to signed (unless it exceeds S64_MAX).
6365          */
6366         if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
6367                 /* Potential overflow, we know nothing */
6368                 __mark_reg64_unbounded(dst_reg);
6369                 return;
6370         }
6371         dst_reg->umin_value *= umin_val;
6372         dst_reg->umax_value *= umax_val;
6373         if (dst_reg->umax_value > S64_MAX) {
6374                 /* Overflow possible, we know nothing */
6375                 dst_reg->smin_value = S64_MIN;
6376                 dst_reg->smax_value = S64_MAX;
6377         } else {
6378                 dst_reg->smin_value = dst_reg->umin_value;
6379                 dst_reg->smax_value = dst_reg->umax_value;
6380         }
6381 }
6382
6383 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
6384                                  struct bpf_reg_state *src_reg)
6385 {
6386         bool src_known = tnum_subreg_is_const(src_reg->var_off);
6387         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6388         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6389         s32 smin_val = src_reg->s32_min_value;
6390         u32 umax_val = src_reg->u32_max_value;
6391
6392         if (src_known && dst_known) {
6393                 __mark_reg32_known(dst_reg, var32_off.value);
6394                 return;
6395         }
6396
6397         /* We get our minimum from the var_off, since that's inherently
6398          * bitwise.  Our maximum is the minimum of the operands' maxima.
6399          */
6400         dst_reg->u32_min_value = var32_off.value;
6401         dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
6402         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6403                 /* Lose signed bounds when ANDing negative numbers,
6404                  * ain't nobody got time for that.
6405                  */
6406                 dst_reg->s32_min_value = S32_MIN;
6407                 dst_reg->s32_max_value = S32_MAX;
6408         } else {
6409                 /* ANDing two positives gives a positive, so safe to
6410                  * cast result into s64.
6411                  */
6412                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6413                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6414         }
6415 }
6416
6417 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
6418                                struct bpf_reg_state *src_reg)
6419 {
6420         bool src_known = tnum_is_const(src_reg->var_off);
6421         bool dst_known = tnum_is_const(dst_reg->var_off);
6422         s64 smin_val = src_reg->smin_value;
6423         u64 umax_val = src_reg->umax_value;
6424
6425         if (src_known && dst_known) {
6426                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
6427                 return;
6428         }
6429
6430         /* We get our minimum from the var_off, since that's inherently
6431          * bitwise.  Our maximum is the minimum of the operands' maxima.
6432          */
6433         dst_reg->umin_value = dst_reg->var_off.value;
6434         dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
6435         if (dst_reg->smin_value < 0 || smin_val < 0) {
6436                 /* Lose signed bounds when ANDing negative numbers,
6437                  * ain't nobody got time for that.
6438                  */
6439                 dst_reg->smin_value = S64_MIN;
6440                 dst_reg->smax_value = S64_MAX;
6441         } else {
6442                 /* ANDing two positives gives a positive, so safe to
6443                  * cast result into s64.
6444                  */
6445                 dst_reg->smin_value = dst_reg->umin_value;
6446                 dst_reg->smax_value = dst_reg->umax_value;
6447         }
6448         /* We may learn something more from the var_off */
6449         __update_reg_bounds(dst_reg);
6450 }
6451
6452 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
6453                                 struct bpf_reg_state *src_reg)
6454 {
6455         bool src_known = tnum_subreg_is_const(src_reg->var_off);
6456         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6457         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6458         s32 smin_val = src_reg->s32_min_value;
6459         u32 umin_val = src_reg->u32_min_value;
6460
6461         if (src_known && dst_known) {
6462                 __mark_reg32_known(dst_reg, var32_off.value);
6463                 return;
6464         }
6465
6466         /* We get our maximum from the var_off, and our minimum is the
6467          * maximum of the operands' minima
6468          */
6469         dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
6470         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6471         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6472                 /* Lose signed bounds when ORing negative numbers,
6473                  * ain't nobody got time for that.
6474                  */
6475                 dst_reg->s32_min_value = S32_MIN;
6476                 dst_reg->s32_max_value = S32_MAX;
6477         } else {
6478                 /* ORing two positives gives a positive, so safe to
6479                  * cast result into s64.
6480                  */
6481                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6482                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6483         }
6484 }
6485
6486 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
6487                               struct bpf_reg_state *src_reg)
6488 {
6489         bool src_known = tnum_is_const(src_reg->var_off);
6490         bool dst_known = tnum_is_const(dst_reg->var_off);
6491         s64 smin_val = src_reg->smin_value;
6492         u64 umin_val = src_reg->umin_value;
6493
6494         if (src_known && dst_known) {
6495                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
6496                 return;
6497         }
6498
6499         /* We get our maximum from the var_off, and our minimum is the
6500          * maximum of the operands' minima
6501          */
6502         dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
6503         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6504         if (dst_reg->smin_value < 0 || smin_val < 0) {
6505                 /* Lose signed bounds when ORing negative numbers,
6506                  * ain't nobody got time for that.
6507                  */
6508                 dst_reg->smin_value = S64_MIN;
6509                 dst_reg->smax_value = S64_MAX;
6510         } else {
6511                 /* ORing two positives gives a positive, so safe to
6512                  * cast result into s64.
6513                  */
6514                 dst_reg->smin_value = dst_reg->umin_value;
6515                 dst_reg->smax_value = dst_reg->umax_value;
6516         }
6517         /* We may learn something more from the var_off */
6518         __update_reg_bounds(dst_reg);
6519 }
6520
6521 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
6522                                  struct bpf_reg_state *src_reg)
6523 {
6524         bool src_known = tnum_subreg_is_const(src_reg->var_off);
6525         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6526         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6527         s32 smin_val = src_reg->s32_min_value;
6528
6529         if (src_known && dst_known) {
6530                 __mark_reg32_known(dst_reg, var32_off.value);
6531                 return;
6532         }
6533
6534         /* We get both minimum and maximum from the var32_off. */
6535         dst_reg->u32_min_value = var32_off.value;
6536         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6537
6538         if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
6539                 /* XORing two positive sign numbers gives a positive,
6540                  * so safe to cast u32 result into s32.
6541                  */
6542                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6543                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6544         } else {
6545                 dst_reg->s32_min_value = S32_MIN;
6546                 dst_reg->s32_max_value = S32_MAX;
6547         }
6548 }
6549
6550 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
6551                                struct bpf_reg_state *src_reg)
6552 {
6553         bool src_known = tnum_is_const(src_reg->var_off);
6554         bool dst_known = tnum_is_const(dst_reg->var_off);
6555         s64 smin_val = src_reg->smin_value;
6556
6557         if (src_known && dst_known) {
6558                 /* dst_reg->var_off.value has been updated earlier */
6559                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
6560                 return;
6561         }
6562
6563         /* We get both minimum and maximum from the var_off. */
6564         dst_reg->umin_value = dst_reg->var_off.value;
6565         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6566
6567         if (dst_reg->smin_value >= 0 && smin_val >= 0) {
6568                 /* XORing two positive sign numbers gives a positive,
6569                  * so safe to cast u64 result into s64.
6570                  */
6571                 dst_reg->smin_value = dst_reg->umin_value;
6572                 dst_reg->smax_value = dst_reg->umax_value;
6573         } else {
6574                 dst_reg->smin_value = S64_MIN;
6575                 dst_reg->smax_value = S64_MAX;
6576         }
6577
6578         __update_reg_bounds(dst_reg);
6579 }
6580
6581 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6582                                    u64 umin_val, u64 umax_val)
6583 {
6584         /* We lose all sign bit information (except what we can pick
6585          * up from var_off)
6586          */
6587         dst_reg->s32_min_value = S32_MIN;
6588         dst_reg->s32_max_value = S32_MAX;
6589         /* If we might shift our top bit out, then we know nothing */
6590         if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
6591                 dst_reg->u32_min_value = 0;
6592                 dst_reg->u32_max_value = U32_MAX;
6593         } else {
6594                 dst_reg->u32_min_value <<= umin_val;
6595                 dst_reg->u32_max_value <<= umax_val;
6596         }
6597 }
6598
6599 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6600                                  struct bpf_reg_state *src_reg)
6601 {
6602         u32 umax_val = src_reg->u32_max_value;
6603         u32 umin_val = src_reg->u32_min_value;
6604         /* u32 alu operation will zext upper bits */
6605         struct tnum subreg = tnum_subreg(dst_reg->var_off);
6606
6607         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6608         dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
6609         /* Not required but being careful mark reg64 bounds as unknown so
6610          * that we are forced to pick them up from tnum and zext later and
6611          * if some path skips this step we are still safe.
6612          */
6613         __mark_reg64_unbounded(dst_reg);
6614         __update_reg32_bounds(dst_reg);
6615 }
6616
6617 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
6618                                    u64 umin_val, u64 umax_val)
6619 {
6620         /* Special case <<32 because it is a common compiler pattern to sign
6621          * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
6622          * positive we know this shift will also be positive so we can track
6623          * bounds correctly. Otherwise we lose all sign bit information except
6624          * what we can pick up from var_off. Perhaps we can generalize this
6625          * later to shifts of any length.
6626          */
6627         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
6628                 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
6629         else
6630                 dst_reg->smax_value = S64_MAX;
6631
6632         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
6633                 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
6634         else
6635                 dst_reg->smin_value = S64_MIN;
6636
6637         /* If we might shift our top bit out, then we know nothing */
6638         if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
6639                 dst_reg->umin_value = 0;
6640                 dst_reg->umax_value = U64_MAX;
6641         } else {
6642                 dst_reg->umin_value <<= umin_val;
6643                 dst_reg->umax_value <<= umax_val;
6644         }
6645 }
6646
6647 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
6648                                struct bpf_reg_state *src_reg)
6649 {
6650         u64 umax_val = src_reg->umax_value;
6651         u64 umin_val = src_reg->umin_value;
6652
6653         /* scalar64 calc uses 32bit unshifted bounds so must be called first */
6654         __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
6655         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6656
6657         dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
6658         /* We may learn something more from the var_off */
6659         __update_reg_bounds(dst_reg);
6660 }
6661
6662 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
6663                                  struct bpf_reg_state *src_reg)
6664 {
6665         struct tnum subreg = tnum_subreg(dst_reg->var_off);
6666         u32 umax_val = src_reg->u32_max_value;
6667         u32 umin_val = src_reg->u32_min_value;
6668
6669         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6670          * be negative, then either:
6671          * 1) src_reg might be zero, so the sign bit of the result is
6672          *    unknown, so we lose our signed bounds
6673          * 2) it's known negative, thus the unsigned bounds capture the
6674          *    signed bounds
6675          * 3) the signed bounds cross zero, so they tell us nothing
6676          *    about the result
6677          * If the value in dst_reg is known nonnegative, then again the
6678          * unsigned bounts capture the signed bounds.
6679          * Thus, in all cases it suffices to blow away our signed bounds
6680          * and rely on inferring new ones from the unsigned bounds and
6681          * var_off of the result.
6682          */
6683         dst_reg->s32_min_value = S32_MIN;
6684         dst_reg->s32_max_value = S32_MAX;
6685
6686         dst_reg->var_off = tnum_rshift(subreg, umin_val);
6687         dst_reg->u32_min_value >>= umax_val;
6688         dst_reg->u32_max_value >>= umin_val;
6689
6690         __mark_reg64_unbounded(dst_reg);
6691         __update_reg32_bounds(dst_reg);
6692 }
6693
6694 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
6695                                struct bpf_reg_state *src_reg)
6696 {
6697         u64 umax_val = src_reg->umax_value;
6698         u64 umin_val = src_reg->umin_value;
6699
6700         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6701          * be negative, then either:
6702          * 1) src_reg might be zero, so the sign bit of the result is
6703          *    unknown, so we lose our signed bounds
6704          * 2) it's known negative, thus the unsigned bounds capture the
6705          *    signed bounds
6706          * 3) the signed bounds cross zero, so they tell us nothing
6707          *    about the result
6708          * If the value in dst_reg is known nonnegative, then again the
6709          * unsigned bounts capture the signed bounds.
6710          * Thus, in all cases it suffices to blow away our signed bounds
6711          * and rely on inferring new ones from the unsigned bounds and
6712          * var_off of the result.
6713          */
6714         dst_reg->smin_value = S64_MIN;
6715         dst_reg->smax_value = S64_MAX;
6716         dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
6717         dst_reg->umin_value >>= umax_val;
6718         dst_reg->umax_value >>= umin_val;
6719
6720         /* Its not easy to operate on alu32 bounds here because it depends
6721          * on bits being shifted in. Take easy way out and mark unbounded
6722          * so we can recalculate later from tnum.
6723          */
6724         __mark_reg32_unbounded(dst_reg);
6725         __update_reg_bounds(dst_reg);
6726 }
6727
6728 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
6729                                   struct bpf_reg_state *src_reg)
6730 {
6731         u64 umin_val = src_reg->u32_min_value;
6732
6733         /* Upon reaching here, src_known is true and
6734          * umax_val is equal to umin_val.
6735          */
6736         dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
6737         dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
6738
6739         dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
6740
6741         /* blow away the dst_reg umin_value/umax_value and rely on
6742          * dst_reg var_off to refine the result.
6743          */
6744         dst_reg->u32_min_value = 0;
6745         dst_reg->u32_max_value = U32_MAX;
6746
6747         __mark_reg64_unbounded(dst_reg);
6748         __update_reg32_bounds(dst_reg);
6749 }
6750
6751 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
6752                                 struct bpf_reg_state *src_reg)
6753 {
6754         u64 umin_val = src_reg->umin_value;
6755
6756         /* Upon reaching here, src_known is true and umax_val is equal
6757          * to umin_val.
6758          */
6759         dst_reg->smin_value >>= umin_val;
6760         dst_reg->smax_value >>= umin_val;
6761
6762         dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
6763
6764         /* blow away the dst_reg umin_value/umax_value and rely on
6765          * dst_reg var_off to refine the result.
6766          */
6767         dst_reg->umin_value = 0;
6768         dst_reg->umax_value = U64_MAX;
6769
6770         /* Its not easy to operate on alu32 bounds here because it depends
6771          * on bits being shifted in from upper 32-bits. Take easy way out
6772          * and mark unbounded so we can recalculate later from tnum.
6773          */
6774         __mark_reg32_unbounded(dst_reg);
6775         __update_reg_bounds(dst_reg);
6776 }
6777
6778 /* WARNING: This function does calculations on 64-bit values, but the actual
6779  * execution may occur on 32-bit values. Therefore, things like bitshifts
6780  * need extra checks in the 32-bit case.
6781  */
6782 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
6783                                       struct bpf_insn *insn,
6784                                       struct bpf_reg_state *dst_reg,
6785                                       struct bpf_reg_state src_reg)
6786 {
6787         struct bpf_reg_state *regs = cur_regs(env);
6788         u8 opcode = BPF_OP(insn->code);
6789         bool src_known;
6790         s64 smin_val, smax_val;
6791         u64 umin_val, umax_val;
6792         s32 s32_min_val, s32_max_val;
6793         u32 u32_min_val, u32_max_val;
6794         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
6795         bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
6796         int ret;
6797
6798         smin_val = src_reg.smin_value;
6799         smax_val = src_reg.smax_value;
6800         umin_val = src_reg.umin_value;
6801         umax_val = src_reg.umax_value;
6802
6803         s32_min_val = src_reg.s32_min_value;
6804         s32_max_val = src_reg.s32_max_value;
6805         u32_min_val = src_reg.u32_min_value;
6806         u32_max_val = src_reg.u32_max_value;
6807
6808         if (alu32) {
6809                 src_known = tnum_subreg_is_const(src_reg.var_off);
6810                 if ((src_known &&
6811                      (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
6812                     s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
6813                         /* Taint dst register if offset had invalid bounds
6814                          * derived from e.g. dead branches.
6815                          */
6816                         __mark_reg_unknown(env, dst_reg);
6817                         return 0;
6818                 }
6819         } else {
6820                 src_known = tnum_is_const(src_reg.var_off);
6821                 if ((src_known &&
6822                      (smin_val != smax_val || umin_val != umax_val)) ||
6823                     smin_val > smax_val || umin_val > umax_val) {
6824                         /* Taint dst register if offset had invalid bounds
6825                          * derived from e.g. dead branches.
6826                          */
6827                         __mark_reg_unknown(env, dst_reg);
6828                         return 0;
6829                 }
6830         }
6831
6832         if (!src_known &&
6833             opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
6834                 __mark_reg_unknown(env, dst_reg);
6835                 return 0;
6836         }
6837
6838         if (sanitize_needed(opcode)) {
6839                 ret = sanitize_val_alu(env, insn);
6840                 if (ret < 0)
6841                         return sanitize_err(env, insn, ret, NULL, NULL);
6842         }
6843
6844         /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
6845          * There are two classes of instructions: The first class we track both
6846          * alu32 and alu64 sign/unsigned bounds independently this provides the
6847          * greatest amount of precision when alu operations are mixed with jmp32
6848          * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
6849          * and BPF_OR. This is possible because these ops have fairly easy to
6850          * understand and calculate behavior in both 32-bit and 64-bit alu ops.
6851          * See alu32 verifier tests for examples. The second class of
6852          * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
6853          * with regards to tracking sign/unsigned bounds because the bits may
6854          * cross subreg boundaries in the alu64 case. When this happens we mark
6855          * the reg unbounded in the subreg bound space and use the resulting
6856          * tnum to calculate an approximation of the sign/unsigned bounds.
6857          */
6858         switch (opcode) {
6859         case BPF_ADD:
6860                 scalar32_min_max_add(dst_reg, &src_reg);
6861                 scalar_min_max_add(dst_reg, &src_reg);
6862                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
6863                 break;
6864         case BPF_SUB:
6865                 scalar32_min_max_sub(dst_reg, &src_reg);
6866                 scalar_min_max_sub(dst_reg, &src_reg);
6867                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
6868                 break;
6869         case BPF_MUL:
6870                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
6871                 scalar32_min_max_mul(dst_reg, &src_reg);
6872                 scalar_min_max_mul(dst_reg, &src_reg);
6873                 break;
6874         case BPF_AND:
6875                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
6876                 scalar32_min_max_and(dst_reg, &src_reg);
6877                 scalar_min_max_and(dst_reg, &src_reg);
6878                 break;
6879         case BPF_OR:
6880                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
6881                 scalar32_min_max_or(dst_reg, &src_reg);
6882                 scalar_min_max_or(dst_reg, &src_reg);
6883                 break;
6884         case BPF_XOR:
6885                 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
6886                 scalar32_min_max_xor(dst_reg, &src_reg);
6887                 scalar_min_max_xor(dst_reg, &src_reg);
6888                 break;
6889         case BPF_LSH:
6890                 if (umax_val >= insn_bitness) {
6891                         /* Shifts greater than 31 or 63 are undefined.
6892                          * This includes shifts by a negative number.
6893                          */
6894                         mark_reg_unknown(env, regs, insn->dst_reg);
6895                         break;
6896                 }
6897                 if (alu32)
6898                         scalar32_min_max_lsh(dst_reg, &src_reg);
6899                 else
6900                         scalar_min_max_lsh(dst_reg, &src_reg);
6901                 break;
6902         case BPF_RSH:
6903                 if (umax_val >= insn_bitness) {
6904                         /* Shifts greater than 31 or 63 are undefined.
6905                          * This includes shifts by a negative number.
6906                          */
6907                         mark_reg_unknown(env, regs, insn->dst_reg);
6908                         break;
6909                 }
6910                 if (alu32)
6911                         scalar32_min_max_rsh(dst_reg, &src_reg);
6912                 else
6913                         scalar_min_max_rsh(dst_reg, &src_reg);
6914                 break;
6915         case BPF_ARSH:
6916                 if (umax_val >= insn_bitness) {
6917                         /* Shifts greater than 31 or 63 are undefined.
6918                          * This includes shifts by a negative number.
6919                          */
6920                         mark_reg_unknown(env, regs, insn->dst_reg);
6921                         break;
6922                 }
6923                 if (alu32)
6924                         scalar32_min_max_arsh(dst_reg, &src_reg);
6925                 else
6926                         scalar_min_max_arsh(dst_reg, &src_reg);
6927                 break;
6928         default:
6929                 mark_reg_unknown(env, regs, insn->dst_reg);
6930                 break;
6931         }
6932
6933         /* ALU32 ops are zero extended into 64bit register */
6934         if (alu32)
6935                 zext_32_to_64(dst_reg);
6936
6937         __update_reg_bounds(dst_reg);
6938         __reg_deduce_bounds(dst_reg);
6939         __reg_bound_offset(dst_reg);
6940         return 0;
6941 }
6942
6943 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
6944  * and var_off.
6945  */
6946 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
6947                                    struct bpf_insn *insn)
6948 {
6949         struct bpf_verifier_state *vstate = env->cur_state;
6950         struct bpf_func_state *state = vstate->frame[vstate->curframe];
6951         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
6952         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
6953         u8 opcode = BPF_OP(insn->code);
6954         int err;
6955
6956         dst_reg = &regs[insn->dst_reg];
6957         src_reg = NULL;
6958         if (dst_reg->type != SCALAR_VALUE)
6959                 ptr_reg = dst_reg;
6960         else
6961                 /* Make sure ID is cleared otherwise dst_reg min/max could be
6962                  * incorrectly propagated into other registers by find_equal_scalars()
6963                  */
6964                 dst_reg->id = 0;
6965         if (BPF_SRC(insn->code) == BPF_X) {
6966                 src_reg = &regs[insn->src_reg];
6967                 if (src_reg->type != SCALAR_VALUE) {
6968                         if (dst_reg->type != SCALAR_VALUE) {
6969                                 /* Combining two pointers by any ALU op yields
6970                                  * an arbitrary scalar. Disallow all math except
6971                                  * pointer subtraction
6972                                  */
6973                                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6974                                         mark_reg_unknown(env, regs, insn->dst_reg);
6975                                         return 0;
6976                                 }
6977                                 verbose(env, "R%d pointer %s pointer prohibited\n",
6978                                         insn->dst_reg,
6979                                         bpf_alu_string[opcode >> 4]);
6980                                 return -EACCES;
6981                         } else {
6982                                 /* scalar += pointer
6983                                  * This is legal, but we have to reverse our
6984                                  * src/dest handling in computing the range
6985                                  */
6986                                 err = mark_chain_precision(env, insn->dst_reg);
6987                                 if (err)
6988                                         return err;
6989                                 return adjust_ptr_min_max_vals(env, insn,
6990                                                                src_reg, dst_reg);
6991                         }
6992                 } else if (ptr_reg) {
6993                         /* pointer += scalar */
6994                         err = mark_chain_precision(env, insn->src_reg);
6995                         if (err)
6996                                 return err;
6997                         return adjust_ptr_min_max_vals(env, insn,
6998                                                        dst_reg, src_reg);
6999                 }
7000         } else {
7001                 /* Pretend the src is a reg with a known value, since we only
7002                  * need to be able to read from this state.
7003                  */
7004                 off_reg.type = SCALAR_VALUE;
7005                 __mark_reg_known(&off_reg, insn->imm);
7006                 src_reg = &off_reg;
7007                 if (ptr_reg) /* pointer += K */
7008                         return adjust_ptr_min_max_vals(env, insn,
7009                                                        ptr_reg, src_reg);
7010         }
7011
7012         /* Got here implies adding two SCALAR_VALUEs */
7013         if (WARN_ON_ONCE(ptr_reg)) {
7014                 print_verifier_state(env, state);
7015                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
7016                 return -EINVAL;
7017         }
7018         if (WARN_ON(!src_reg)) {
7019                 print_verifier_state(env, state);
7020                 verbose(env, "verifier internal error: no src_reg\n");
7021                 return -EINVAL;
7022         }
7023         return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
7024 }
7025
7026 /* check validity of 32-bit and 64-bit arithmetic operations */
7027 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
7028 {
7029         struct bpf_reg_state *regs = cur_regs(env);
7030         u8 opcode = BPF_OP(insn->code);
7031         int err;
7032
7033         if (opcode == BPF_END || opcode == BPF_NEG) {
7034                 if (opcode == BPF_NEG) {
7035                         if (BPF_SRC(insn->code) != 0 ||
7036                             insn->src_reg != BPF_REG_0 ||
7037                             insn->off != 0 || insn->imm != 0) {
7038                                 verbose(env, "BPF_NEG uses reserved fields\n");
7039                                 return -EINVAL;
7040                         }
7041                 } else {
7042                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
7043                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
7044                             BPF_CLASS(insn->code) == BPF_ALU64) {
7045                                 verbose(env, "BPF_END uses reserved fields\n");
7046                                 return -EINVAL;
7047                         }
7048                 }
7049
7050                 /* check src operand */
7051                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7052                 if (err)
7053                         return err;
7054
7055                 if (is_pointer_value(env, insn->dst_reg)) {
7056                         verbose(env, "R%d pointer arithmetic prohibited\n",
7057                                 insn->dst_reg);
7058                         return -EACCES;
7059                 }
7060
7061                 /* check dest operand */
7062                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
7063                 if (err)
7064                         return err;
7065
7066         } else if (opcode == BPF_MOV) {
7067
7068                 if (BPF_SRC(insn->code) == BPF_X) {
7069                         if (insn->imm != 0 || insn->off != 0) {
7070                                 verbose(env, "BPF_MOV uses reserved fields\n");
7071                                 return -EINVAL;
7072                         }
7073
7074                         /* check src operand */
7075                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7076                         if (err)
7077                                 return err;
7078                 } else {
7079                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7080                                 verbose(env, "BPF_MOV uses reserved fields\n");
7081                                 return -EINVAL;
7082                         }
7083                 }
7084
7085                 /* check dest operand, mark as required later */
7086                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7087                 if (err)
7088                         return err;
7089
7090                 if (BPF_SRC(insn->code) == BPF_X) {
7091                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
7092                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
7093
7094                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
7095                                 /* case: R1 = R2
7096                                  * copy register state to dest reg
7097                                  */
7098                                 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
7099                                         /* Assign src and dst registers the same ID
7100                                          * that will be used by find_equal_scalars()
7101                                          * to propagate min/max range.
7102                                          */
7103                                         src_reg->id = ++env->id_gen;
7104                                 *dst_reg = *src_reg;
7105                                 dst_reg->live |= REG_LIVE_WRITTEN;
7106                                 dst_reg->subreg_def = DEF_NOT_SUBREG;
7107                         } else {
7108                                 /* R1 = (u32) R2 */
7109                                 if (is_pointer_value(env, insn->src_reg)) {
7110                                         verbose(env,
7111                                                 "R%d partial copy of pointer\n",
7112                                                 insn->src_reg);
7113                                         return -EACCES;
7114                                 } else if (src_reg->type == SCALAR_VALUE) {
7115                                         *dst_reg = *src_reg;
7116                                         /* Make sure ID is cleared otherwise
7117                                          * dst_reg min/max could be incorrectly
7118                                          * propagated into src_reg by find_equal_scalars()
7119                                          */
7120                                         dst_reg->id = 0;
7121                                         dst_reg->live |= REG_LIVE_WRITTEN;
7122                                         dst_reg->subreg_def = env->insn_idx + 1;
7123                                 } else {
7124                                         mark_reg_unknown(env, regs,
7125                                                          insn->dst_reg);
7126                                 }
7127                                 zext_32_to_64(dst_reg);
7128                         }
7129                 } else {
7130                         /* case: R = imm
7131                          * remember the value we stored into this reg
7132                          */
7133                         /* clear any state __mark_reg_known doesn't set */
7134                         mark_reg_unknown(env, regs, insn->dst_reg);
7135                         regs[insn->dst_reg].type = SCALAR_VALUE;
7136                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
7137                                 __mark_reg_known(regs + insn->dst_reg,
7138                                                  insn->imm);
7139                         } else {
7140                                 __mark_reg_known(regs + insn->dst_reg,
7141                                                  (u32)insn->imm);
7142                         }
7143                 }
7144
7145         } else if (opcode > BPF_END) {
7146                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
7147                 return -EINVAL;
7148
7149         } else {        /* all other ALU ops: and, sub, xor, add, ... */
7150
7151                 if (BPF_SRC(insn->code) == BPF_X) {
7152                         if (insn->imm != 0 || insn->off != 0) {
7153                                 verbose(env, "BPF_ALU uses reserved fields\n");
7154                                 return -EINVAL;
7155                         }
7156                         /* check src1 operand */
7157                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7158                         if (err)
7159                                 return err;
7160                 } else {
7161                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7162                                 verbose(env, "BPF_ALU uses reserved fields\n");
7163                                 return -EINVAL;
7164                         }
7165                 }
7166
7167                 /* check src2 operand */
7168                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7169                 if (err)
7170                         return err;
7171
7172                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
7173                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
7174                         verbose(env, "div by zero\n");
7175                         return -EINVAL;
7176                 }
7177
7178                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
7179                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
7180                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
7181
7182                         if (insn->imm < 0 || insn->imm >= size) {
7183                                 verbose(env, "invalid shift %d\n", insn->imm);
7184                                 return -EINVAL;
7185                         }
7186                 }
7187
7188                 /* check dest operand */
7189                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7190                 if (err)
7191                         return err;
7192
7193                 return adjust_reg_min_max_vals(env, insn);
7194         }
7195
7196         return 0;
7197 }
7198
7199 static void __find_good_pkt_pointers(struct bpf_func_state *state,
7200                                      struct bpf_reg_state *dst_reg,
7201                                      enum bpf_reg_type type, u16 new_range)
7202 {
7203         struct bpf_reg_state *reg;
7204         int i;
7205
7206         for (i = 0; i < MAX_BPF_REG; i++) {
7207                 reg = &state->regs[i];
7208                 if (reg->type == type && reg->id == dst_reg->id)
7209                         /* keep the maximum range already checked */
7210                         reg->range = max(reg->range, new_range);
7211         }
7212
7213         bpf_for_each_spilled_reg(i, state, reg) {
7214                 if (!reg)
7215                         continue;
7216                 if (reg->type == type && reg->id == dst_reg->id)
7217                         reg->range = max(reg->range, new_range);
7218         }
7219 }
7220
7221 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
7222                                    struct bpf_reg_state *dst_reg,
7223                                    enum bpf_reg_type type,
7224                                    bool range_right_open)
7225 {
7226         u16 new_range;
7227         int i;
7228
7229         if (dst_reg->off < 0 ||
7230             (dst_reg->off == 0 && range_right_open))
7231                 /* This doesn't give us any range */
7232                 return;
7233
7234         if (dst_reg->umax_value > MAX_PACKET_OFF ||
7235             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
7236                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
7237                  * than pkt_end, but that's because it's also less than pkt.
7238                  */
7239                 return;
7240
7241         new_range = dst_reg->off;
7242         if (range_right_open)
7243                 new_range++;
7244
7245         /* Examples for register markings:
7246          *
7247          * pkt_data in dst register:
7248          *
7249          *   r2 = r3;
7250          *   r2 += 8;
7251          *   if (r2 > pkt_end) goto <handle exception>
7252          *   <access okay>
7253          *
7254          *   r2 = r3;
7255          *   r2 += 8;
7256          *   if (r2 < pkt_end) goto <access okay>
7257          *   <handle exception>
7258          *
7259          *   Where:
7260          *     r2 == dst_reg, pkt_end == src_reg
7261          *     r2=pkt(id=n,off=8,r=0)
7262          *     r3=pkt(id=n,off=0,r=0)
7263          *
7264          * pkt_data in src register:
7265          *
7266          *   r2 = r3;
7267          *   r2 += 8;
7268          *   if (pkt_end >= r2) goto <access okay>
7269          *   <handle exception>
7270          *
7271          *   r2 = r3;
7272          *   r2 += 8;
7273          *   if (pkt_end <= r2) goto <handle exception>
7274          *   <access okay>
7275          *
7276          *   Where:
7277          *     pkt_end == dst_reg, r2 == src_reg
7278          *     r2=pkt(id=n,off=8,r=0)
7279          *     r3=pkt(id=n,off=0,r=0)
7280          *
7281          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
7282          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
7283          * and [r3, r3 + 8-1) respectively is safe to access depending on
7284          * the check.
7285          */
7286
7287         /* If our ids match, then we must have the same max_value.  And we
7288          * don't care about the other reg's fixed offset, since if it's too big
7289          * the range won't allow anything.
7290          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
7291          */
7292         for (i = 0; i <= vstate->curframe; i++)
7293                 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
7294                                          new_range);
7295 }
7296
7297 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
7298 {
7299         struct tnum subreg = tnum_subreg(reg->var_off);
7300         s32 sval = (s32)val;
7301
7302         switch (opcode) {
7303         case BPF_JEQ:
7304                 if (tnum_is_const(subreg))
7305                         return !!tnum_equals_const(subreg, val);
7306                 break;
7307         case BPF_JNE:
7308                 if (tnum_is_const(subreg))
7309                         return !tnum_equals_const(subreg, val);
7310                 break;
7311         case BPF_JSET:
7312                 if ((~subreg.mask & subreg.value) & val)
7313                         return 1;
7314                 if (!((subreg.mask | subreg.value) & val))
7315                         return 0;
7316                 break;
7317         case BPF_JGT:
7318                 if (reg->u32_min_value > val)
7319                         return 1;
7320                 else if (reg->u32_max_value <= val)
7321                         return 0;
7322                 break;
7323         case BPF_JSGT:
7324                 if (reg->s32_min_value > sval)
7325                         return 1;
7326                 else if (reg->s32_max_value <= sval)
7327                         return 0;
7328                 break;
7329         case BPF_JLT:
7330                 if (reg->u32_max_value < val)
7331                         return 1;
7332                 else if (reg->u32_min_value >= val)
7333                         return 0;
7334                 break;
7335         case BPF_JSLT:
7336                 if (reg->s32_max_value < sval)
7337                         return 1;
7338                 else if (reg->s32_min_value >= sval)
7339                         return 0;
7340                 break;
7341         case BPF_JGE:
7342                 if (reg->u32_min_value >= val)
7343                         return 1;
7344                 else if (reg->u32_max_value < val)
7345                         return 0;
7346                 break;
7347         case BPF_JSGE:
7348                 if (reg->s32_min_value >= sval)
7349                         return 1;
7350                 else if (reg->s32_max_value < sval)
7351                         return 0;
7352                 break;
7353         case BPF_JLE:
7354                 if (reg->u32_max_value <= val)
7355                         return 1;
7356                 else if (reg->u32_min_value > val)
7357                         return 0;
7358                 break;
7359         case BPF_JSLE:
7360                 if (reg->s32_max_value <= sval)
7361                         return 1;
7362                 else if (reg->s32_min_value > sval)
7363                         return 0;
7364                 break;
7365         }
7366
7367         return -1;
7368 }
7369
7370
7371 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7372 {
7373         s64 sval = (s64)val;
7374
7375         switch (opcode) {
7376         case BPF_JEQ:
7377                 if (tnum_is_const(reg->var_off))
7378                         return !!tnum_equals_const(reg->var_off, val);
7379                 break;
7380         case BPF_JNE:
7381                 if (tnum_is_const(reg->var_off))
7382                         return !tnum_equals_const(reg->var_off, val);
7383                 break;
7384         case BPF_JSET:
7385                 if ((~reg->var_off.mask & reg->var_off.value) & val)
7386                         return 1;
7387                 if (!((reg->var_off.mask | reg->var_off.value) & val))
7388                         return 0;
7389                 break;
7390         case BPF_JGT:
7391                 if (reg->umin_value > val)
7392                         return 1;
7393                 else if (reg->umax_value <= val)
7394                         return 0;
7395                 break;
7396         case BPF_JSGT:
7397                 if (reg->smin_value > sval)
7398                         return 1;
7399                 else if (reg->smax_value <= sval)
7400                         return 0;
7401                 break;
7402         case BPF_JLT:
7403                 if (reg->umax_value < val)
7404                         return 1;
7405                 else if (reg->umin_value >= val)
7406                         return 0;
7407                 break;
7408         case BPF_JSLT:
7409                 if (reg->smax_value < sval)
7410                         return 1;
7411                 else if (reg->smin_value >= sval)
7412                         return 0;
7413                 break;
7414         case BPF_JGE:
7415                 if (reg->umin_value >= val)
7416                         return 1;
7417                 else if (reg->umax_value < val)
7418                         return 0;
7419                 break;
7420         case BPF_JSGE:
7421                 if (reg->smin_value >= sval)
7422                         return 1;
7423                 else if (reg->smax_value < sval)
7424                         return 0;
7425                 break;
7426         case BPF_JLE:
7427                 if (reg->umax_value <= val)
7428                         return 1;
7429                 else if (reg->umin_value > val)
7430                         return 0;
7431                 break;
7432         case BPF_JSLE:
7433                 if (reg->smax_value <= sval)
7434                         return 1;
7435                 else if (reg->smin_value > sval)
7436                         return 0;
7437                 break;
7438         }
7439
7440         return -1;
7441 }
7442
7443 /* compute branch direction of the expression "if (reg opcode val) goto target;"
7444  * and return:
7445  *  1 - branch will be taken and "goto target" will be executed
7446  *  0 - branch will not be taken and fall-through to next insn
7447  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
7448  *      range [0,10]
7449  */
7450 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
7451                            bool is_jmp32)
7452 {
7453         if (__is_pointer_value(false, reg)) {
7454                 if (!reg_type_not_null(reg->type))
7455                         return -1;
7456
7457                 /* If pointer is valid tests against zero will fail so we can
7458                  * use this to direct branch taken.
7459                  */
7460                 if (val != 0)
7461                         return -1;
7462
7463                 switch (opcode) {
7464                 case BPF_JEQ:
7465                         return 0;
7466                 case BPF_JNE:
7467                         return 1;
7468                 default:
7469                         return -1;
7470                 }
7471         }
7472
7473         if (is_jmp32)
7474                 return is_branch32_taken(reg, val, opcode);
7475         return is_branch64_taken(reg, val, opcode);
7476 }
7477
7478 /* Adjusts the register min/max values in the case that the dst_reg is the
7479  * variable register that we are working on, and src_reg is a constant or we're
7480  * simply doing a BPF_K check.
7481  * In JEQ/JNE cases we also adjust the var_off values.
7482  */
7483 static void reg_set_min_max(struct bpf_reg_state *true_reg,
7484                             struct bpf_reg_state *false_reg,
7485                             u64 val, u32 val32,
7486                             u8 opcode, bool is_jmp32)
7487 {
7488         struct tnum false_32off = tnum_subreg(false_reg->var_off);
7489         struct tnum false_64off = false_reg->var_off;
7490         struct tnum true_32off = tnum_subreg(true_reg->var_off);
7491         struct tnum true_64off = true_reg->var_off;
7492         s64 sval = (s64)val;
7493         s32 sval32 = (s32)val32;
7494
7495         /* If the dst_reg is a pointer, we can't learn anything about its
7496          * variable offset from the compare (unless src_reg were a pointer into
7497          * the same object, but we don't bother with that.
7498          * Since false_reg and true_reg have the same type by construction, we
7499          * only need to check one of them for pointerness.
7500          */
7501         if (__is_pointer_value(false, false_reg))
7502                 return;
7503
7504         switch (opcode) {
7505         case BPF_JEQ:
7506         case BPF_JNE:
7507         {
7508                 struct bpf_reg_state *reg =
7509                         opcode == BPF_JEQ ? true_reg : false_reg;
7510
7511                 /* JEQ/JNE comparison doesn't change the register equivalence.
7512                  * r1 = r2;
7513                  * if (r1 == 42) goto label;
7514                  * ...
7515                  * label: // here both r1 and r2 are known to be 42.
7516                  *
7517                  * Hence when marking register as known preserve it's ID.
7518                  */
7519                 if (is_jmp32)
7520                         __mark_reg32_known(reg, val32);
7521                 else
7522                         ___mark_reg_known(reg, val);
7523                 break;
7524         }
7525         case BPF_JSET:
7526                 if (is_jmp32) {
7527                         false_32off = tnum_and(false_32off, tnum_const(~val32));
7528                         if (is_power_of_2(val32))
7529                                 true_32off = tnum_or(true_32off,
7530                                                      tnum_const(val32));
7531                 } else {
7532                         false_64off = tnum_and(false_64off, tnum_const(~val));
7533                         if (is_power_of_2(val))
7534                                 true_64off = tnum_or(true_64off,
7535                                                      tnum_const(val));
7536                 }
7537                 break;
7538         case BPF_JGE:
7539         case BPF_JGT:
7540         {
7541                 if (is_jmp32) {
7542                         u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
7543                         u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
7544
7545                         false_reg->u32_max_value = min(false_reg->u32_max_value,
7546                                                        false_umax);
7547                         true_reg->u32_min_value = max(true_reg->u32_min_value,
7548                                                       true_umin);
7549                 } else {
7550                         u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
7551                         u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
7552
7553                         false_reg->umax_value = min(false_reg->umax_value, false_umax);
7554                         true_reg->umin_value = max(true_reg->umin_value, true_umin);
7555                 }
7556                 break;
7557         }
7558         case BPF_JSGE:
7559         case BPF_JSGT:
7560         {
7561                 if (is_jmp32) {
7562                         s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
7563                         s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
7564
7565                         false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
7566                         true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
7567                 } else {
7568                         s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
7569                         s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
7570
7571                         false_reg->smax_value = min(false_reg->smax_value, false_smax);
7572                         true_reg->smin_value = max(true_reg->smin_value, true_smin);
7573                 }
7574                 break;
7575         }
7576         case BPF_JLE:
7577         case BPF_JLT:
7578         {
7579                 if (is_jmp32) {
7580                         u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
7581                         u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
7582
7583                         false_reg->u32_min_value = max(false_reg->u32_min_value,
7584                                                        false_umin);
7585                         true_reg->u32_max_value = min(true_reg->u32_max_value,
7586                                                       true_umax);
7587                 } else {
7588                         u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
7589                         u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
7590
7591                         false_reg->umin_value = max(false_reg->umin_value, false_umin);
7592                         true_reg->umax_value = min(true_reg->umax_value, true_umax);
7593                 }
7594                 break;
7595         }
7596         case BPF_JSLE:
7597         case BPF_JSLT:
7598         {
7599                 if (is_jmp32) {
7600                         s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
7601                         s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
7602
7603                         false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
7604                         true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
7605                 } else {
7606                         s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
7607                         s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
7608
7609                         false_reg->smin_value = max(false_reg->smin_value, false_smin);
7610                         true_reg->smax_value = min(true_reg->smax_value, true_smax);
7611                 }
7612                 break;
7613         }
7614         default:
7615                 return;
7616         }
7617
7618         if (is_jmp32) {
7619                 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
7620                                              tnum_subreg(false_32off));
7621                 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
7622                                             tnum_subreg(true_32off));
7623                 __reg_combine_32_into_64(false_reg);
7624                 __reg_combine_32_into_64(true_reg);
7625         } else {
7626                 false_reg->var_off = false_64off;
7627                 true_reg->var_off = true_64off;
7628                 __reg_combine_64_into_32(false_reg);
7629                 __reg_combine_64_into_32(true_reg);
7630         }
7631 }
7632
7633 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
7634  * the variable reg.
7635  */
7636 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
7637                                 struct bpf_reg_state *false_reg,
7638                                 u64 val, u32 val32,
7639                                 u8 opcode, bool is_jmp32)
7640 {
7641         /* How can we transform "a <op> b" into "b <op> a"? */
7642         static const u8 opcode_flip[16] = {
7643                 /* these stay the same */
7644                 [BPF_JEQ  >> 4] = BPF_JEQ,
7645                 [BPF_JNE  >> 4] = BPF_JNE,
7646                 [BPF_JSET >> 4] = BPF_JSET,
7647                 /* these swap "lesser" and "greater" (L and G in the opcodes) */
7648                 [BPF_JGE  >> 4] = BPF_JLE,
7649                 [BPF_JGT  >> 4] = BPF_JLT,
7650                 [BPF_JLE  >> 4] = BPF_JGE,
7651                 [BPF_JLT  >> 4] = BPF_JGT,
7652                 [BPF_JSGE >> 4] = BPF_JSLE,
7653                 [BPF_JSGT >> 4] = BPF_JSLT,
7654                 [BPF_JSLE >> 4] = BPF_JSGE,
7655                 [BPF_JSLT >> 4] = BPF_JSGT
7656         };
7657         opcode = opcode_flip[opcode >> 4];
7658         /* This uses zero as "not present in table"; luckily the zero opcode,
7659          * BPF_JA, can't get here.
7660          */
7661         if (opcode)
7662                 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
7663 }
7664
7665 /* Regs are known to be equal, so intersect their min/max/var_off */
7666 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
7667                                   struct bpf_reg_state *dst_reg)
7668 {
7669         src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
7670                                                         dst_reg->umin_value);
7671         src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
7672                                                         dst_reg->umax_value);
7673         src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
7674                                                         dst_reg->smin_value);
7675         src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
7676                                                         dst_reg->smax_value);
7677         src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
7678                                                              dst_reg->var_off);
7679         /* We might have learned new bounds from the var_off. */
7680         __update_reg_bounds(src_reg);
7681         __update_reg_bounds(dst_reg);
7682         /* We might have learned something about the sign bit. */
7683         __reg_deduce_bounds(src_reg);
7684         __reg_deduce_bounds(dst_reg);
7685         /* We might have learned some bits from the bounds. */
7686         __reg_bound_offset(src_reg);
7687         __reg_bound_offset(dst_reg);
7688         /* Intersecting with the old var_off might have improved our bounds
7689          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
7690          * then new var_off is (0; 0x7f...fc) which improves our umax.
7691          */
7692         __update_reg_bounds(src_reg);
7693         __update_reg_bounds(dst_reg);
7694 }
7695
7696 static void reg_combine_min_max(struct bpf_reg_state *true_src,
7697                                 struct bpf_reg_state *true_dst,
7698                                 struct bpf_reg_state *false_src,
7699                                 struct bpf_reg_state *false_dst,
7700                                 u8 opcode)
7701 {
7702         switch (opcode) {
7703         case BPF_JEQ:
7704                 __reg_combine_min_max(true_src, true_dst);
7705                 break;
7706         case BPF_JNE:
7707                 __reg_combine_min_max(false_src, false_dst);
7708                 break;
7709         }
7710 }
7711
7712 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
7713                                  struct bpf_reg_state *reg, u32 id,
7714                                  bool is_null)
7715 {
7716         if (reg_type_may_be_null(reg->type) && reg->id == id &&
7717             !WARN_ON_ONCE(!reg->id)) {
7718                 /* Old offset (both fixed and variable parts) should
7719                  * have been known-zero, because we don't allow pointer
7720                  * arithmetic on pointers that might be NULL.
7721                  */
7722                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
7723                                  !tnum_equals_const(reg->var_off, 0) ||
7724                                  reg->off)) {
7725                         __mark_reg_known_zero(reg);
7726                         reg->off = 0;
7727                 }
7728                 if (is_null) {
7729                         reg->type = SCALAR_VALUE;
7730                 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
7731                         const struct bpf_map *map = reg->map_ptr;
7732
7733                         if (map->inner_map_meta) {
7734                                 reg->type = CONST_PTR_TO_MAP;
7735                                 reg->map_ptr = map->inner_map_meta;
7736                         } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
7737                                 reg->type = PTR_TO_XDP_SOCK;
7738                         } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
7739                                    map->map_type == BPF_MAP_TYPE_SOCKHASH) {
7740                                 reg->type = PTR_TO_SOCKET;
7741                         } else {
7742                                 reg->type = PTR_TO_MAP_VALUE;
7743                         }
7744                 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
7745                         reg->type = PTR_TO_SOCKET;
7746                 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
7747                         reg->type = PTR_TO_SOCK_COMMON;
7748                 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
7749                         reg->type = PTR_TO_TCP_SOCK;
7750                 } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
7751                         reg->type = PTR_TO_BTF_ID;
7752                 } else if (reg->type == PTR_TO_MEM_OR_NULL) {
7753                         reg->type = PTR_TO_MEM;
7754                 } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
7755                         reg->type = PTR_TO_RDONLY_BUF;
7756                 } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
7757                         reg->type = PTR_TO_RDWR_BUF;
7758                 }
7759                 if (is_null) {
7760                         /* We don't need id and ref_obj_id from this point
7761                          * onwards anymore, thus we should better reset it,
7762                          * so that state pruning has chances to take effect.
7763                          */
7764                         reg->id = 0;
7765                         reg->ref_obj_id = 0;
7766                 } else if (!reg_may_point_to_spin_lock(reg)) {
7767                         /* For not-NULL ptr, reg->ref_obj_id will be reset
7768                          * in release_reg_references().
7769                          *
7770                          * reg->id is still used by spin_lock ptr. Other
7771                          * than spin_lock ptr type, reg->id can be reset.
7772                          */
7773                         reg->id = 0;
7774                 }
7775         }
7776 }
7777
7778 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
7779                                     bool is_null)
7780 {
7781         struct bpf_reg_state *reg;
7782         int i;
7783
7784         for (i = 0; i < MAX_BPF_REG; i++)
7785                 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
7786
7787         bpf_for_each_spilled_reg(i, state, reg) {
7788                 if (!reg)
7789                         continue;
7790                 mark_ptr_or_null_reg(state, reg, id, is_null);
7791         }
7792 }
7793
7794 /* The logic is similar to find_good_pkt_pointers(), both could eventually
7795  * be folded together at some point.
7796  */
7797 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
7798                                   bool is_null)
7799 {
7800         struct bpf_func_state *state = vstate->frame[vstate->curframe];
7801         struct bpf_reg_state *regs = state->regs;
7802         u32 ref_obj_id = regs[regno].ref_obj_id;
7803         u32 id = regs[regno].id;
7804         int i;
7805
7806         if (ref_obj_id && ref_obj_id == id && is_null)
7807                 /* regs[regno] is in the " == NULL" branch.
7808                  * No one could have freed the reference state before
7809                  * doing the NULL check.
7810                  */
7811                 WARN_ON_ONCE(release_reference_state(state, id));
7812
7813         for (i = 0; i <= vstate->curframe; i++)
7814                 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
7815 }
7816
7817 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
7818                                    struct bpf_reg_state *dst_reg,
7819                                    struct bpf_reg_state *src_reg,
7820                                    struct bpf_verifier_state *this_branch,
7821                                    struct bpf_verifier_state *other_branch)
7822 {
7823         if (BPF_SRC(insn->code) != BPF_X)
7824                 return false;
7825
7826         /* Pointers are always 64-bit. */
7827         if (BPF_CLASS(insn->code) == BPF_JMP32)
7828                 return false;
7829
7830         switch (BPF_OP(insn->code)) {
7831         case BPF_JGT:
7832                 if ((dst_reg->type == PTR_TO_PACKET &&
7833                      src_reg->type == PTR_TO_PACKET_END) ||
7834                     (dst_reg->type == PTR_TO_PACKET_META &&
7835                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7836                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
7837                         find_good_pkt_pointers(this_branch, dst_reg,
7838                                                dst_reg->type, false);
7839                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7840                             src_reg->type == PTR_TO_PACKET) ||
7841                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7842                             src_reg->type == PTR_TO_PACKET_META)) {
7843                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
7844                         find_good_pkt_pointers(other_branch, src_reg,
7845                                                src_reg->type, true);
7846                 } else {
7847                         return false;
7848                 }
7849                 break;
7850         case BPF_JLT:
7851                 if ((dst_reg->type == PTR_TO_PACKET &&
7852                      src_reg->type == PTR_TO_PACKET_END) ||
7853                     (dst_reg->type == PTR_TO_PACKET_META &&
7854                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7855                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
7856                         find_good_pkt_pointers(other_branch, dst_reg,
7857                                                dst_reg->type, true);
7858                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7859                             src_reg->type == PTR_TO_PACKET) ||
7860                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7861                             src_reg->type == PTR_TO_PACKET_META)) {
7862                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
7863                         find_good_pkt_pointers(this_branch, src_reg,
7864                                                src_reg->type, false);
7865                 } else {
7866                         return false;
7867                 }
7868                 break;
7869         case BPF_JGE:
7870                 if ((dst_reg->type == PTR_TO_PACKET &&
7871                      src_reg->type == PTR_TO_PACKET_END) ||
7872                     (dst_reg->type == PTR_TO_PACKET_META &&
7873                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7874                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
7875                         find_good_pkt_pointers(this_branch, dst_reg,
7876                                                dst_reg->type, true);
7877                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7878                             src_reg->type == PTR_TO_PACKET) ||
7879                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7880                             src_reg->type == PTR_TO_PACKET_META)) {
7881                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
7882                         find_good_pkt_pointers(other_branch, src_reg,
7883                                                src_reg->type, false);
7884                 } else {
7885                         return false;
7886                 }
7887                 break;
7888         case BPF_JLE:
7889                 if ((dst_reg->type == PTR_TO_PACKET &&
7890                      src_reg->type == PTR_TO_PACKET_END) ||
7891                     (dst_reg->type == PTR_TO_PACKET_META &&
7892                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7893                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
7894                         find_good_pkt_pointers(other_branch, dst_reg,
7895                                                dst_reg->type, false);
7896                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7897                             src_reg->type == PTR_TO_PACKET) ||
7898                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7899                             src_reg->type == PTR_TO_PACKET_META)) {
7900                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
7901                         find_good_pkt_pointers(this_branch, src_reg,
7902                                                src_reg->type, true);
7903                 } else {
7904                         return false;
7905                 }
7906                 break;
7907         default:
7908                 return false;
7909         }
7910
7911         return true;
7912 }
7913
7914 static void find_equal_scalars(struct bpf_verifier_state *vstate,
7915                                struct bpf_reg_state *known_reg)
7916 {
7917         struct bpf_func_state *state;
7918         struct bpf_reg_state *reg;
7919         int i, j;
7920
7921         for (i = 0; i <= vstate->curframe; i++) {
7922                 state = vstate->frame[i];
7923                 for (j = 0; j < MAX_BPF_REG; j++) {
7924                         reg = &state->regs[j];
7925                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
7926                                 *reg = *known_reg;
7927                 }
7928
7929                 bpf_for_each_spilled_reg(j, state, reg) {
7930                         if (!reg)
7931                                 continue;
7932                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
7933                                 *reg = *known_reg;
7934                 }
7935         }
7936 }
7937
7938 static int check_cond_jmp_op(struct bpf_verifier_env *env,
7939                              struct bpf_insn *insn, int *insn_idx)
7940 {
7941         struct bpf_verifier_state *this_branch = env->cur_state;
7942         struct bpf_verifier_state *other_branch;
7943         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
7944         struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
7945         u8 opcode = BPF_OP(insn->code);
7946         bool is_jmp32;
7947         int pred = -1;
7948         int err;
7949
7950         /* Only conditional jumps are expected to reach here. */
7951         if (opcode == BPF_JA || opcode > BPF_JSLE) {
7952                 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
7953                 return -EINVAL;
7954         }
7955
7956         if (BPF_SRC(insn->code) == BPF_X) {
7957                 if (insn->imm != 0) {
7958                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7959                         return -EINVAL;
7960                 }
7961
7962                 /* check src1 operand */
7963                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
7964                 if (err)
7965                         return err;
7966
7967                 if (is_pointer_value(env, insn->src_reg)) {
7968                         verbose(env, "R%d pointer comparison prohibited\n",
7969                                 insn->src_reg);
7970                         return -EACCES;
7971                 }
7972                 src_reg = &regs[insn->src_reg];
7973         } else {
7974                 if (insn->src_reg != BPF_REG_0) {
7975                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7976                         return -EINVAL;
7977                 }
7978         }
7979
7980         /* check src2 operand */
7981         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7982         if (err)
7983                 return err;
7984
7985         dst_reg = &regs[insn->dst_reg];
7986         is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
7987
7988         if (BPF_SRC(insn->code) == BPF_K) {
7989                 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
7990         } else if (src_reg->type == SCALAR_VALUE &&
7991                    is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
7992                 pred = is_branch_taken(dst_reg,
7993                                        tnum_subreg(src_reg->var_off).value,
7994                                        opcode,
7995                                        is_jmp32);
7996         } else if (src_reg->type == SCALAR_VALUE &&
7997                    !is_jmp32 && tnum_is_const(src_reg->var_off)) {
7998                 pred = is_branch_taken(dst_reg,
7999                                        src_reg->var_off.value,
8000                                        opcode,
8001                                        is_jmp32);
8002         }
8003
8004         if (pred >= 0) {
8005                 /* If we get here with a dst_reg pointer type it is because
8006                  * above is_branch_taken() special cased the 0 comparison.
8007                  */
8008                 if (!__is_pointer_value(false, dst_reg))
8009                         err = mark_chain_precision(env, insn->dst_reg);
8010                 if (BPF_SRC(insn->code) == BPF_X && !err)
8011                         err = mark_chain_precision(env, insn->src_reg);
8012                 if (err)
8013                         return err;
8014         }
8015
8016         if (pred == 1) {
8017                 /* Only follow the goto, ignore fall-through. If needed, push
8018                  * the fall-through branch for simulation under speculative
8019                  * execution.
8020                  */
8021                 if (!env->bypass_spec_v1 &&
8022                     !sanitize_speculative_path(env, insn, *insn_idx + 1,
8023                                                *insn_idx))
8024                         return -EFAULT;
8025                 *insn_idx += insn->off;
8026                 return 0;
8027         } else if (pred == 0) {
8028                 /* Only follow the fall-through branch, since that's where the
8029                  * program will go. If needed, push the goto branch for
8030                  * simulation under speculative execution.
8031                  */
8032                 if (!env->bypass_spec_v1 &&
8033                     !sanitize_speculative_path(env, insn,
8034                                                *insn_idx + insn->off + 1,
8035                                                *insn_idx))
8036                         return -EFAULT;
8037                 return 0;
8038         }
8039
8040         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
8041                                   false);
8042         if (!other_branch)
8043                 return -EFAULT;
8044         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
8045
8046         /* detect if we are comparing against a constant value so we can adjust
8047          * our min/max values for our dst register.
8048          * this is only legit if both are scalars (or pointers to the same
8049          * object, I suppose, but we don't support that right now), because
8050          * otherwise the different base pointers mean the offsets aren't
8051          * comparable.
8052          */
8053         if (BPF_SRC(insn->code) == BPF_X) {
8054                 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
8055
8056                 if (dst_reg->type == SCALAR_VALUE &&
8057                     src_reg->type == SCALAR_VALUE) {
8058                         if (tnum_is_const(src_reg->var_off) ||
8059                             (is_jmp32 &&
8060                              tnum_is_const(tnum_subreg(src_reg->var_off))))
8061                                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
8062                                                 dst_reg,
8063                                                 src_reg->var_off.value,
8064                                                 tnum_subreg(src_reg->var_off).value,
8065                                                 opcode, is_jmp32);
8066                         else if (tnum_is_const(dst_reg->var_off) ||
8067                                  (is_jmp32 &&
8068                                   tnum_is_const(tnum_subreg(dst_reg->var_off))))
8069                                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
8070                                                     src_reg,
8071                                                     dst_reg->var_off.value,
8072                                                     tnum_subreg(dst_reg->var_off).value,
8073                                                     opcode, is_jmp32);
8074                         else if (!is_jmp32 &&
8075                                  (opcode == BPF_JEQ || opcode == BPF_JNE))
8076                                 /* Comparing for equality, we can combine knowledge */
8077                                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
8078                                                     &other_branch_regs[insn->dst_reg],
8079                                                     src_reg, dst_reg, opcode);
8080                         if (src_reg->id &&
8081                             !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
8082                                 find_equal_scalars(this_branch, src_reg);
8083                                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
8084                         }
8085
8086                 }
8087         } else if (dst_reg->type == SCALAR_VALUE) {
8088                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
8089                                         dst_reg, insn->imm, (u32)insn->imm,
8090                                         opcode, is_jmp32);
8091         }
8092
8093         if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
8094             !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
8095                 find_equal_scalars(this_branch, dst_reg);
8096                 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
8097         }
8098
8099         /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
8100          * NOTE: these optimizations below are related with pointer comparison
8101          *       which will never be JMP32.
8102          */
8103         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
8104             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
8105             reg_type_may_be_null(dst_reg->type)) {
8106                 /* Mark all identical registers in each branch as either
8107                  * safe or unknown depending R == 0 or R != 0 conditional.
8108                  */
8109                 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
8110                                       opcode == BPF_JNE);
8111                 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
8112                                       opcode == BPF_JEQ);
8113         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
8114                                            this_branch, other_branch) &&
8115                    is_pointer_value(env, insn->dst_reg)) {
8116                 verbose(env, "R%d pointer comparison prohibited\n",
8117                         insn->dst_reg);
8118                 return -EACCES;
8119         }
8120         if (env->log.level & BPF_LOG_LEVEL)
8121                 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
8122         return 0;
8123 }
8124
8125 /* verify BPF_LD_IMM64 instruction */
8126 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
8127 {
8128         struct bpf_insn_aux_data *aux = cur_aux(env);
8129         struct bpf_reg_state *regs = cur_regs(env);
8130         struct bpf_reg_state *dst_reg;
8131         struct bpf_map *map;
8132         int err;
8133
8134         if (BPF_SIZE(insn->code) != BPF_DW) {
8135                 verbose(env, "invalid BPF_LD_IMM insn\n");
8136                 return -EINVAL;
8137         }
8138         if (insn->off != 0) {
8139                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
8140                 return -EINVAL;
8141         }
8142
8143         err = check_reg_arg(env, insn->dst_reg, DST_OP);
8144         if (err)
8145                 return err;
8146
8147         dst_reg = &regs[insn->dst_reg];
8148         if (insn->src_reg == 0) {
8149                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
8150
8151                 dst_reg->type = SCALAR_VALUE;
8152                 __mark_reg_known(&regs[insn->dst_reg], imm);
8153                 return 0;
8154         }
8155
8156         if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
8157                 mark_reg_known_zero(env, regs, insn->dst_reg);
8158
8159                 dst_reg->type = aux->btf_var.reg_type;
8160                 switch (dst_reg->type) {
8161                 case PTR_TO_MEM:
8162                         dst_reg->mem_size = aux->btf_var.mem_size;
8163                         break;
8164                 case PTR_TO_BTF_ID:
8165                 case PTR_TO_PERCPU_BTF_ID:
8166                         dst_reg->btf_id = aux->btf_var.btf_id;
8167                         break;
8168                 default:
8169                         verbose(env, "bpf verifier is misconfigured\n");
8170                         return -EFAULT;
8171                 }
8172                 return 0;
8173         }
8174
8175         map = env->used_maps[aux->map_index];
8176         mark_reg_known_zero(env, regs, insn->dst_reg);
8177         dst_reg->map_ptr = map;
8178
8179         if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
8180                 dst_reg->type = PTR_TO_MAP_VALUE;
8181                 dst_reg->off = aux->map_off;
8182                 if (map_value_has_spin_lock(map))
8183                         dst_reg->id = ++env->id_gen;
8184         } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8185                 dst_reg->type = CONST_PTR_TO_MAP;
8186         } else {
8187                 verbose(env, "bpf verifier is misconfigured\n");
8188                 return -EINVAL;
8189         }
8190
8191         return 0;
8192 }
8193
8194 static bool may_access_skb(enum bpf_prog_type type)
8195 {
8196         switch (type) {
8197         case BPF_PROG_TYPE_SOCKET_FILTER:
8198         case BPF_PROG_TYPE_SCHED_CLS:
8199         case BPF_PROG_TYPE_SCHED_ACT:
8200                 return true;
8201         default:
8202                 return false;
8203         }
8204 }
8205
8206 /* verify safety of LD_ABS|LD_IND instructions:
8207  * - they can only appear in the programs where ctx == skb
8208  * - since they are wrappers of function calls, they scratch R1-R5 registers,
8209  *   preserve R6-R9, and store return value into R0
8210  *
8211  * Implicit input:
8212  *   ctx == skb == R6 == CTX
8213  *
8214  * Explicit input:
8215  *   SRC == any register
8216  *   IMM == 32-bit immediate
8217  *
8218  * Output:
8219  *   R0 - 8/16/32-bit skb data converted to cpu endianness
8220  */
8221 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
8222 {
8223         struct bpf_reg_state *regs = cur_regs(env);
8224         static const int ctx_reg = BPF_REG_6;
8225         u8 mode = BPF_MODE(insn->code);
8226         int i, err;
8227
8228         if (!may_access_skb(resolve_prog_type(env->prog))) {
8229                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
8230                 return -EINVAL;
8231         }
8232
8233         if (!env->ops->gen_ld_abs) {
8234                 verbose(env, "bpf verifier is misconfigured\n");
8235                 return -EINVAL;
8236         }
8237
8238         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
8239             BPF_SIZE(insn->code) == BPF_DW ||
8240             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
8241                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
8242                 return -EINVAL;
8243         }
8244
8245         /* check whether implicit source operand (register R6) is readable */
8246         err = check_reg_arg(env, ctx_reg, SRC_OP);
8247         if (err)
8248                 return err;
8249
8250         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
8251          * gen_ld_abs() may terminate the program at runtime, leading to
8252          * reference leak.
8253          */
8254         err = check_reference_leak(env);
8255         if (err) {
8256                 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
8257                 return err;
8258         }
8259
8260         if (env->cur_state->active_spin_lock) {
8261                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
8262                 return -EINVAL;
8263         }
8264
8265         if (regs[ctx_reg].type != PTR_TO_CTX) {
8266                 verbose(env,
8267                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
8268                 return -EINVAL;
8269         }
8270
8271         if (mode == BPF_IND) {
8272                 /* check explicit source operand */
8273                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
8274                 if (err)
8275                         return err;
8276         }
8277
8278         err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
8279         if (err < 0)
8280                 return err;
8281
8282         /* reset caller saved regs to unreadable */
8283         for (i = 0; i < CALLER_SAVED_REGS; i++) {
8284                 mark_reg_not_init(env, regs, caller_saved[i]);
8285                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8286         }
8287
8288         /* mark destination R0 register as readable, since it contains
8289          * the value fetched from the packet.
8290          * Already marked as written above.
8291          */
8292         mark_reg_unknown(env, regs, BPF_REG_0);
8293         /* ld_abs load up to 32-bit skb data. */
8294         regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
8295         return 0;
8296 }
8297
8298 static int check_return_code(struct bpf_verifier_env *env)
8299 {
8300         struct tnum enforce_attach_type_range = tnum_unknown;
8301         const struct bpf_prog *prog = env->prog;
8302         struct bpf_reg_state *reg;
8303         struct tnum range = tnum_range(0, 1);
8304         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8305         int err;
8306         const bool is_subprog = env->cur_state->frame[0]->subprogno;
8307
8308         /* LSM and struct_ops func-ptr's return type could be "void" */
8309         if (!is_subprog &&
8310             (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
8311              prog_type == BPF_PROG_TYPE_LSM) &&
8312             !prog->aux->attach_func_proto->type)
8313                 return 0;
8314
8315         /* eBPF calling convetion is such that R0 is used
8316          * to return the value from eBPF program.
8317          * Make sure that it's readable at this time
8318          * of bpf_exit, which means that program wrote
8319          * something into it earlier
8320          */
8321         err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8322         if (err)
8323                 return err;
8324
8325         if (is_pointer_value(env, BPF_REG_0)) {
8326                 verbose(env, "R0 leaks addr as return value\n");
8327                 return -EACCES;
8328         }
8329
8330         reg = cur_regs(env) + BPF_REG_0;
8331         if (is_subprog) {
8332                 if (reg->type != SCALAR_VALUE) {
8333                         verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
8334                                 reg_type_str[reg->type]);
8335                         return -EINVAL;
8336                 }
8337                 return 0;
8338         }
8339
8340         switch (prog_type) {
8341         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
8342                 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
8343                     env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
8344                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
8345                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
8346                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
8347                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
8348                         range = tnum_range(1, 1);
8349                 break;
8350         case BPF_PROG_TYPE_CGROUP_SKB:
8351                 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
8352                         range = tnum_range(0, 3);
8353                         enforce_attach_type_range = tnum_range(2, 3);
8354                 }
8355                 break;
8356         case BPF_PROG_TYPE_CGROUP_SOCK:
8357         case BPF_PROG_TYPE_SOCK_OPS:
8358         case BPF_PROG_TYPE_CGROUP_DEVICE:
8359         case BPF_PROG_TYPE_CGROUP_SYSCTL:
8360         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
8361                 break;
8362         case BPF_PROG_TYPE_RAW_TRACEPOINT:
8363                 if (!env->prog->aux->attach_btf_id)
8364                         return 0;
8365                 range = tnum_const(0);
8366                 break;
8367         case BPF_PROG_TYPE_TRACING:
8368                 switch (env->prog->expected_attach_type) {
8369                 case BPF_TRACE_FENTRY:
8370                 case BPF_TRACE_FEXIT:
8371                         range = tnum_const(0);
8372                         break;
8373                 case BPF_TRACE_RAW_TP:
8374                 case BPF_MODIFY_RETURN:
8375                         return 0;
8376                 case BPF_TRACE_ITER:
8377                         break;
8378                 default:
8379                         return -ENOTSUPP;
8380                 }
8381                 break;
8382         case BPF_PROG_TYPE_SK_LOOKUP:
8383                 range = tnum_range(SK_DROP, SK_PASS);
8384                 break;
8385         case BPF_PROG_TYPE_EXT:
8386                 /* freplace program can return anything as its return value
8387                  * depends on the to-be-replaced kernel func or bpf program.
8388                  */
8389         default:
8390                 return 0;
8391         }
8392
8393         if (reg->type != SCALAR_VALUE) {
8394                 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
8395                         reg_type_str[reg->type]);
8396                 return -EINVAL;
8397         }
8398
8399         if (!tnum_in(range, reg->var_off)) {
8400                 char tn_buf[48];
8401
8402                 verbose(env, "At program exit the register R0 ");
8403                 if (!tnum_is_unknown(reg->var_off)) {
8404                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8405                         verbose(env, "has value %s", tn_buf);
8406                 } else {
8407                         verbose(env, "has unknown scalar value");
8408                 }
8409                 tnum_strn(tn_buf, sizeof(tn_buf), range);
8410                 verbose(env, " should have been in %s\n", tn_buf);
8411                 return -EINVAL;
8412         }
8413
8414         if (!tnum_is_unknown(enforce_attach_type_range) &&
8415             tnum_in(enforce_attach_type_range, reg->var_off))
8416                 env->prog->enforce_expected_attach_type = 1;
8417         return 0;
8418 }
8419
8420 /* non-recursive DFS pseudo code
8421  * 1  procedure DFS-iterative(G,v):
8422  * 2      label v as discovered
8423  * 3      let S be a stack
8424  * 4      S.push(v)
8425  * 5      while S is not empty
8426  * 6            t <- S.pop()
8427  * 7            if t is what we're looking for:
8428  * 8                return t
8429  * 9            for all edges e in G.adjacentEdges(t) do
8430  * 10               if edge e is already labelled
8431  * 11                   continue with the next edge
8432  * 12               w <- G.adjacentVertex(t,e)
8433  * 13               if vertex w is not discovered and not explored
8434  * 14                   label e as tree-edge
8435  * 15                   label w as discovered
8436  * 16                   S.push(w)
8437  * 17                   continue at 5
8438  * 18               else if vertex w is discovered
8439  * 19                   label e as back-edge
8440  * 20               else
8441  * 21                   // vertex w is explored
8442  * 22                   label e as forward- or cross-edge
8443  * 23           label t as explored
8444  * 24           S.pop()
8445  *
8446  * convention:
8447  * 0x10 - discovered
8448  * 0x11 - discovered and fall-through edge labelled
8449  * 0x12 - discovered and fall-through and branch edges labelled
8450  * 0x20 - explored
8451  */
8452
8453 enum {
8454         DISCOVERED = 0x10,
8455         EXPLORED = 0x20,
8456         FALLTHROUGH = 1,
8457         BRANCH = 2,
8458 };
8459
8460 static u32 state_htab_size(struct bpf_verifier_env *env)
8461 {
8462         return env->prog->len;
8463 }
8464
8465 static struct bpf_verifier_state_list **explored_state(
8466                                         struct bpf_verifier_env *env,
8467                                         int idx)
8468 {
8469         struct bpf_verifier_state *cur = env->cur_state;
8470         struct bpf_func_state *state = cur->frame[cur->curframe];
8471
8472         return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
8473 }
8474
8475 static void init_explored_state(struct bpf_verifier_env *env, int idx)
8476 {
8477         env->insn_aux_data[idx].prune_point = true;
8478 }
8479
8480 /* t, w, e - match pseudo-code above:
8481  * t - index of current instruction
8482  * w - next instruction
8483  * e - edge
8484  */
8485 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
8486                      bool loop_ok)
8487 {
8488         int *insn_stack = env->cfg.insn_stack;
8489         int *insn_state = env->cfg.insn_state;
8490
8491         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
8492                 return 0;
8493
8494         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
8495                 return 0;
8496
8497         if (w < 0 || w >= env->prog->len) {
8498                 verbose_linfo(env, t, "%d: ", t);
8499                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
8500                 return -EINVAL;
8501         }
8502
8503         if (e == BRANCH)
8504                 /* mark branch target for state pruning */
8505                 init_explored_state(env, w);
8506
8507         if (insn_state[w] == 0) {
8508                 /* tree-edge */
8509                 insn_state[t] = DISCOVERED | e;
8510                 insn_state[w] = DISCOVERED;
8511                 if (env->cfg.cur_stack >= env->prog->len)
8512                         return -E2BIG;
8513                 insn_stack[env->cfg.cur_stack++] = w;
8514                 return 1;
8515         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
8516                 if (loop_ok && env->bpf_capable)
8517                         return 0;
8518                 verbose_linfo(env, t, "%d: ", t);
8519                 verbose_linfo(env, w, "%d: ", w);
8520                 verbose(env, "back-edge from insn %d to %d\n", t, w);
8521                 return -EINVAL;
8522         } else if (insn_state[w] == EXPLORED) {
8523                 /* forward- or cross-edge */
8524                 insn_state[t] = DISCOVERED | e;
8525         } else {
8526                 verbose(env, "insn state internal bug\n");
8527                 return -EFAULT;
8528         }
8529         return 0;
8530 }
8531
8532 /* non-recursive depth-first-search to detect loops in BPF program
8533  * loop == back-edge in directed graph
8534  */
8535 static int check_cfg(struct bpf_verifier_env *env)
8536 {
8537         struct bpf_insn *insns = env->prog->insnsi;
8538         int insn_cnt = env->prog->len;
8539         int *insn_stack, *insn_state;
8540         int ret = 0;
8541         int i, t;
8542
8543         insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8544         if (!insn_state)
8545                 return -ENOMEM;
8546
8547         insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8548         if (!insn_stack) {
8549                 kvfree(insn_state);
8550                 return -ENOMEM;
8551         }
8552
8553         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
8554         insn_stack[0] = 0; /* 0 is the first instruction */
8555         env->cfg.cur_stack = 1;
8556
8557 peek_stack:
8558         if (env->cfg.cur_stack == 0)
8559                 goto check_state;
8560         t = insn_stack[env->cfg.cur_stack - 1];
8561
8562         if (BPF_CLASS(insns[t].code) == BPF_JMP ||
8563             BPF_CLASS(insns[t].code) == BPF_JMP32) {
8564                 u8 opcode = BPF_OP(insns[t].code);
8565
8566                 if (opcode == BPF_EXIT) {
8567                         goto mark_explored;
8568                 } else if (opcode == BPF_CALL) {
8569                         ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8570                         if (ret == 1)
8571                                 goto peek_stack;
8572                         else if (ret < 0)
8573                                 goto err_free;
8574                         if (t + 1 < insn_cnt)
8575                                 init_explored_state(env, t + 1);
8576                         if (insns[t].src_reg == BPF_PSEUDO_CALL) {
8577                                 init_explored_state(env, t);
8578                                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
8579                                                 env, false);
8580                                 if (ret == 1)
8581                                         goto peek_stack;
8582                                 else if (ret < 0)
8583                                         goto err_free;
8584                         }
8585                 } else if (opcode == BPF_JA) {
8586                         if (BPF_SRC(insns[t].code) != BPF_K) {
8587                                 ret = -EINVAL;
8588                                 goto err_free;
8589                         }
8590                         /* unconditional jump with single edge */
8591                         ret = push_insn(t, t + insns[t].off + 1,
8592                                         FALLTHROUGH, env, true);
8593                         if (ret == 1)
8594                                 goto peek_stack;
8595                         else if (ret < 0)
8596                                 goto err_free;
8597                         /* unconditional jmp is not a good pruning point,
8598                          * but it's marked, since backtracking needs
8599                          * to record jmp history in is_state_visited().
8600                          */
8601                         init_explored_state(env, t + insns[t].off + 1);
8602                         /* tell verifier to check for equivalent states
8603                          * after every call and jump
8604                          */
8605                         if (t + 1 < insn_cnt)
8606                                 init_explored_state(env, t + 1);
8607                 } else {
8608                         /* conditional jump with two edges */
8609                         init_explored_state(env, t);
8610                         ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
8611                         if (ret == 1)
8612                                 goto peek_stack;
8613                         else if (ret < 0)
8614                                 goto err_free;
8615
8616                         ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
8617                         if (ret == 1)
8618                                 goto peek_stack;
8619                         else if (ret < 0)
8620                                 goto err_free;
8621                 }
8622         } else {
8623                 /* all other non-branch instructions with single
8624                  * fall-through edge
8625                  */
8626                 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8627                 if (ret == 1)
8628                         goto peek_stack;
8629                 else if (ret < 0)
8630                         goto err_free;
8631         }
8632
8633 mark_explored:
8634         insn_state[t] = EXPLORED;
8635         if (env->cfg.cur_stack-- <= 0) {
8636                 verbose(env, "pop stack internal bug\n");
8637                 ret = -EFAULT;
8638                 goto err_free;
8639         }
8640         goto peek_stack;
8641
8642 check_state:
8643         for (i = 0; i < insn_cnt; i++) {
8644                 if (insn_state[i] != EXPLORED) {
8645                         verbose(env, "unreachable insn %d\n", i);
8646                         ret = -EINVAL;
8647                         goto err_free;
8648                 }
8649         }
8650         ret = 0; /* cfg looks good */
8651
8652 err_free:
8653         kvfree(insn_state);
8654         kvfree(insn_stack);
8655         env->cfg.insn_state = env->cfg.insn_stack = NULL;
8656         return ret;
8657 }
8658
8659 static int check_abnormal_return(struct bpf_verifier_env *env)
8660 {
8661         int i;
8662
8663         for (i = 1; i < env->subprog_cnt; i++) {
8664                 if (env->subprog_info[i].has_ld_abs) {
8665                         verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
8666                         return -EINVAL;
8667                 }
8668                 if (env->subprog_info[i].has_tail_call) {
8669                         verbose(env, "tail_call is not allowed in subprogs without BTF\n");
8670                         return -EINVAL;
8671                 }
8672         }
8673         return 0;
8674 }
8675
8676 /* The minimum supported BTF func info size */
8677 #define MIN_BPF_FUNCINFO_SIZE   8
8678 #define MAX_FUNCINFO_REC_SIZE   252
8679
8680 static int check_btf_func(struct bpf_verifier_env *env,
8681                           const union bpf_attr *attr,
8682                           union bpf_attr __user *uattr)
8683 {
8684         const struct btf_type *type, *func_proto, *ret_type;
8685         u32 i, nfuncs, urec_size, min_size;
8686         u32 krec_size = sizeof(struct bpf_func_info);
8687         struct bpf_func_info *krecord;
8688         struct bpf_func_info_aux *info_aux = NULL;
8689         struct bpf_prog *prog;
8690         const struct btf *btf;
8691         void __user *urecord;
8692         u32 prev_offset = 0;
8693         bool scalar_return;
8694         int ret = -ENOMEM;
8695
8696         nfuncs = attr->func_info_cnt;
8697         if (!nfuncs) {
8698                 if (check_abnormal_return(env))
8699                         return -EINVAL;
8700                 return 0;
8701         }
8702
8703         if (nfuncs != env->subprog_cnt) {
8704                 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
8705                 return -EINVAL;
8706         }
8707
8708         urec_size = attr->func_info_rec_size;
8709         if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
8710             urec_size > MAX_FUNCINFO_REC_SIZE ||
8711             urec_size % sizeof(u32)) {
8712                 verbose(env, "invalid func info rec size %u\n", urec_size);
8713                 return -EINVAL;
8714         }
8715
8716         prog = env->prog;
8717         btf = prog->aux->btf;
8718
8719         urecord = u64_to_user_ptr(attr->func_info);
8720         min_size = min_t(u32, krec_size, urec_size);
8721
8722         krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
8723         if (!krecord)
8724                 return -ENOMEM;
8725         info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
8726         if (!info_aux)
8727                 goto err_free;
8728
8729         for (i = 0; i < nfuncs; i++) {
8730                 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
8731                 if (ret) {
8732                         if (ret == -E2BIG) {
8733                                 verbose(env, "nonzero tailing record in func info");
8734                                 /* set the size kernel expects so loader can zero
8735                                  * out the rest of the record.
8736                                  */
8737                                 if (put_user(min_size, &uattr->func_info_rec_size))
8738                                         ret = -EFAULT;
8739                         }
8740                         goto err_free;
8741                 }
8742
8743                 if (copy_from_user(&krecord[i], urecord, min_size)) {
8744                         ret = -EFAULT;
8745                         goto err_free;
8746                 }
8747
8748                 /* check insn_off */
8749                 ret = -EINVAL;
8750                 if (i == 0) {
8751                         if (krecord[i].insn_off) {
8752                                 verbose(env,
8753                                         "nonzero insn_off %u for the first func info record",
8754                                         krecord[i].insn_off);
8755                                 goto err_free;
8756                         }
8757                 } else if (krecord[i].insn_off <= prev_offset) {
8758                         verbose(env,
8759                                 "same or smaller insn offset (%u) than previous func info record (%u)",
8760                                 krecord[i].insn_off, prev_offset);
8761                         goto err_free;
8762                 }
8763
8764                 if (env->subprog_info[i].start != krecord[i].insn_off) {
8765                         verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
8766                         goto err_free;
8767                 }
8768
8769                 /* check type_id */
8770                 type = btf_type_by_id(btf, krecord[i].type_id);
8771                 if (!type || !btf_type_is_func(type)) {
8772                         verbose(env, "invalid type id %d in func info",
8773                                 krecord[i].type_id);
8774                         goto err_free;
8775                 }
8776                 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8777
8778                 func_proto = btf_type_by_id(btf, type->type);
8779                 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
8780                         /* btf_func_check() already verified it during BTF load */
8781                         goto err_free;
8782                 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
8783                 scalar_return =
8784                         btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
8785                 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
8786                         verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
8787                         goto err_free;
8788                 }
8789                 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
8790                         verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
8791                         goto err_free;
8792                 }
8793
8794                 prev_offset = krecord[i].insn_off;
8795                 urecord += urec_size;
8796         }
8797
8798         prog->aux->func_info = krecord;
8799         prog->aux->func_info_cnt = nfuncs;
8800         prog->aux->func_info_aux = info_aux;
8801         return 0;
8802
8803 err_free:
8804         kvfree(krecord);
8805         kfree(info_aux);
8806         return ret;
8807 }
8808
8809 static void adjust_btf_func(struct bpf_verifier_env *env)
8810 {
8811         struct bpf_prog_aux *aux = env->prog->aux;
8812         int i;
8813
8814         if (!aux->func_info)
8815                 return;
8816
8817         for (i = 0; i < env->subprog_cnt; i++)
8818                 aux->func_info[i].insn_off = env->subprog_info[i].start;
8819 }
8820
8821 #define MIN_BPF_LINEINFO_SIZE   (offsetof(struct bpf_line_info, line_col) + \
8822                 sizeof(((struct bpf_line_info *)(0))->line_col))
8823 #define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
8824
8825 static int check_btf_line(struct bpf_verifier_env *env,
8826                           const union bpf_attr *attr,
8827                           union bpf_attr __user *uattr)
8828 {
8829         u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
8830         struct bpf_subprog_info *sub;
8831         struct bpf_line_info *linfo;
8832         struct bpf_prog *prog;
8833         const struct btf *btf;
8834         void __user *ulinfo;
8835         int err;
8836
8837         nr_linfo = attr->line_info_cnt;
8838         if (!nr_linfo)
8839                 return 0;
8840         if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
8841                 return -EINVAL;
8842
8843         rec_size = attr->line_info_rec_size;
8844         if (rec_size < MIN_BPF_LINEINFO_SIZE ||
8845             rec_size > MAX_LINEINFO_REC_SIZE ||
8846             rec_size & (sizeof(u32) - 1))
8847                 return -EINVAL;
8848
8849         /* Need to zero it in case the userspace may
8850          * pass in a smaller bpf_line_info object.
8851          */
8852         linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
8853                          GFP_KERNEL | __GFP_NOWARN);
8854         if (!linfo)
8855                 return -ENOMEM;
8856
8857         prog = env->prog;
8858         btf = prog->aux->btf;
8859
8860         s = 0;
8861         sub = env->subprog_info;
8862         ulinfo = u64_to_user_ptr(attr->line_info);
8863         expected_size = sizeof(struct bpf_line_info);
8864         ncopy = min_t(u32, expected_size, rec_size);
8865         for (i = 0; i < nr_linfo; i++) {
8866                 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
8867                 if (err) {
8868                         if (err == -E2BIG) {
8869                                 verbose(env, "nonzero tailing record in line_info");
8870                                 if (put_user(expected_size,
8871                                              &uattr->line_info_rec_size))
8872                                         err = -EFAULT;
8873                         }
8874                         goto err_free;
8875                 }
8876
8877                 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
8878                         err = -EFAULT;
8879                         goto err_free;
8880                 }
8881
8882                 /*
8883                  * Check insn_off to ensure
8884                  * 1) strictly increasing AND
8885                  * 2) bounded by prog->len
8886                  *
8887                  * The linfo[0].insn_off == 0 check logically falls into
8888                  * the later "missing bpf_line_info for func..." case
8889                  * because the first linfo[0].insn_off must be the
8890                  * first sub also and the first sub must have
8891                  * subprog_info[0].start == 0.
8892                  */
8893                 if ((i && linfo[i].insn_off <= prev_offset) ||
8894                     linfo[i].insn_off >= prog->len) {
8895                         verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
8896                                 i, linfo[i].insn_off, prev_offset,
8897                                 prog->len);
8898                         err = -EINVAL;
8899                         goto err_free;
8900                 }
8901
8902                 if (!prog->insnsi[linfo[i].insn_off].code) {
8903                         verbose(env,
8904                                 "Invalid insn code at line_info[%u].insn_off\n",
8905                                 i);
8906                         err = -EINVAL;
8907                         goto err_free;
8908                 }
8909
8910                 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
8911                     !btf_name_by_offset(btf, linfo[i].file_name_off)) {
8912                         verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
8913                         err = -EINVAL;
8914                         goto err_free;
8915                 }
8916
8917                 if (s != env->subprog_cnt) {
8918                         if (linfo[i].insn_off == sub[s].start) {
8919                                 sub[s].linfo_idx = i;
8920                                 s++;
8921                         } else if (sub[s].start < linfo[i].insn_off) {
8922                                 verbose(env, "missing bpf_line_info for func#%u\n", s);
8923                                 err = -EINVAL;
8924                                 goto err_free;
8925                         }
8926                 }
8927
8928                 prev_offset = linfo[i].insn_off;
8929                 ulinfo += rec_size;
8930         }
8931
8932         if (s != env->subprog_cnt) {
8933                 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
8934                         env->subprog_cnt - s, s);
8935                 err = -EINVAL;
8936                 goto err_free;
8937         }
8938
8939         prog->aux->linfo = linfo;
8940         prog->aux->nr_linfo = nr_linfo;
8941
8942         return 0;
8943
8944 err_free:
8945         kvfree(linfo);
8946         return err;
8947 }
8948
8949 static int check_btf_info(struct bpf_verifier_env *env,
8950                           const union bpf_attr *attr,
8951                           union bpf_attr __user *uattr)
8952 {
8953         struct btf *btf;
8954         int err;
8955
8956         if (!attr->func_info_cnt && !attr->line_info_cnt) {
8957                 if (check_abnormal_return(env))
8958                         return -EINVAL;
8959                 return 0;
8960         }
8961
8962         btf = btf_get_by_fd(attr->prog_btf_fd);
8963         if (IS_ERR(btf))
8964                 return PTR_ERR(btf);
8965         env->prog->aux->btf = btf;
8966
8967         err = check_btf_func(env, attr, uattr);
8968         if (err)
8969                 return err;
8970
8971         err = check_btf_line(env, attr, uattr);
8972         if (err)
8973                 return err;
8974
8975         return 0;
8976 }
8977
8978 /* check %cur's range satisfies %old's */
8979 static bool range_within(struct bpf_reg_state *old,
8980                          struct bpf_reg_state *cur)
8981 {
8982         return old->umin_value <= cur->umin_value &&
8983                old->umax_value >= cur->umax_value &&
8984                old->smin_value <= cur->smin_value &&
8985                old->smax_value >= cur->smax_value &&
8986                old->u32_min_value <= cur->u32_min_value &&
8987                old->u32_max_value >= cur->u32_max_value &&
8988                old->s32_min_value <= cur->s32_min_value &&
8989                old->s32_max_value >= cur->s32_max_value;
8990 }
8991
8992 /* If in the old state two registers had the same id, then they need to have
8993  * the same id in the new state as well.  But that id could be different from
8994  * the old state, so we need to track the mapping from old to new ids.
8995  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
8996  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
8997  * regs with a different old id could still have new id 9, we don't care about
8998  * that.
8999  * So we look through our idmap to see if this old id has been seen before.  If
9000  * so, we require the new id to match; otherwise, we add the id pair to the map.
9001  */
9002 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
9003 {
9004         unsigned int i;
9005
9006         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
9007                 if (!idmap[i].old) {
9008                         /* Reached an empty slot; haven't seen this id before */
9009                         idmap[i].old = old_id;
9010                         idmap[i].cur = cur_id;
9011                         return true;
9012                 }
9013                 if (idmap[i].old == old_id)
9014                         return idmap[i].cur == cur_id;
9015         }
9016         /* We ran out of idmap slots, which should be impossible */
9017         WARN_ON_ONCE(1);
9018         return false;
9019 }
9020
9021 static void clean_func_state(struct bpf_verifier_env *env,
9022                              struct bpf_func_state *st)
9023 {
9024         enum bpf_reg_liveness live;
9025         int i, j;
9026
9027         for (i = 0; i < BPF_REG_FP; i++) {
9028                 live = st->regs[i].live;
9029                 /* liveness must not touch this register anymore */
9030                 st->regs[i].live |= REG_LIVE_DONE;
9031                 if (!(live & REG_LIVE_READ))
9032                         /* since the register is unused, clear its state
9033                          * to make further comparison simpler
9034                          */
9035                         __mark_reg_not_init(env, &st->regs[i]);
9036         }
9037
9038         for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
9039                 live = st->stack[i].spilled_ptr.live;
9040                 /* liveness must not touch this stack slot anymore */
9041                 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
9042                 if (!(live & REG_LIVE_READ)) {
9043                         __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
9044                         for (j = 0; j < BPF_REG_SIZE; j++)
9045                                 st->stack[i].slot_type[j] = STACK_INVALID;
9046                 }
9047         }
9048 }
9049
9050 static void clean_verifier_state(struct bpf_verifier_env *env,
9051                                  struct bpf_verifier_state *st)
9052 {
9053         int i;
9054
9055         if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
9056                 /* all regs in this state in all frames were already marked */
9057                 return;
9058
9059         for (i = 0; i <= st->curframe; i++)
9060                 clean_func_state(env, st->frame[i]);
9061 }
9062
9063 /* the parentage chains form a tree.
9064  * the verifier states are added to state lists at given insn and
9065  * pushed into state stack for future exploration.
9066  * when the verifier reaches bpf_exit insn some of the verifer states
9067  * stored in the state lists have their final liveness state already,
9068  * but a lot of states will get revised from liveness point of view when
9069  * the verifier explores other branches.
9070  * Example:
9071  * 1: r0 = 1
9072  * 2: if r1 == 100 goto pc+1
9073  * 3: r0 = 2
9074  * 4: exit
9075  * when the verifier reaches exit insn the register r0 in the state list of
9076  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
9077  * of insn 2 and goes exploring further. At the insn 4 it will walk the
9078  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
9079  *
9080  * Since the verifier pushes the branch states as it sees them while exploring
9081  * the program the condition of walking the branch instruction for the second
9082  * time means that all states below this branch were already explored and
9083  * their final liveness markes are already propagated.
9084  * Hence when the verifier completes the search of state list in is_state_visited()
9085  * we can call this clean_live_states() function to mark all liveness states
9086  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
9087  * will not be used.
9088  * This function also clears the registers and stack for states that !READ
9089  * to simplify state merging.
9090  *
9091  * Important note here that walking the same branch instruction in the callee
9092  * doesn't meant that the states are DONE. The verifier has to compare
9093  * the callsites
9094  */
9095 static void clean_live_states(struct bpf_verifier_env *env, int insn,
9096                               struct bpf_verifier_state *cur)
9097 {
9098         struct bpf_verifier_state_list *sl;
9099         int i;
9100
9101         sl = *explored_state(env, insn);
9102         while (sl) {
9103                 if (sl->state.branches)
9104                         goto next;
9105                 if (sl->state.insn_idx != insn ||
9106                     sl->state.curframe != cur->curframe)
9107                         goto next;
9108                 for (i = 0; i <= cur->curframe; i++)
9109                         if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
9110                                 goto next;
9111                 clean_verifier_state(env, &sl->state);
9112 next:
9113                 sl = sl->next;
9114         }
9115 }
9116
9117 /* Returns true if (rold safe implies rcur safe) */
9118 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
9119                     struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
9120 {
9121         bool equal;
9122
9123         if (!(rold->live & REG_LIVE_READ))
9124                 /* explored state didn't use this */
9125                 return true;
9126
9127         equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
9128
9129         if (rold->type == PTR_TO_STACK)
9130                 /* two stack pointers are equal only if they're pointing to
9131                  * the same stack frame, since fp-8 in foo != fp-8 in bar
9132                  */
9133                 return equal && rold->frameno == rcur->frameno;
9134
9135         if (equal)
9136                 return true;
9137
9138         if (rold->type == NOT_INIT)
9139                 /* explored state can't have used this */
9140                 return true;
9141         if (rcur->type == NOT_INIT)
9142                 return false;
9143         switch (rold->type) {
9144         case SCALAR_VALUE:
9145                 if (env->explore_alu_limits)
9146                         return false;
9147                 if (rcur->type == SCALAR_VALUE) {
9148                         if (!rold->precise && !rcur->precise)
9149                                 return true;
9150                         /* new val must satisfy old val knowledge */
9151                         return range_within(rold, rcur) &&
9152                                tnum_in(rold->var_off, rcur->var_off);
9153                 } else {
9154                         /* We're trying to use a pointer in place of a scalar.
9155                          * Even if the scalar was unbounded, this could lead to
9156                          * pointer leaks because scalars are allowed to leak
9157                          * while pointers are not. We could make this safe in
9158                          * special cases if root is calling us, but it's
9159                          * probably not worth the hassle.
9160                          */
9161                         return false;
9162                 }
9163         case PTR_TO_MAP_VALUE:
9164                 /* If the new min/max/var_off satisfy the old ones and
9165                  * everything else matches, we are OK.
9166                  * 'id' is not compared, since it's only used for maps with
9167                  * bpf_spin_lock inside map element and in such cases if
9168                  * the rest of the prog is valid for one map element then
9169                  * it's valid for all map elements regardless of the key
9170                  * used in bpf_map_lookup()
9171                  */
9172                 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
9173                        range_within(rold, rcur) &&
9174                        tnum_in(rold->var_off, rcur->var_off);
9175         case PTR_TO_MAP_VALUE_OR_NULL:
9176                 /* a PTR_TO_MAP_VALUE could be safe to use as a
9177                  * PTR_TO_MAP_VALUE_OR_NULL into the same map.
9178                  * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
9179                  * checked, doing so could have affected others with the same
9180                  * id, and we can't check for that because we lost the id when
9181                  * we converted to a PTR_TO_MAP_VALUE.
9182                  */
9183                 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
9184                         return false;
9185                 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
9186                         return false;
9187                 /* Check our ids match any regs they're supposed to */
9188                 return check_ids(rold->id, rcur->id, idmap);
9189         case PTR_TO_PACKET_META:
9190         case PTR_TO_PACKET:
9191                 if (rcur->type != rold->type)
9192                         return false;
9193                 /* We must have at least as much range as the old ptr
9194                  * did, so that any accesses which were safe before are
9195                  * still safe.  This is true even if old range < old off,
9196                  * since someone could have accessed through (ptr - k), or
9197                  * even done ptr -= k in a register, to get a safe access.
9198                  */
9199                 if (rold->range > rcur->range)
9200                         return false;
9201                 /* If the offsets don't match, we can't trust our alignment;
9202                  * nor can we be sure that we won't fall out of range.
9203                  */
9204                 if (rold->off != rcur->off)
9205                         return false;
9206                 /* id relations must be preserved */
9207                 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
9208                         return false;
9209                 /* new val must satisfy old val knowledge */
9210                 return range_within(rold, rcur) &&
9211                        tnum_in(rold->var_off, rcur->var_off);
9212         case PTR_TO_CTX:
9213         case CONST_PTR_TO_MAP:
9214         case PTR_TO_PACKET_END:
9215         case PTR_TO_FLOW_KEYS:
9216         case PTR_TO_SOCKET:
9217         case PTR_TO_SOCKET_OR_NULL:
9218         case PTR_TO_SOCK_COMMON:
9219         case PTR_TO_SOCK_COMMON_OR_NULL:
9220         case PTR_TO_TCP_SOCK:
9221         case PTR_TO_TCP_SOCK_OR_NULL:
9222         case PTR_TO_XDP_SOCK:
9223                 /* Only valid matches are exact, which memcmp() above
9224                  * would have accepted
9225                  */
9226         default:
9227                 /* Don't know what's going on, just say it's not safe */
9228                 return false;
9229         }
9230
9231         /* Shouldn't get here; if we do, say it's not safe */
9232         WARN_ON_ONCE(1);
9233         return false;
9234 }
9235
9236 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
9237                       struct bpf_func_state *cur, struct bpf_id_pair *idmap)
9238 {
9239         int i, spi;
9240
9241         /* walk slots of the explored stack and ignore any additional
9242          * slots in the current stack, since explored(safe) state
9243          * didn't use them
9244          */
9245         for (i = 0; i < old->allocated_stack; i++) {
9246                 spi = i / BPF_REG_SIZE;
9247
9248                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
9249                         i += BPF_REG_SIZE - 1;
9250                         /* explored state didn't use this */
9251                         continue;
9252                 }
9253
9254                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
9255                         continue;
9256
9257                 /* explored stack has more populated slots than current stack
9258                  * and these slots were used
9259                  */
9260                 if (i >= cur->allocated_stack)
9261                         return false;
9262
9263                 /* if old state was safe with misc data in the stack
9264                  * it will be safe with zero-initialized stack.
9265                  * The opposite is not true
9266                  */
9267                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
9268                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
9269                         continue;
9270                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
9271                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
9272                         /* Ex: old explored (safe) state has STACK_SPILL in
9273                          * this stack slot, but current has STACK_MISC ->
9274                          * this verifier states are not equivalent,
9275                          * return false to continue verification of this path
9276                          */
9277                         return false;
9278                 if (i % BPF_REG_SIZE)
9279                         continue;
9280                 if (old->stack[spi].slot_type[0] != STACK_SPILL)
9281                         continue;
9282                 if (!regsafe(env, &old->stack[spi].spilled_ptr,
9283                              &cur->stack[spi].spilled_ptr, idmap))
9284                         /* when explored and current stack slot are both storing
9285                          * spilled registers, check that stored pointers types
9286                          * are the same as well.
9287                          * Ex: explored safe path could have stored
9288                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
9289                          * but current path has stored:
9290                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
9291                          * such verifier states are not equivalent.
9292                          * return false to continue verification of this path
9293                          */
9294                         return false;
9295         }
9296         return true;
9297 }
9298
9299 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
9300 {
9301         if (old->acquired_refs != cur->acquired_refs)
9302                 return false;
9303         return !memcmp(old->refs, cur->refs,
9304                        sizeof(*old->refs) * old->acquired_refs);
9305 }
9306
9307 /* compare two verifier states
9308  *
9309  * all states stored in state_list are known to be valid, since
9310  * verifier reached 'bpf_exit' instruction through them
9311  *
9312  * this function is called when verifier exploring different branches of
9313  * execution popped from the state stack. If it sees an old state that has
9314  * more strict register state and more strict stack state then this execution
9315  * branch doesn't need to be explored further, since verifier already
9316  * concluded that more strict state leads to valid finish.
9317  *
9318  * Therefore two states are equivalent if register state is more conservative
9319  * and explored stack state is more conservative than the current one.
9320  * Example:
9321  *       explored                   current
9322  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
9323  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
9324  *
9325  * In other words if current stack state (one being explored) has more
9326  * valid slots than old one that already passed validation, it means
9327  * the verifier can stop exploring and conclude that current state is valid too
9328  *
9329  * Similarly with registers. If explored state has register type as invalid
9330  * whereas register type in current state is meaningful, it means that
9331  * the current state will reach 'bpf_exit' instruction safely
9332  */
9333 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
9334                               struct bpf_func_state *cur)
9335 {
9336         int i;
9337
9338         memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
9339         for (i = 0; i < MAX_BPF_REG; i++)
9340                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
9341                              env->idmap_scratch))
9342                         return false;
9343
9344         if (!stacksafe(env, old, cur, env->idmap_scratch))
9345                 return false;
9346
9347         if (!refsafe(old, cur))
9348                 return false;
9349
9350         return true;
9351 }
9352
9353 static bool states_equal(struct bpf_verifier_env *env,
9354                          struct bpf_verifier_state *old,
9355                          struct bpf_verifier_state *cur)
9356 {
9357         int i;
9358
9359         if (old->curframe != cur->curframe)
9360                 return false;
9361
9362         /* Verification state from speculative execution simulation
9363          * must never prune a non-speculative execution one.
9364          */
9365         if (old->speculative && !cur->speculative)
9366                 return false;
9367
9368         if (old->active_spin_lock != cur->active_spin_lock)
9369                 return false;
9370
9371         /* for states to be equal callsites have to be the same
9372          * and all frame states need to be equivalent
9373          */
9374         for (i = 0; i <= old->curframe; i++) {
9375                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
9376                         return false;
9377                 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
9378                         return false;
9379         }
9380         return true;
9381 }
9382
9383 /* Return 0 if no propagation happened. Return negative error code if error
9384  * happened. Otherwise, return the propagated bit.
9385  */
9386 static int propagate_liveness_reg(struct bpf_verifier_env *env,
9387                                   struct bpf_reg_state *reg,
9388                                   struct bpf_reg_state *parent_reg)
9389 {
9390         u8 parent_flag = parent_reg->live & REG_LIVE_READ;
9391         u8 flag = reg->live & REG_LIVE_READ;
9392         int err;
9393
9394         /* When comes here, read flags of PARENT_REG or REG could be any of
9395          * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
9396          * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
9397          */
9398         if (parent_flag == REG_LIVE_READ64 ||
9399             /* Or if there is no read flag from REG. */
9400             !flag ||
9401             /* Or if the read flag from REG is the same as PARENT_REG. */
9402             parent_flag == flag)
9403                 return 0;
9404
9405         err = mark_reg_read(env, reg, parent_reg, flag);
9406         if (err)
9407                 return err;
9408
9409         return flag;
9410 }
9411
9412 /* A write screens off any subsequent reads; but write marks come from the
9413  * straight-line code between a state and its parent.  When we arrive at an
9414  * equivalent state (jump target or such) we didn't arrive by the straight-line
9415  * code, so read marks in the state must propagate to the parent regardless
9416  * of the state's write marks. That's what 'parent == state->parent' comparison
9417  * in mark_reg_read() is for.
9418  */
9419 static int propagate_liveness(struct bpf_verifier_env *env,
9420                               const struct bpf_verifier_state *vstate,
9421                               struct bpf_verifier_state *vparent)
9422 {
9423         struct bpf_reg_state *state_reg, *parent_reg;
9424         struct bpf_func_state *state, *parent;
9425         int i, frame, err = 0;
9426
9427         if (vparent->curframe != vstate->curframe) {
9428                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
9429                      vparent->curframe, vstate->curframe);
9430                 return -EFAULT;
9431         }
9432         /* Propagate read liveness of registers... */
9433         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
9434         for (frame = 0; frame <= vstate->curframe; frame++) {
9435                 parent = vparent->frame[frame];
9436                 state = vstate->frame[frame];
9437                 parent_reg = parent->regs;
9438                 state_reg = state->regs;
9439                 /* We don't need to worry about FP liveness, it's read-only */
9440                 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9441                         err = propagate_liveness_reg(env, &state_reg[i],
9442                                                      &parent_reg[i]);
9443                         if (err < 0)
9444                                 return err;
9445                         if (err == REG_LIVE_READ64)
9446                                 mark_insn_zext(env, &parent_reg[i]);
9447                 }
9448
9449                 /* Propagate stack slots. */
9450                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
9451                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
9452                         parent_reg = &parent->stack[i].spilled_ptr;
9453                         state_reg = &state->stack[i].spilled_ptr;
9454                         err = propagate_liveness_reg(env, state_reg,
9455                                                      parent_reg);
9456                         if (err < 0)
9457                                 return err;
9458                 }
9459         }
9460         return 0;
9461 }
9462
9463 /* find precise scalars in the previous equivalent state and
9464  * propagate them into the current state
9465  */
9466 static int propagate_precision(struct bpf_verifier_env *env,
9467                                const struct bpf_verifier_state *old)
9468 {
9469         struct bpf_reg_state *state_reg;
9470         struct bpf_func_state *state;
9471         int i, err = 0;
9472
9473         state = old->frame[old->curframe];
9474         state_reg = state->regs;
9475         for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
9476                 if (state_reg->type != SCALAR_VALUE ||
9477                     !state_reg->precise)
9478                         continue;
9479                 if (env->log.level & BPF_LOG_LEVEL2)
9480                         verbose(env, "propagating r%d\n", i);
9481                 err = mark_chain_precision(env, i);
9482                 if (err < 0)
9483                         return err;
9484         }
9485
9486         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
9487                 if (state->stack[i].slot_type[0] != STACK_SPILL)
9488                         continue;
9489                 state_reg = &state->stack[i].spilled_ptr;
9490                 if (state_reg->type != SCALAR_VALUE ||
9491                     !state_reg->precise)
9492                         continue;
9493                 if (env->log.level & BPF_LOG_LEVEL2)
9494                         verbose(env, "propagating fp%d\n",
9495                                 (-i - 1) * BPF_REG_SIZE);
9496                 err = mark_chain_precision_stack(env, i);
9497                 if (err < 0)
9498                         return err;
9499         }
9500         return 0;
9501 }
9502
9503 static bool states_maybe_looping(struct bpf_verifier_state *old,
9504                                  struct bpf_verifier_state *cur)
9505 {
9506         struct bpf_func_state *fold, *fcur;
9507         int i, fr = cur->curframe;
9508
9509         if (old->curframe != fr)
9510                 return false;
9511
9512         fold = old->frame[fr];
9513         fcur = cur->frame[fr];
9514         for (i = 0; i < MAX_BPF_REG; i++)
9515                 if (memcmp(&fold->regs[i], &fcur->regs[i],
9516                            offsetof(struct bpf_reg_state, parent)))
9517                         return false;
9518         return true;
9519 }
9520
9521
9522 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
9523 {
9524         struct bpf_verifier_state_list *new_sl;
9525         struct bpf_verifier_state_list *sl, **pprev;
9526         struct bpf_verifier_state *cur = env->cur_state, *new;
9527         int i, j, err, states_cnt = 0;
9528         bool add_new_state = env->test_state_freq ? true : false;
9529
9530         cur->last_insn_idx = env->prev_insn_idx;
9531         if (!env->insn_aux_data[insn_idx].prune_point)
9532                 /* this 'insn_idx' instruction wasn't marked, so we will not
9533                  * be doing state search here
9534                  */
9535                 return 0;
9536
9537         /* bpf progs typically have pruning point every 4 instructions
9538          * http://vger.kernel.org/bpfconf2019.html#session-1
9539          * Do not add new state for future pruning if the verifier hasn't seen
9540          * at least 2 jumps and at least 8 instructions.
9541          * This heuristics helps decrease 'total_states' and 'peak_states' metric.
9542          * In tests that amounts to up to 50% reduction into total verifier
9543          * memory consumption and 20% verifier time speedup.
9544          */
9545         if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
9546             env->insn_processed - env->prev_insn_processed >= 8)
9547                 add_new_state = true;
9548
9549         pprev = explored_state(env, insn_idx);
9550         sl = *pprev;
9551
9552         clean_live_states(env, insn_idx, cur);
9553
9554         while (sl) {
9555                 states_cnt++;
9556                 if (sl->state.insn_idx != insn_idx)
9557                         goto next;
9558                 if (sl->state.branches) {
9559                         if (states_maybe_looping(&sl->state, cur) &&
9560                             states_equal(env, &sl->state, cur)) {
9561                                 verbose_linfo(env, insn_idx, "; ");
9562                                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
9563                                 return -EINVAL;
9564                         }
9565                         /* if the verifier is processing a loop, avoid adding new state
9566                          * too often, since different loop iterations have distinct
9567                          * states and may not help future pruning.
9568                          * This threshold shouldn't be too low to make sure that
9569                          * a loop with large bound will be rejected quickly.
9570                          * The most abusive loop will be:
9571                          * r1 += 1
9572                          * if r1 < 1000000 goto pc-2
9573                          * 1M insn_procssed limit / 100 == 10k peak states.
9574                          * This threshold shouldn't be too high either, since states
9575                          * at the end of the loop are likely to be useful in pruning.
9576                          */
9577                         if (env->jmps_processed - env->prev_jmps_processed < 20 &&
9578                             env->insn_processed - env->prev_insn_processed < 100)
9579                                 add_new_state = false;
9580                         goto miss;
9581                 }
9582                 if (states_equal(env, &sl->state, cur)) {
9583                         sl->hit_cnt++;
9584                         /* reached equivalent register/stack state,
9585                          * prune the search.
9586                          * Registers read by the continuation are read by us.
9587                          * If we have any write marks in env->cur_state, they
9588                          * will prevent corresponding reads in the continuation
9589                          * from reaching our parent (an explored_state).  Our
9590                          * own state will get the read marks recorded, but
9591                          * they'll be immediately forgotten as we're pruning
9592                          * this state and will pop a new one.
9593                          */
9594                         err = propagate_liveness(env, &sl->state, cur);
9595
9596                         /* if previous state reached the exit with precision and
9597                          * current state is equivalent to it (except precsion marks)
9598                          * the precision needs to be propagated back in
9599                          * the current state.
9600                          */
9601                         err = err ? : push_jmp_history(env, cur);
9602                         err = err ? : propagate_precision(env, &sl->state);
9603                         if (err)
9604                                 return err;
9605                         return 1;
9606                 }
9607 miss:
9608                 /* when new state is not going to be added do not increase miss count.
9609                  * Otherwise several loop iterations will remove the state
9610                  * recorded earlier. The goal of these heuristics is to have
9611                  * states from some iterations of the loop (some in the beginning
9612                  * and some at the end) to help pruning.
9613                  */
9614                 if (add_new_state)
9615                         sl->miss_cnt++;
9616                 /* heuristic to determine whether this state is beneficial
9617                  * to keep checking from state equivalence point of view.
9618                  * Higher numbers increase max_states_per_insn and verification time,
9619                  * but do not meaningfully decrease insn_processed.
9620                  */
9621                 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
9622                         /* the state is unlikely to be useful. Remove it to
9623                          * speed up verification
9624                          */
9625                         *pprev = sl->next;
9626                         if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
9627                                 u32 br = sl->state.branches;
9628
9629                                 WARN_ONCE(br,
9630                                           "BUG live_done but branches_to_explore %d\n",
9631                                           br);
9632                                 free_verifier_state(&sl->state, false);
9633                                 kfree(sl);
9634                                 env->peak_states--;
9635                         } else {
9636                                 /* cannot free this state, since parentage chain may
9637                                  * walk it later. Add it for free_list instead to
9638                                  * be freed at the end of verification
9639                                  */
9640                                 sl->next = env->free_list;
9641                                 env->free_list = sl;
9642                         }
9643                         sl = *pprev;
9644                         continue;
9645                 }
9646 next:
9647                 pprev = &sl->next;
9648                 sl = *pprev;
9649         }
9650
9651         if (env->max_states_per_insn < states_cnt)
9652                 env->max_states_per_insn = states_cnt;
9653
9654         if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
9655                 return push_jmp_history(env, cur);
9656
9657         if (!add_new_state)
9658                 return push_jmp_history(env, cur);
9659
9660         /* There were no equivalent states, remember the current one.
9661          * Technically the current state is not proven to be safe yet,
9662          * but it will either reach outer most bpf_exit (which means it's safe)
9663          * or it will be rejected. When there are no loops the verifier won't be
9664          * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
9665          * again on the way to bpf_exit.
9666          * When looping the sl->state.branches will be > 0 and this state
9667          * will not be considered for equivalence until branches == 0.
9668          */
9669         new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
9670         if (!new_sl)
9671                 return -ENOMEM;
9672         env->total_states++;
9673         env->peak_states++;
9674         env->prev_jmps_processed = env->jmps_processed;
9675         env->prev_insn_processed = env->insn_processed;
9676
9677         /* add new state to the head of linked list */
9678         new = &new_sl->state;
9679         err = copy_verifier_state(new, cur);
9680         if (err) {
9681                 free_verifier_state(new, false);
9682                 kfree(new_sl);
9683                 return err;
9684         }
9685         new->insn_idx = insn_idx;
9686         WARN_ONCE(new->branches != 1,
9687                   "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
9688
9689         cur->parent = new;
9690         cur->first_insn_idx = insn_idx;
9691         clear_jmp_history(cur);
9692         new_sl->next = *explored_state(env, insn_idx);
9693         *explored_state(env, insn_idx) = new_sl;
9694         /* connect new state to parentage chain. Current frame needs all
9695          * registers connected. Only r6 - r9 of the callers are alive (pushed
9696          * to the stack implicitly by JITs) so in callers' frames connect just
9697          * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
9698          * the state of the call instruction (with WRITTEN set), and r0 comes
9699          * from callee with its full parentage chain, anyway.
9700          */
9701         /* clear write marks in current state: the writes we did are not writes
9702          * our child did, so they don't screen off its reads from us.
9703          * (There are no read marks in current state, because reads always mark
9704          * their parent and current state never has children yet.  Only
9705          * explored_states can get read marks.)
9706          */
9707         for (j = 0; j <= cur->curframe; j++) {
9708                 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
9709                         cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
9710                 for (i = 0; i < BPF_REG_FP; i++)
9711                         cur->frame[j]->regs[i].live = REG_LIVE_NONE;
9712         }
9713
9714         /* all stack frames are accessible from callee, clear them all */
9715         for (j = 0; j <= cur->curframe; j++) {
9716                 struct bpf_func_state *frame = cur->frame[j];
9717                 struct bpf_func_state *newframe = new->frame[j];
9718
9719                 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
9720                         frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
9721                         frame->stack[i].spilled_ptr.parent =
9722                                                 &newframe->stack[i].spilled_ptr;
9723                 }
9724         }
9725         return 0;
9726 }
9727
9728 /* Return true if it's OK to have the same insn return a different type. */
9729 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
9730 {
9731         switch (type) {
9732         case PTR_TO_CTX:
9733         case PTR_TO_SOCKET:
9734         case PTR_TO_SOCKET_OR_NULL:
9735         case PTR_TO_SOCK_COMMON:
9736         case PTR_TO_SOCK_COMMON_OR_NULL:
9737         case PTR_TO_TCP_SOCK:
9738         case PTR_TO_TCP_SOCK_OR_NULL:
9739         case PTR_TO_XDP_SOCK:
9740         case PTR_TO_BTF_ID:
9741         case PTR_TO_BTF_ID_OR_NULL:
9742                 return false;
9743         default:
9744                 return true;
9745         }
9746 }
9747
9748 /* If an instruction was previously used with particular pointer types, then we
9749  * need to be careful to avoid cases such as the below, where it may be ok
9750  * for one branch accessing the pointer, but not ok for the other branch:
9751  *
9752  * R1 = sock_ptr
9753  * goto X;
9754  * ...
9755  * R1 = some_other_valid_ptr;
9756  * goto X;
9757  * ...
9758  * R2 = *(u32 *)(R1 + 0);
9759  */
9760 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
9761 {
9762         return src != prev && (!reg_type_mismatch_ok(src) ||
9763                                !reg_type_mismatch_ok(prev));
9764 }
9765
9766 static int do_check(struct bpf_verifier_env *env)
9767 {
9768         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
9769         struct bpf_verifier_state *state = env->cur_state;
9770         struct bpf_insn *insns = env->prog->insnsi;
9771         struct bpf_reg_state *regs;
9772         int insn_cnt = env->prog->len;
9773         bool do_print_state = false;
9774         int prev_insn_idx = -1;
9775
9776         for (;;) {
9777                 struct bpf_insn *insn;
9778                 u8 class;
9779                 int err;
9780
9781                 env->prev_insn_idx = prev_insn_idx;
9782                 if (env->insn_idx >= insn_cnt) {
9783                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
9784                                 env->insn_idx, insn_cnt);
9785                         return -EFAULT;
9786                 }
9787
9788                 insn = &insns[env->insn_idx];
9789                 class = BPF_CLASS(insn->code);
9790
9791                 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
9792                         verbose(env,
9793                                 "BPF program is too large. Processed %d insn\n",
9794                                 env->insn_processed);
9795                         return -E2BIG;
9796                 }
9797
9798                 err = is_state_visited(env, env->insn_idx);
9799                 if (err < 0)
9800                         return err;
9801                 if (err == 1) {
9802                         /* found equivalent state, can prune the search */
9803                         if (env->log.level & BPF_LOG_LEVEL) {
9804                                 if (do_print_state)
9805                                         verbose(env, "\nfrom %d to %d%s: safe\n",
9806                                                 env->prev_insn_idx, env->insn_idx,
9807                                                 env->cur_state->speculative ?
9808                                                 " (speculative execution)" : "");
9809                                 else
9810                                         verbose(env, "%d: safe\n", env->insn_idx);
9811                         }
9812                         goto process_bpf_exit;
9813                 }
9814
9815                 if (signal_pending(current))
9816                         return -EAGAIN;
9817
9818                 if (need_resched())
9819                         cond_resched();
9820
9821                 if (env->log.level & BPF_LOG_LEVEL2 ||
9822                     (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
9823                         if (env->log.level & BPF_LOG_LEVEL2)
9824                                 verbose(env, "%d:", env->insn_idx);
9825                         else
9826                                 verbose(env, "\nfrom %d to %d%s:",
9827                                         env->prev_insn_idx, env->insn_idx,
9828                                         env->cur_state->speculative ?
9829                                         " (speculative execution)" : "");
9830                         print_verifier_state(env, state->frame[state->curframe]);
9831                         do_print_state = false;
9832                 }
9833
9834                 if (env->log.level & BPF_LOG_LEVEL) {
9835                         const struct bpf_insn_cbs cbs = {
9836                                 .cb_print       = verbose,
9837                                 .private_data   = env,
9838                         };
9839
9840                         verbose_linfo(env, env->insn_idx, "; ");
9841                         verbose(env, "%d: ", env->insn_idx);
9842                         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
9843                 }
9844
9845                 if (bpf_prog_is_dev_bound(env->prog->aux)) {
9846                         err = bpf_prog_offload_verify_insn(env, env->insn_idx,
9847                                                            env->prev_insn_idx);
9848                         if (err)
9849                                 return err;
9850                 }
9851
9852                 regs = cur_regs(env);
9853                 sanitize_mark_insn_seen(env);
9854                 prev_insn_idx = env->insn_idx;
9855
9856                 if (class == BPF_ALU || class == BPF_ALU64) {
9857                         err = check_alu_op(env, insn);
9858                         if (err)
9859                                 return err;
9860
9861                 } else if (class == BPF_LDX) {
9862                         enum bpf_reg_type *prev_src_type, src_reg_type;
9863
9864                         /* check for reserved fields is already done */
9865
9866                         /* check src operand */
9867                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
9868                         if (err)
9869                                 return err;
9870
9871                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9872                         if (err)
9873                                 return err;
9874
9875                         src_reg_type = regs[insn->src_reg].type;
9876
9877                         /* check that memory (src_reg + off) is readable,
9878                          * the state of dst_reg will be updated by this func
9879                          */
9880                         err = check_mem_access(env, env->insn_idx, insn->src_reg,
9881                                                insn->off, BPF_SIZE(insn->code),
9882                                                BPF_READ, insn->dst_reg, false);
9883                         if (err)
9884                                 return err;
9885
9886                         prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9887
9888                         if (*prev_src_type == NOT_INIT) {
9889                                 /* saw a valid insn
9890                                  * dst_reg = *(u32 *)(src_reg + off)
9891                                  * save type to validate intersecting paths
9892                                  */
9893                                 *prev_src_type = src_reg_type;
9894
9895                         } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9896                                 /* ABuser program is trying to use the same insn
9897                                  * dst_reg = *(u32*) (src_reg + off)
9898                                  * with different pointer types:
9899                                  * src_reg == ctx in one branch and
9900                                  * src_reg == stack|map in some other branch.
9901                                  * Reject it.
9902                                  */
9903                                 verbose(env, "same insn cannot be used with different pointers\n");
9904                                 return -EINVAL;
9905                         }
9906
9907                 } else if (class == BPF_STX) {
9908                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
9909
9910                         if (BPF_MODE(insn->code) == BPF_XADD) {
9911                                 err = check_xadd(env, env->insn_idx, insn);
9912                                 if (err)
9913                                         return err;
9914                                 env->insn_idx++;
9915                                 continue;
9916                         }
9917
9918                         /* check src1 operand */
9919                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
9920                         if (err)
9921                                 return err;
9922                         /* check src2 operand */
9923                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9924                         if (err)
9925                                 return err;
9926
9927                         dst_reg_type = regs[insn->dst_reg].type;
9928
9929                         /* check that memory (dst_reg + off) is writeable */
9930                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
9931                                                insn->off, BPF_SIZE(insn->code),
9932                                                BPF_WRITE, insn->src_reg, false);
9933                         if (err)
9934                                 return err;
9935
9936                         prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9937
9938                         if (*prev_dst_type == NOT_INIT) {
9939                                 *prev_dst_type = dst_reg_type;
9940                         } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
9941                                 verbose(env, "same insn cannot be used with different pointers\n");
9942                                 return -EINVAL;
9943                         }
9944
9945                 } else if (class == BPF_ST) {
9946                         if (BPF_MODE(insn->code) != BPF_MEM ||
9947                             insn->src_reg != BPF_REG_0) {
9948                                 verbose(env, "BPF_ST uses reserved fields\n");
9949                                 return -EINVAL;
9950                         }
9951                         /* check src operand */
9952                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9953                         if (err)
9954                                 return err;
9955
9956                         if (is_ctx_reg(env, insn->dst_reg)) {
9957                                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
9958                                         insn->dst_reg,
9959                                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
9960                                 return -EACCES;
9961                         }
9962
9963                         /* check that memory (dst_reg + off) is writeable */
9964                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
9965                                                insn->off, BPF_SIZE(insn->code),
9966                                                BPF_WRITE, -1, false);
9967                         if (err)
9968                                 return err;
9969
9970                 } else if (class == BPF_JMP || class == BPF_JMP32) {
9971                         u8 opcode = BPF_OP(insn->code);
9972
9973                         env->jmps_processed++;
9974                         if (opcode == BPF_CALL) {
9975                                 if (BPF_SRC(insn->code) != BPF_K ||
9976                                     insn->off != 0 ||
9977                                     (insn->src_reg != BPF_REG_0 &&
9978                                      insn->src_reg != BPF_PSEUDO_CALL) ||
9979                                     insn->dst_reg != BPF_REG_0 ||
9980                                     class == BPF_JMP32) {
9981                                         verbose(env, "BPF_CALL uses reserved fields\n");
9982                                         return -EINVAL;
9983                                 }
9984
9985                                 if (env->cur_state->active_spin_lock &&
9986                                     (insn->src_reg == BPF_PSEUDO_CALL ||
9987                                      insn->imm != BPF_FUNC_spin_unlock)) {
9988                                         verbose(env, "function calls are not allowed while holding a lock\n");
9989                                         return -EINVAL;
9990                                 }
9991                                 if (insn->src_reg == BPF_PSEUDO_CALL)
9992                                         err = check_func_call(env, insn, &env->insn_idx);
9993                                 else
9994                                         err = check_helper_call(env, insn->imm, env->insn_idx);
9995                                 if (err)
9996                                         return err;
9997
9998                         } else if (opcode == BPF_JA) {
9999                                 if (BPF_SRC(insn->code) != BPF_K ||
10000                                     insn->imm != 0 ||
10001                                     insn->src_reg != BPF_REG_0 ||
10002                                     insn->dst_reg != BPF_REG_0 ||
10003                                     class == BPF_JMP32) {
10004                                         verbose(env, "BPF_JA uses reserved fields\n");
10005                                         return -EINVAL;
10006                                 }
10007
10008                                 env->insn_idx += insn->off + 1;
10009                                 continue;
10010
10011                         } else if (opcode == BPF_EXIT) {
10012                                 if (BPF_SRC(insn->code) != BPF_K ||
10013                                     insn->imm != 0 ||
10014                                     insn->src_reg != BPF_REG_0 ||
10015                                     insn->dst_reg != BPF_REG_0 ||
10016                                     class == BPF_JMP32) {
10017                                         verbose(env, "BPF_EXIT uses reserved fields\n");
10018                                         return -EINVAL;
10019                                 }
10020
10021                                 if (env->cur_state->active_spin_lock) {
10022                                         verbose(env, "bpf_spin_unlock is missing\n");
10023                                         return -EINVAL;
10024                                 }
10025
10026                                 if (state->curframe) {
10027                                         /* exit from nested function */
10028                                         err = prepare_func_exit(env, &env->insn_idx);
10029                                         if (err)
10030                                                 return err;
10031                                         do_print_state = true;
10032                                         continue;
10033                                 }
10034
10035                                 err = check_reference_leak(env);
10036                                 if (err)
10037                                         return err;
10038
10039                                 err = check_return_code(env);
10040                                 if (err)
10041                                         return err;
10042 process_bpf_exit:
10043                                 update_branch_counts(env, env->cur_state);
10044                                 err = pop_stack(env, &prev_insn_idx,
10045                                                 &env->insn_idx, pop_log);
10046                                 if (err < 0) {
10047                                         if (err != -ENOENT)
10048                                                 return err;
10049                                         break;
10050                                 } else {
10051                                         do_print_state = true;
10052                                         continue;
10053                                 }
10054                         } else {
10055                                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
10056                                 if (err)
10057                                         return err;
10058                         }
10059                 } else if (class == BPF_LD) {
10060                         u8 mode = BPF_MODE(insn->code);
10061
10062                         if (mode == BPF_ABS || mode == BPF_IND) {
10063                                 err = check_ld_abs(env, insn);
10064                                 if (err)
10065                                         return err;
10066
10067                         } else if (mode == BPF_IMM) {
10068                                 err = check_ld_imm(env, insn);
10069                                 if (err)
10070                                         return err;
10071
10072                                 env->insn_idx++;
10073                                 sanitize_mark_insn_seen(env);
10074                         } else {
10075                                 verbose(env, "invalid BPF_LD mode\n");
10076                                 return -EINVAL;
10077                         }
10078                 } else {
10079                         verbose(env, "unknown insn class %d\n", class);
10080                         return -EINVAL;
10081                 }
10082
10083                 env->insn_idx++;
10084         }
10085
10086         return 0;
10087 }
10088
10089 /* replace pseudo btf_id with kernel symbol address */
10090 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
10091                                struct bpf_insn *insn,
10092                                struct bpf_insn_aux_data *aux)
10093 {
10094         const struct btf_var_secinfo *vsi;
10095         const struct btf_type *datasec;
10096         const struct btf_type *t;
10097         const char *sym_name;
10098         bool percpu = false;
10099         u32 type, id = insn->imm;
10100         s32 datasec_id;
10101         u64 addr;
10102         int i;
10103
10104         if (!btf_vmlinux) {
10105                 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
10106                 return -EINVAL;
10107         }
10108
10109         if (insn[1].imm != 0) {
10110                 verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
10111                 return -EINVAL;
10112         }
10113
10114         t = btf_type_by_id(btf_vmlinux, id);
10115         if (!t) {
10116                 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
10117                 return -ENOENT;
10118         }
10119
10120         if (!btf_type_is_var(t)) {
10121                 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
10122                         id);
10123                 return -EINVAL;
10124         }
10125
10126         sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
10127         addr = kallsyms_lookup_name(sym_name);
10128         if (!addr) {
10129                 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
10130                         sym_name);
10131                 return -ENOENT;
10132         }
10133
10134         datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
10135                                            BTF_KIND_DATASEC);
10136         if (datasec_id > 0) {
10137                 datasec = btf_type_by_id(btf_vmlinux, datasec_id);
10138                 for_each_vsi(i, datasec, vsi) {
10139                         if (vsi->type == id) {
10140                                 percpu = true;
10141                                 break;
10142                         }
10143                 }
10144         }
10145
10146         insn[0].imm = (u32)addr;
10147         insn[1].imm = addr >> 32;
10148
10149         type = t->type;
10150         t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
10151         if (percpu) {
10152                 aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
10153                 aux->btf_var.btf_id = type;
10154         } else if (!btf_type_is_struct(t)) {
10155                 const struct btf_type *ret;
10156                 const char *tname;
10157                 u32 tsize;
10158
10159                 /* resolve the type size of ksym. */
10160                 ret = btf_resolve_size(btf_vmlinux, t, &tsize);
10161                 if (IS_ERR(ret)) {
10162                         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
10163                         verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
10164                                 tname, PTR_ERR(ret));
10165                         return -EINVAL;
10166                 }
10167                 aux->btf_var.reg_type = PTR_TO_MEM;
10168                 aux->btf_var.mem_size = tsize;
10169         } else {
10170                 aux->btf_var.reg_type = PTR_TO_BTF_ID;
10171                 aux->btf_var.btf_id = type;
10172         }
10173         return 0;
10174 }
10175
10176 static int check_map_prealloc(struct bpf_map *map)
10177 {
10178         return (map->map_type != BPF_MAP_TYPE_HASH &&
10179                 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10180                 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
10181                 !(map->map_flags & BPF_F_NO_PREALLOC);
10182 }
10183
10184 static bool is_tracing_prog_type(enum bpf_prog_type type)
10185 {
10186         switch (type) {
10187         case BPF_PROG_TYPE_KPROBE:
10188         case BPF_PROG_TYPE_TRACEPOINT:
10189         case BPF_PROG_TYPE_PERF_EVENT:
10190         case BPF_PROG_TYPE_RAW_TRACEPOINT:
10191                 return true;
10192         default:
10193                 return false;
10194         }
10195 }
10196
10197 static bool is_preallocated_map(struct bpf_map *map)
10198 {
10199         if (!check_map_prealloc(map))
10200                 return false;
10201         if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
10202                 return false;
10203         return true;
10204 }
10205
10206 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
10207                                         struct bpf_map *map,
10208                                         struct bpf_prog *prog)
10209
10210 {
10211         enum bpf_prog_type prog_type = resolve_prog_type(prog);
10212         /*
10213          * Validate that trace type programs use preallocated hash maps.
10214          *
10215          * For programs attached to PERF events this is mandatory as the
10216          * perf NMI can hit any arbitrary code sequence.
10217          *
10218          * All other trace types using preallocated hash maps are unsafe as
10219          * well because tracepoint or kprobes can be inside locked regions
10220          * of the memory allocator or at a place where a recursion into the
10221          * memory allocator would see inconsistent state.
10222          *
10223          * On RT enabled kernels run-time allocation of all trace type
10224          * programs is strictly prohibited due to lock type constraints. On
10225          * !RT kernels it is allowed for backwards compatibility reasons for
10226          * now, but warnings are emitted so developers are made aware of
10227          * the unsafety and can fix their programs before this is enforced.
10228          */
10229         if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
10230                 if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
10231                         verbose(env, "perf_event programs can only use preallocated hash map\n");
10232                         return -EINVAL;
10233                 }
10234                 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
10235                         verbose(env, "trace type programs can only use preallocated hash map\n");
10236                         return -EINVAL;
10237                 }
10238                 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
10239                 verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
10240         }
10241
10242         if ((is_tracing_prog_type(prog_type) ||
10243              prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
10244             map_value_has_spin_lock(map)) {
10245                 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
10246                 return -EINVAL;
10247         }
10248
10249         if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
10250             !bpf_offload_prog_map_match(prog, map)) {
10251                 verbose(env, "offload device mismatch between prog and map\n");
10252                 return -EINVAL;
10253         }
10254
10255         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
10256                 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
10257                 return -EINVAL;
10258         }
10259
10260         if (prog->aux->sleepable)
10261                 switch (map->map_type) {
10262                 case BPF_MAP_TYPE_HASH:
10263                 case BPF_MAP_TYPE_LRU_HASH:
10264                 case BPF_MAP_TYPE_ARRAY:
10265                         if (!is_preallocated_map(map)) {
10266                                 verbose(env,
10267                                         "Sleepable programs can only use preallocated hash maps\n");
10268                                 return -EINVAL;
10269                         }
10270                         break;
10271                 default:
10272                         verbose(env,
10273                                 "Sleepable programs can only use array and hash maps\n");
10274                         return -EINVAL;
10275                 }
10276
10277         return 0;
10278 }
10279
10280 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
10281 {
10282         return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
10283                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
10284 }
10285
10286 /* find and rewrite pseudo imm in ld_imm64 instructions:
10287  *
10288  * 1. if it accesses map FD, replace it with actual map pointer.
10289  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
10290  *
10291  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
10292  */
10293 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
10294 {
10295         struct bpf_insn *insn = env->prog->insnsi;
10296         int insn_cnt = env->prog->len;
10297         int i, j, err;
10298
10299         err = bpf_prog_calc_tag(env->prog);
10300         if (err)
10301                 return err;
10302
10303         for (i = 0; i < insn_cnt; i++, insn++) {
10304                 if (BPF_CLASS(insn->code) == BPF_LDX &&
10305                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
10306                         verbose(env, "BPF_LDX uses reserved fields\n");
10307                         return -EINVAL;
10308                 }
10309
10310                 if (BPF_CLASS(insn->code) == BPF_STX &&
10311                     ((BPF_MODE(insn->code) != BPF_MEM &&
10312                       BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
10313                         verbose(env, "BPF_STX uses reserved fields\n");
10314                         return -EINVAL;
10315                 }
10316
10317                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
10318                         struct bpf_insn_aux_data *aux;
10319                         struct bpf_map *map;
10320                         struct fd f;
10321                         u64 addr;
10322
10323                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
10324                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
10325                             insn[1].off != 0) {
10326                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
10327                                 return -EINVAL;
10328                         }
10329
10330                         if (insn[0].src_reg == 0)
10331                                 /* valid generic load 64-bit imm */
10332                                 goto next_insn;
10333
10334                         if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
10335                                 aux = &env->insn_aux_data[i];
10336                                 err = check_pseudo_btf_id(env, insn, aux);
10337                                 if (err)
10338                                         return err;
10339                                 goto next_insn;
10340                         }
10341
10342                         /* In final convert_pseudo_ld_imm64() step, this is
10343                          * converted into regular 64-bit imm load insn.
10344                          */
10345                         if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
10346                              insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
10347                             (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
10348                              insn[1].imm != 0)) {
10349                                 verbose(env,
10350                                         "unrecognized bpf_ld_imm64 insn\n");
10351                                 return -EINVAL;
10352                         }
10353
10354                         f = fdget(insn[0].imm);
10355                         map = __bpf_map_get(f);
10356                         if (IS_ERR(map)) {
10357                                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
10358                                         insn[0].imm);
10359                                 return PTR_ERR(map);
10360                         }
10361
10362                         err = check_map_prog_compatibility(env, map, env->prog);
10363                         if (err) {
10364                                 fdput(f);
10365                                 return err;
10366                         }
10367
10368                         aux = &env->insn_aux_data[i];
10369                         if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
10370                                 addr = (unsigned long)map;
10371                         } else {
10372                                 u32 off = insn[1].imm;
10373
10374                                 if (off >= BPF_MAX_VAR_OFF) {
10375                                         verbose(env, "direct value offset of %u is not allowed\n", off);
10376                                         fdput(f);
10377                                         return -EINVAL;
10378                                 }
10379
10380                                 if (!map->ops->map_direct_value_addr) {
10381                                         verbose(env, "no direct value access support for this map type\n");
10382                                         fdput(f);
10383                                         return -EINVAL;
10384                                 }
10385
10386                                 err = map->ops->map_direct_value_addr(map, &addr, off);
10387                                 if (err) {
10388                                         verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
10389                                                 map->value_size, off);
10390                                         fdput(f);
10391                                         return err;
10392                                 }
10393
10394                                 aux->map_off = off;
10395                                 addr += off;
10396                         }
10397
10398                         insn[0].imm = (u32)addr;
10399                         insn[1].imm = addr >> 32;
10400
10401                         /* check whether we recorded this map already */
10402                         for (j = 0; j < env->used_map_cnt; j++) {
10403                                 if (env->used_maps[j] == map) {
10404                                         aux->map_index = j;
10405                                         fdput(f);
10406                                         goto next_insn;
10407                                 }
10408                         }
10409
10410                         if (env->used_map_cnt >= MAX_USED_MAPS) {
10411                                 fdput(f);
10412                                 return -E2BIG;
10413                         }
10414
10415                         /* hold the map. If the program is rejected by verifier,
10416                          * the map will be released by release_maps() or it
10417                          * will be used by the valid program until it's unloaded
10418                          * and all maps are released in free_used_maps()
10419                          */
10420                         bpf_map_inc(map);
10421
10422                         aux->map_index = env->used_map_cnt;
10423                         env->used_maps[env->used_map_cnt++] = map;
10424
10425                         if (bpf_map_is_cgroup_storage(map) &&
10426                             bpf_cgroup_storage_assign(env->prog->aux, map)) {
10427                                 verbose(env, "only one cgroup storage of each type is allowed\n");
10428                                 fdput(f);
10429                                 return -EBUSY;
10430                         }
10431
10432                         fdput(f);
10433 next_insn:
10434                         insn++;
10435                         i++;
10436                         continue;
10437                 }
10438
10439                 /* Basic sanity check before we invest more work here. */
10440                 if (!bpf_opcode_in_insntable(insn->code)) {
10441                         verbose(env, "unknown opcode %02x\n", insn->code);
10442                         return -EINVAL;
10443                 }
10444         }
10445
10446         /* now all pseudo BPF_LD_IMM64 instructions load valid
10447          * 'struct bpf_map *' into a register instead of user map_fd.
10448          * These pointers will be used later by verifier to validate map access.
10449          */
10450         return 0;
10451 }
10452
10453 /* drop refcnt of maps used by the rejected program */
10454 static void release_maps(struct bpf_verifier_env *env)
10455 {
10456         __bpf_free_used_maps(env->prog->aux, env->used_maps,
10457                              env->used_map_cnt);
10458 }
10459
10460 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
10461 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
10462 {
10463         struct bpf_insn *insn = env->prog->insnsi;
10464         int insn_cnt = env->prog->len;
10465         int i;
10466
10467         for (i = 0; i < insn_cnt; i++, insn++)
10468                 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
10469                         insn->src_reg = 0;
10470 }
10471
10472 /* single env->prog->insni[off] instruction was replaced with the range
10473  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
10474  * [0, off) and [off, end) to new locations, so the patched range stays zero
10475  */
10476 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
10477                                  struct bpf_insn_aux_data *new_data,
10478                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
10479 {
10480         struct bpf_insn_aux_data *old_data = env->insn_aux_data;
10481         struct bpf_insn *insn = new_prog->insnsi;
10482         u32 old_seen = old_data[off].seen;
10483         u32 prog_len;
10484         int i;
10485
10486         /* aux info at OFF always needs adjustment, no matter fast path
10487          * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
10488          * original insn at old prog.
10489          */
10490         old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
10491
10492         if (cnt == 1)
10493                 return;
10494         prog_len = new_prog->len;
10495
10496         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
10497         memcpy(new_data + off + cnt - 1, old_data + off,
10498                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
10499         for (i = off; i < off + cnt - 1; i++) {
10500                 /* Expand insni[off]'s seen count to the patched range. */
10501                 new_data[i].seen = old_seen;
10502                 new_data[i].zext_dst = insn_has_def32(env, insn + i);
10503         }
10504         env->insn_aux_data = new_data;
10505         vfree(old_data);
10506 }
10507
10508 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
10509 {
10510         int i;
10511
10512         if (len == 1)
10513                 return;
10514         /* NOTE: fake 'exit' subprog should be updated as well. */
10515         for (i = 0; i <= env->subprog_cnt; i++) {
10516                 if (env->subprog_info[i].start <= off)
10517                         continue;
10518                 env->subprog_info[i].start += len - 1;
10519         }
10520 }
10521
10522 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
10523 {
10524         struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
10525         int i, sz = prog->aux->size_poke_tab;
10526         struct bpf_jit_poke_descriptor *desc;
10527
10528         for (i = 0; i < sz; i++) {
10529                 desc = &tab[i];
10530                 if (desc->insn_idx <= off)
10531                         continue;
10532                 desc->insn_idx += len - 1;
10533         }
10534 }
10535
10536 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
10537                                             const struct bpf_insn *patch, u32 len)
10538 {
10539         struct bpf_prog *new_prog;
10540         struct bpf_insn_aux_data *new_data = NULL;
10541
10542         if (len > 1) {
10543                 new_data = vzalloc(array_size(env->prog->len + len - 1,
10544                                               sizeof(struct bpf_insn_aux_data)));
10545                 if (!new_data)
10546                         return NULL;
10547         }
10548
10549         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
10550         if (IS_ERR(new_prog)) {
10551                 if (PTR_ERR(new_prog) == -ERANGE)
10552                         verbose(env,
10553                                 "insn %d cannot be patched due to 16-bit range\n",
10554                                 env->insn_aux_data[off].orig_idx);
10555                 vfree(new_data);
10556                 return NULL;
10557         }
10558         adjust_insn_aux_data(env, new_data, new_prog, off, len);
10559         adjust_subprog_starts(env, off, len);
10560         adjust_poke_descs(new_prog, off, len);
10561         return new_prog;
10562 }
10563
10564 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
10565                                               u32 off, u32 cnt)
10566 {
10567         int i, j;
10568
10569         /* find first prog starting at or after off (first to remove) */
10570         for (i = 0; i < env->subprog_cnt; i++)
10571                 if (env->subprog_info[i].start >= off)
10572                         break;
10573         /* find first prog starting at or after off + cnt (first to stay) */
10574         for (j = i; j < env->subprog_cnt; j++)
10575                 if (env->subprog_info[j].start >= off + cnt)
10576                         break;
10577         /* if j doesn't start exactly at off + cnt, we are just removing
10578          * the front of previous prog
10579          */
10580         if (env->subprog_info[j].start != off + cnt)
10581                 j--;
10582
10583         if (j > i) {
10584                 struct bpf_prog_aux *aux = env->prog->aux;
10585                 int move;
10586
10587                 /* move fake 'exit' subprog as well */
10588                 move = env->subprog_cnt + 1 - j;
10589
10590                 memmove(env->subprog_info + i,
10591                         env->subprog_info + j,
10592                         sizeof(*env->subprog_info) * move);
10593                 env->subprog_cnt -= j - i;
10594
10595                 /* remove func_info */
10596                 if (aux->func_info) {
10597                         move = aux->func_info_cnt - j;
10598
10599                         memmove(aux->func_info + i,
10600                                 aux->func_info + j,
10601                                 sizeof(*aux->func_info) * move);
10602                         aux->func_info_cnt -= j - i;
10603                         /* func_info->insn_off is set after all code rewrites,
10604                          * in adjust_btf_func() - no need to adjust
10605                          */
10606                 }
10607         } else {
10608                 /* convert i from "first prog to remove" to "first to adjust" */
10609                 if (env->subprog_info[i].start == off)
10610                         i++;
10611         }
10612
10613         /* update fake 'exit' subprog as well */
10614         for (; i <= env->subprog_cnt; i++)
10615                 env->subprog_info[i].start -= cnt;
10616
10617         return 0;
10618 }
10619
10620 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
10621                                       u32 cnt)
10622 {
10623         struct bpf_prog *prog = env->prog;
10624         u32 i, l_off, l_cnt, nr_linfo;
10625         struct bpf_line_info *linfo;
10626
10627         nr_linfo = prog->aux->nr_linfo;
10628         if (!nr_linfo)
10629                 return 0;
10630
10631         linfo = prog->aux->linfo;
10632
10633         /* find first line info to remove, count lines to be removed */
10634         for (i = 0; i < nr_linfo; i++)
10635                 if (linfo[i].insn_off >= off)
10636                         break;
10637
10638         l_off = i;
10639         l_cnt = 0;
10640         for (; i < nr_linfo; i++)
10641                 if (linfo[i].insn_off < off + cnt)
10642                         l_cnt++;
10643                 else
10644                         break;
10645
10646         /* First live insn doesn't match first live linfo, it needs to "inherit"
10647          * last removed linfo.  prog is already modified, so prog->len == off
10648          * means no live instructions after (tail of the program was removed).
10649          */
10650         if (prog->len != off && l_cnt &&
10651             (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
10652                 l_cnt--;
10653                 linfo[--i].insn_off = off + cnt;
10654         }
10655
10656         /* remove the line info which refer to the removed instructions */
10657         if (l_cnt) {
10658                 memmove(linfo + l_off, linfo + i,
10659                         sizeof(*linfo) * (nr_linfo - i));
10660
10661                 prog->aux->nr_linfo -= l_cnt;
10662                 nr_linfo = prog->aux->nr_linfo;
10663         }
10664
10665         /* pull all linfo[i].insn_off >= off + cnt in by cnt */
10666         for (i = l_off; i < nr_linfo; i++)
10667                 linfo[i].insn_off -= cnt;
10668
10669         /* fix up all subprogs (incl. 'exit') which start >= off */
10670         for (i = 0; i <= env->subprog_cnt; i++)
10671                 if (env->subprog_info[i].linfo_idx > l_off) {
10672                         /* program may have started in the removed region but
10673                          * may not be fully removed
10674                          */
10675                         if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
10676                                 env->subprog_info[i].linfo_idx -= l_cnt;
10677                         else
10678                                 env->subprog_info[i].linfo_idx = l_off;
10679                 }
10680
10681         return 0;
10682 }
10683
10684 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
10685 {
10686         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10687         unsigned int orig_prog_len = env->prog->len;
10688         int err;
10689
10690         if (bpf_prog_is_dev_bound(env->prog->aux))
10691                 bpf_prog_offload_remove_insns(env, off, cnt);
10692
10693         err = bpf_remove_insns(env->prog, off, cnt);
10694         if (err)
10695                 return err;
10696
10697         err = adjust_subprog_starts_after_remove(env, off, cnt);
10698         if (err)
10699                 return err;
10700
10701         err = bpf_adj_linfo_after_remove(env, off, cnt);
10702         if (err)
10703                 return err;
10704
10705         memmove(aux_data + off, aux_data + off + cnt,
10706                 sizeof(*aux_data) * (orig_prog_len - off - cnt));
10707
10708         return 0;
10709 }
10710
10711 /* The verifier does more data flow analysis than llvm and will not
10712  * explore branches that are dead at run time. Malicious programs can
10713  * have dead code too. Therefore replace all dead at-run-time code
10714  * with 'ja -1'.
10715  *
10716  * Just nops are not optimal, e.g. if they would sit at the end of the
10717  * program and through another bug we would manage to jump there, then
10718  * we'd execute beyond program memory otherwise. Returning exception
10719  * code also wouldn't work since we can have subprogs where the dead
10720  * code could be located.
10721  */
10722 static void sanitize_dead_code(struct bpf_verifier_env *env)
10723 {
10724         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10725         struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
10726         struct bpf_insn *insn = env->prog->insnsi;
10727         const int insn_cnt = env->prog->len;
10728         int i;
10729
10730         for (i = 0; i < insn_cnt; i++) {
10731                 if (aux_data[i].seen)
10732                         continue;
10733                 memcpy(insn + i, &trap, sizeof(trap));
10734                 aux_data[i].zext_dst = false;
10735         }
10736 }
10737
10738 static bool insn_is_cond_jump(u8 code)
10739 {
10740         u8 op;
10741
10742         if (BPF_CLASS(code) == BPF_JMP32)
10743                 return true;
10744
10745         if (BPF_CLASS(code) != BPF_JMP)
10746                 return false;
10747
10748         op = BPF_OP(code);
10749         return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
10750 }
10751
10752 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
10753 {
10754         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10755         struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10756         struct bpf_insn *insn = env->prog->insnsi;
10757         const int insn_cnt = env->prog->len;
10758         int i;
10759
10760         for (i = 0; i < insn_cnt; i++, insn++) {
10761                 if (!insn_is_cond_jump(insn->code))
10762                         continue;
10763
10764                 if (!aux_data[i + 1].seen)
10765                         ja.off = insn->off;
10766                 else if (!aux_data[i + 1 + insn->off].seen)
10767                         ja.off = 0;
10768                 else
10769                         continue;
10770
10771                 if (bpf_prog_is_dev_bound(env->prog->aux))
10772                         bpf_prog_offload_replace_insn(env, i, &ja);
10773
10774                 memcpy(insn, &ja, sizeof(ja));
10775         }
10776 }
10777
10778 static int opt_remove_dead_code(struct bpf_verifier_env *env)
10779 {
10780         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10781         int insn_cnt = env->prog->len;
10782         int i, err;
10783
10784         for (i = 0; i < insn_cnt; i++) {
10785                 int j;
10786
10787                 j = 0;
10788                 while (i + j < insn_cnt && !aux_data[i + j].seen)
10789                         j++;
10790                 if (!j)
10791                         continue;
10792
10793                 err = verifier_remove_insns(env, i, j);
10794                 if (err)
10795                         return err;
10796                 insn_cnt = env->prog->len;
10797         }
10798
10799         return 0;
10800 }
10801
10802 static int opt_remove_nops(struct bpf_verifier_env *env)
10803 {
10804         const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10805         struct bpf_insn *insn = env->prog->insnsi;
10806         int insn_cnt = env->prog->len;
10807         int i, err;
10808
10809         for (i = 0; i < insn_cnt; i++) {
10810                 if (memcmp(&insn[i], &ja, sizeof(ja)))
10811                         continue;
10812
10813                 err = verifier_remove_insns(env, i, 1);
10814                 if (err)
10815                         return err;
10816                 insn_cnt--;
10817                 i--;
10818         }
10819
10820         return 0;
10821 }
10822
10823 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
10824                                          const union bpf_attr *attr)
10825 {
10826         struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
10827         struct bpf_insn_aux_data *aux = env->insn_aux_data;
10828         int i, patch_len, delta = 0, len = env->prog->len;
10829         struct bpf_insn *insns = env->prog->insnsi;
10830         struct bpf_prog *new_prog;
10831         bool rnd_hi32;
10832
10833         rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
10834         zext_patch[1] = BPF_ZEXT_REG(0);
10835         rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
10836         rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
10837         rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
10838         for (i = 0; i < len; i++) {
10839                 int adj_idx = i + delta;
10840                 struct bpf_insn insn;
10841
10842                 insn = insns[adj_idx];
10843                 if (!aux[adj_idx].zext_dst) {
10844                         u8 code, class;
10845                         u32 imm_rnd;
10846
10847                         if (!rnd_hi32)
10848                                 continue;
10849
10850                         code = insn.code;
10851                         class = BPF_CLASS(code);
10852                         if (insn_no_def(&insn))
10853                                 continue;
10854
10855                         /* NOTE: arg "reg" (the fourth one) is only used for
10856                          *       BPF_STX which has been ruled out in above
10857                          *       check, it is safe to pass NULL here.
10858                          */
10859                         if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
10860                                 if (class == BPF_LD &&
10861                                     BPF_MODE(code) == BPF_IMM)
10862                                         i++;
10863                                 continue;
10864                         }
10865
10866                         /* ctx load could be transformed into wider load. */
10867                         if (class == BPF_LDX &&
10868                             aux[adj_idx].ptr_type == PTR_TO_CTX)
10869                                 continue;
10870
10871                         imm_rnd = get_random_int();
10872                         rnd_hi32_patch[0] = insn;
10873                         rnd_hi32_patch[1].imm = imm_rnd;
10874                         rnd_hi32_patch[3].dst_reg = insn.dst_reg;
10875                         patch = rnd_hi32_patch;
10876                         patch_len = 4;
10877                         goto apply_patch_buffer;
10878                 }
10879
10880                 if (!bpf_jit_needs_zext())
10881                         continue;
10882
10883                 zext_patch[0] = insn;
10884                 zext_patch[1].dst_reg = insn.dst_reg;
10885                 zext_patch[1].src_reg = insn.dst_reg;
10886                 patch = zext_patch;
10887                 patch_len = 2;
10888 apply_patch_buffer:
10889                 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
10890                 if (!new_prog)
10891                         return -ENOMEM;
10892                 env->prog = new_prog;
10893                 insns = new_prog->insnsi;
10894                 aux = env->insn_aux_data;
10895                 delta += patch_len - 1;
10896         }
10897
10898         return 0;
10899 }
10900
10901 /* convert load instructions that access fields of a context type into a
10902  * sequence of instructions that access fields of the underlying structure:
10903  *     struct __sk_buff    -> struct sk_buff
10904  *     struct bpf_sock_ops -> struct sock
10905  */
10906 static int convert_ctx_accesses(struct bpf_verifier_env *env)
10907 {
10908         const struct bpf_verifier_ops *ops = env->ops;
10909         int i, cnt, size, ctx_field_size, delta = 0;
10910         const int insn_cnt = env->prog->len;
10911         struct bpf_insn insn_buf[16], *insn;
10912         u32 target_size, size_default, off;
10913         struct bpf_prog *new_prog;
10914         enum bpf_access_type type;
10915         bool is_narrower_load;
10916
10917         if (ops->gen_prologue || env->seen_direct_write) {
10918                 if (!ops->gen_prologue) {
10919                         verbose(env, "bpf verifier is misconfigured\n");
10920                         return -EINVAL;
10921                 }
10922                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
10923                                         env->prog);
10924                 if (cnt >= ARRAY_SIZE(insn_buf)) {
10925                         verbose(env, "bpf verifier is misconfigured\n");
10926                         return -EINVAL;
10927                 } else if (cnt) {
10928                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
10929                         if (!new_prog)
10930                                 return -ENOMEM;
10931
10932                         env->prog = new_prog;
10933                         delta += cnt - 1;
10934                 }
10935         }
10936
10937         if (bpf_prog_is_dev_bound(env->prog->aux))
10938                 return 0;
10939
10940         insn = env->prog->insnsi + delta;
10941
10942         for (i = 0; i < insn_cnt; i++, insn++) {
10943                 bpf_convert_ctx_access_t convert_ctx_access;
10944                 bool ctx_access;
10945
10946                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
10947                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
10948                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
10949                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
10950                         type = BPF_READ;
10951                         ctx_access = true;
10952                 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
10953                            insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
10954                            insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
10955                            insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
10956                            insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
10957                            insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
10958                            insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
10959                            insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
10960                         type = BPF_WRITE;
10961                         ctx_access = BPF_CLASS(insn->code) == BPF_STX;
10962                 } else {
10963                         continue;
10964                 }
10965
10966                 if (type == BPF_WRITE &&
10967                     env->insn_aux_data[i + delta].sanitize_stack_spill) {
10968                         struct bpf_insn patch[] = {
10969                                 *insn,
10970                                 BPF_ST_NOSPEC(),
10971                         };
10972
10973                         cnt = ARRAY_SIZE(patch);
10974                         new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
10975                         if (!new_prog)
10976                                 return -ENOMEM;
10977
10978                         delta    += cnt - 1;
10979                         env->prog = new_prog;
10980                         insn      = new_prog->insnsi + i + delta;
10981                         continue;
10982                 }
10983
10984                 if (!ctx_access)
10985                         continue;
10986
10987                 switch (env->insn_aux_data[i + delta].ptr_type) {
10988                 case PTR_TO_CTX:
10989                         if (!ops->convert_ctx_access)
10990                                 continue;
10991                         convert_ctx_access = ops->convert_ctx_access;
10992                         break;
10993                 case PTR_TO_SOCKET:
10994                 case PTR_TO_SOCK_COMMON:
10995                         convert_ctx_access = bpf_sock_convert_ctx_access;
10996                         break;
10997                 case PTR_TO_TCP_SOCK:
10998                         convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
10999                         break;
11000                 case PTR_TO_XDP_SOCK:
11001                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
11002                         break;
11003                 case PTR_TO_BTF_ID:
11004                         if (type == BPF_READ) {
11005                                 insn->code = BPF_LDX | BPF_PROBE_MEM |
11006                                         BPF_SIZE((insn)->code);
11007                                 env->prog->aux->num_exentries++;
11008                         } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
11009                                 verbose(env, "Writes through BTF pointers are not allowed\n");
11010                                 return -EINVAL;
11011                         }
11012                         continue;
11013                 default:
11014                         continue;
11015                 }
11016
11017                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
11018                 size = BPF_LDST_BYTES(insn);
11019
11020                 /* If the read access is a narrower load of the field,
11021                  * convert to a 4/8-byte load, to minimum program type specific
11022                  * convert_ctx_access changes. If conversion is successful,
11023                  * we will apply proper mask to the result.
11024                  */
11025                 is_narrower_load = size < ctx_field_size;
11026                 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
11027                 off = insn->off;
11028                 if (is_narrower_load) {
11029                         u8 size_code;
11030
11031                         if (type == BPF_WRITE) {
11032                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
11033                                 return -EINVAL;
11034                         }
11035
11036                         size_code = BPF_H;
11037                         if (ctx_field_size == 4)
11038                                 size_code = BPF_W;
11039                         else if (ctx_field_size == 8)
11040                                 size_code = BPF_DW;
11041
11042                         insn->off = off & ~(size_default - 1);
11043                         insn->code = BPF_LDX | BPF_MEM | size_code;
11044                 }
11045
11046                 target_size = 0;
11047                 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
11048                                          &target_size);
11049                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
11050                     (ctx_field_size && !target_size)) {
11051                         verbose(env, "bpf verifier is misconfigured\n");
11052                         return -EINVAL;
11053                 }
11054
11055                 if (is_narrower_load && size < target_size) {
11056                         u8 shift = bpf_ctx_narrow_access_offset(
11057                                 off, size, size_default) * 8;
11058                         if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
11059                                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
11060                                 return -EINVAL;
11061                         }
11062                         if (ctx_field_size <= 4) {
11063                                 if (shift)
11064                                         insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
11065                                                                         insn->dst_reg,
11066                                                                         shift);
11067                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
11068                                                                 (1 << size * 8) - 1);
11069                         } else {
11070                                 if (shift)
11071                                         insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
11072                                                                         insn->dst_reg,
11073                                                                         shift);
11074                                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
11075                                                                 (1ULL << size * 8) - 1);
11076                         }
11077                 }
11078
11079                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11080                 if (!new_prog)
11081                         return -ENOMEM;
11082
11083                 delta += cnt - 1;
11084
11085                 /* keep walking new program and skip insns we just inserted */
11086                 env->prog = new_prog;
11087                 insn      = new_prog->insnsi + i + delta;
11088         }
11089
11090         return 0;
11091 }
11092
11093 static int jit_subprogs(struct bpf_verifier_env *env)
11094 {
11095         struct bpf_prog *prog = env->prog, **func, *tmp;
11096         int i, j, subprog_start, subprog_end = 0, len, subprog;
11097         struct bpf_map *map_ptr;
11098         struct bpf_insn *insn;
11099         void *old_bpf_func;
11100         int err, num_exentries;
11101
11102         if (env->subprog_cnt <= 1)
11103                 return 0;
11104
11105         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11106                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11107                     insn->src_reg != BPF_PSEUDO_CALL)
11108                         continue;
11109                 /* Upon error here we cannot fall back to interpreter but
11110                  * need a hard reject of the program. Thus -EFAULT is
11111                  * propagated in any case.
11112                  */
11113                 subprog = find_subprog(env, i + insn->imm + 1);
11114                 if (subprog < 0) {
11115                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
11116                                   i + insn->imm + 1);
11117                         return -EFAULT;
11118                 }
11119                 /* temporarily remember subprog id inside insn instead of
11120                  * aux_data, since next loop will split up all insns into funcs
11121                  */
11122                 insn->off = subprog;
11123                 /* remember original imm in case JIT fails and fallback
11124                  * to interpreter will be needed
11125                  */
11126                 env->insn_aux_data[i].call_imm = insn->imm;
11127                 /* point imm to __bpf_call_base+1 from JITs point of view */
11128                 insn->imm = 1;
11129         }
11130
11131         err = bpf_prog_alloc_jited_linfo(prog);
11132         if (err)
11133                 goto out_undo_insn;
11134
11135         err = -ENOMEM;
11136         func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
11137         if (!func)
11138                 goto out_undo_insn;
11139
11140         for (i = 0; i < env->subprog_cnt; i++) {
11141                 subprog_start = subprog_end;
11142                 subprog_end = env->subprog_info[i + 1].start;
11143
11144                 len = subprog_end - subprog_start;
11145                 /* BPF_PROG_RUN doesn't call subprogs directly,
11146                  * hence main prog stats include the runtime of subprogs.
11147                  * subprogs don't have IDs and not reachable via prog_get_next_id
11148                  * func[i]->aux->stats will never be accessed and stays NULL
11149                  */
11150                 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
11151                 if (!func[i])
11152                         goto out_free;
11153                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
11154                        len * sizeof(struct bpf_insn));
11155                 func[i]->type = prog->type;
11156                 func[i]->len = len;
11157                 if (bpf_prog_calc_tag(func[i]))
11158                         goto out_free;
11159                 func[i]->is_func = 1;
11160                 func[i]->aux->func_idx = i;
11161                 /* Below members will be freed only at prog->aux */
11162                 func[i]->aux->btf = prog->aux->btf;
11163                 func[i]->aux->func_info = prog->aux->func_info;
11164                 func[i]->aux->poke_tab = prog->aux->poke_tab;
11165                 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
11166
11167                 for (j = 0; j < prog->aux->size_poke_tab; j++) {
11168                         struct bpf_jit_poke_descriptor *poke;
11169
11170                         poke = &prog->aux->poke_tab[j];
11171                         if (poke->insn_idx < subprog_end &&
11172                             poke->insn_idx >= subprog_start)
11173                                 poke->aux = func[i]->aux;
11174                 }
11175
11176                 /* Use bpf_prog_F_tag to indicate functions in stack traces.
11177                  * Long term would need debug info to populate names
11178                  */
11179                 func[i]->aux->name[0] = 'F';
11180                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
11181                 func[i]->jit_requested = 1;
11182                 func[i]->aux->linfo = prog->aux->linfo;
11183                 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
11184                 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
11185                 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
11186                 num_exentries = 0;
11187                 insn = func[i]->insnsi;
11188                 for (j = 0; j < func[i]->len; j++, insn++) {
11189                         if (BPF_CLASS(insn->code) == BPF_LDX &&
11190                             BPF_MODE(insn->code) == BPF_PROBE_MEM)
11191                                 num_exentries++;
11192                 }
11193                 func[i]->aux->num_exentries = num_exentries;
11194                 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
11195                 func[i] = bpf_int_jit_compile(func[i]);
11196                 if (!func[i]->jited) {
11197                         err = -ENOTSUPP;
11198                         goto out_free;
11199                 }
11200                 cond_resched();
11201         }
11202
11203         /* at this point all bpf functions were successfully JITed
11204          * now populate all bpf_calls with correct addresses and
11205          * run last pass of JIT
11206          */
11207         for (i = 0; i < env->subprog_cnt; i++) {
11208                 insn = func[i]->insnsi;
11209                 for (j = 0; j < func[i]->len; j++, insn++) {
11210                         if (insn->code != (BPF_JMP | BPF_CALL) ||
11211                             insn->src_reg != BPF_PSEUDO_CALL)
11212                                 continue;
11213                         subprog = insn->off;
11214                         insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
11215                                     __bpf_call_base;
11216                 }
11217
11218                 /* we use the aux data to keep a list of the start addresses
11219                  * of the JITed images for each function in the program
11220                  *
11221                  * for some architectures, such as powerpc64, the imm field
11222                  * might not be large enough to hold the offset of the start
11223                  * address of the callee's JITed image from __bpf_call_base
11224                  *
11225                  * in such cases, we can lookup the start address of a callee
11226                  * by using its subprog id, available from the off field of
11227                  * the call instruction, as an index for this list
11228                  */
11229                 func[i]->aux->func = func;
11230                 func[i]->aux->func_cnt = env->subprog_cnt;
11231         }
11232         for (i = 0; i < env->subprog_cnt; i++) {
11233                 old_bpf_func = func[i]->bpf_func;
11234                 tmp = bpf_int_jit_compile(func[i]);
11235                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
11236                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
11237                         err = -ENOTSUPP;
11238                         goto out_free;
11239                 }
11240                 cond_resched();
11241         }
11242
11243         /* finally lock prog and jit images for all functions and
11244          * populate kallsysm
11245          */
11246         for (i = 0; i < env->subprog_cnt; i++) {
11247                 bpf_prog_lock_ro(func[i]);
11248                 bpf_prog_kallsyms_add(func[i]);
11249         }
11250
11251         /* Last step: make now unused interpreter insns from main
11252          * prog consistent for later dump requests, so they can
11253          * later look the same as if they were interpreted only.
11254          */
11255         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11256                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11257                     insn->src_reg != BPF_PSEUDO_CALL)
11258                         continue;
11259                 insn->off = env->insn_aux_data[i].call_imm;
11260                 subprog = find_subprog(env, i + insn->off + 1);
11261                 insn->imm = subprog;
11262         }
11263
11264         prog->jited = 1;
11265         prog->bpf_func = func[0]->bpf_func;
11266         prog->aux->func = func;
11267         prog->aux->func_cnt = env->subprog_cnt;
11268         bpf_prog_free_unused_jited_linfo(prog);
11269         return 0;
11270 out_free:
11271         /* We failed JIT'ing, so at this point we need to unregister poke
11272          * descriptors from subprogs, so that kernel is not attempting to
11273          * patch it anymore as we're freeing the subprog JIT memory.
11274          */
11275         for (i = 0; i < prog->aux->size_poke_tab; i++) {
11276                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
11277                 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
11278         }
11279         /* At this point we're guaranteed that poke descriptors are not
11280          * live anymore. We can just unlink its descriptor table as it's
11281          * released with the main prog.
11282          */
11283         for (i = 0; i < env->subprog_cnt; i++) {
11284                 if (!func[i])
11285                         continue;
11286                 func[i]->aux->poke_tab = NULL;
11287                 bpf_jit_free(func[i]);
11288         }
11289         kfree(func);
11290 out_undo_insn:
11291         /* cleanup main prog to be interpreted */
11292         prog->jit_requested = 0;
11293         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11294                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11295                     insn->src_reg != BPF_PSEUDO_CALL)
11296                         continue;
11297                 insn->off = 0;
11298                 insn->imm = env->insn_aux_data[i].call_imm;
11299         }
11300         bpf_prog_free_jited_linfo(prog);
11301         return err;
11302 }
11303
11304 static int fixup_call_args(struct bpf_verifier_env *env)
11305 {
11306 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11307         struct bpf_prog *prog = env->prog;
11308         struct bpf_insn *insn = prog->insnsi;
11309         int i, depth;
11310 #endif
11311         int err = 0;
11312
11313         if (env->prog->jit_requested &&
11314             !bpf_prog_is_dev_bound(env->prog->aux)) {
11315                 err = jit_subprogs(env);
11316                 if (err == 0)
11317                         return 0;
11318                 if (err == -EFAULT)
11319                         return err;
11320         }
11321 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11322         if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
11323                 /* When JIT fails the progs with bpf2bpf calls and tail_calls
11324                  * have to be rejected, since interpreter doesn't support them yet.
11325                  */
11326                 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
11327                 return -EINVAL;
11328         }
11329         for (i = 0; i < prog->len; i++, insn++) {
11330                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11331                     insn->src_reg != BPF_PSEUDO_CALL)
11332                         continue;
11333                 depth = get_callee_stack_depth(env, insn, i);
11334                 if (depth < 0)
11335                         return depth;
11336                 bpf_patch_call_args(insn, depth);
11337         }
11338         err = 0;
11339 #endif
11340         return err;
11341 }
11342
11343 /* fixup insn->imm field of bpf_call instructions
11344  * and inline eligible helpers as explicit sequence of BPF instructions
11345  *
11346  * this function is called after eBPF program passed verification
11347  */
11348 static int fixup_bpf_calls(struct bpf_verifier_env *env)
11349 {
11350         struct bpf_prog *prog = env->prog;
11351         bool expect_blinding = bpf_jit_blinding_enabled(prog);
11352         struct bpf_insn *insn = prog->insnsi;
11353         const struct bpf_func_proto *fn;
11354         const int insn_cnt = prog->len;
11355         const struct bpf_map_ops *ops;
11356         struct bpf_insn_aux_data *aux;
11357         struct bpf_insn insn_buf[16];
11358         struct bpf_prog *new_prog;
11359         struct bpf_map *map_ptr;
11360         int i, ret, cnt, delta = 0;
11361
11362         for (i = 0; i < insn_cnt; i++, insn++) {
11363                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
11364                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
11365                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
11366                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
11367                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
11368                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
11369                         struct bpf_insn *patchlet;
11370                         struct bpf_insn chk_and_div[] = {
11371                                 /* [R,W]x div 0 -> 0 */
11372                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11373                                              BPF_JNE | BPF_K, insn->src_reg,
11374                                              0, 2, 0),
11375                                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
11376                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11377                                 *insn,
11378                         };
11379                         struct bpf_insn chk_and_mod[] = {
11380                                 /* [R,W]x mod 0 -> [R,W]x */
11381                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11382                                              BPF_JEQ | BPF_K, insn->src_reg,
11383                                              0, 1 + (is64 ? 0 : 1), 0),
11384                                 *insn,
11385                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11386                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
11387                         };
11388
11389                         patchlet = isdiv ? chk_and_div : chk_and_mod;
11390                         cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
11391                                       ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
11392
11393                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
11394                         if (!new_prog)
11395                                 return -ENOMEM;
11396
11397                         delta    += cnt - 1;
11398                         env->prog = prog = new_prog;
11399                         insn      = new_prog->insnsi + i + delta;
11400                         continue;
11401                 }
11402
11403                 if (BPF_CLASS(insn->code) == BPF_LD &&
11404                     (BPF_MODE(insn->code) == BPF_ABS ||
11405                      BPF_MODE(insn->code) == BPF_IND)) {
11406                         cnt = env->ops->gen_ld_abs(insn, insn_buf);
11407                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11408                                 verbose(env, "bpf verifier is misconfigured\n");
11409                                 return -EINVAL;
11410                         }
11411
11412                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11413                         if (!new_prog)
11414                                 return -ENOMEM;
11415
11416                         delta    += cnt - 1;
11417                         env->prog = prog = new_prog;
11418                         insn      = new_prog->insnsi + i + delta;
11419                         continue;
11420                 }
11421
11422                 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
11423                     insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
11424                         const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
11425                         const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
11426                         struct bpf_insn insn_buf[16];
11427                         struct bpf_insn *patch = &insn_buf[0];
11428                         bool issrc, isneg, isimm;
11429                         u32 off_reg;
11430
11431                         aux = &env->insn_aux_data[i + delta];
11432                         if (!aux->alu_state ||
11433                             aux->alu_state == BPF_ALU_NON_POINTER)
11434                                 continue;
11435
11436                         isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
11437                         issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
11438                                 BPF_ALU_SANITIZE_SRC;
11439                         isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
11440
11441                         off_reg = issrc ? insn->src_reg : insn->dst_reg;
11442                         if (isimm) {
11443                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11444                         } else {
11445                                 if (isneg)
11446                                         *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11447                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11448                                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
11449                                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
11450                                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
11451                                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
11452                                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
11453                         }
11454                         if (!issrc)
11455                                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
11456                         insn->src_reg = BPF_REG_AX;
11457                         if (isneg)
11458                                 insn->code = insn->code == code_add ?
11459                                              code_sub : code_add;
11460                         *patch++ = *insn;
11461                         if (issrc && isneg && !isimm)
11462                                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11463                         cnt = patch - insn_buf;
11464
11465                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11466                         if (!new_prog)
11467                                 return -ENOMEM;
11468
11469                         delta    += cnt - 1;
11470                         env->prog = prog = new_prog;
11471                         insn      = new_prog->insnsi + i + delta;
11472                         continue;
11473                 }
11474
11475                 if (insn->code != (BPF_JMP | BPF_CALL))
11476                         continue;
11477                 if (insn->src_reg == BPF_PSEUDO_CALL)
11478                         continue;
11479
11480                 if (insn->imm == BPF_FUNC_get_route_realm)
11481                         prog->dst_needed = 1;
11482                 if (insn->imm == BPF_FUNC_get_prandom_u32)
11483                         bpf_user_rnd_init_once();
11484                 if (insn->imm == BPF_FUNC_override_return)
11485                         prog->kprobe_override = 1;
11486                 if (insn->imm == BPF_FUNC_tail_call) {
11487                         /* If we tail call into other programs, we
11488                          * cannot make any assumptions since they can
11489                          * be replaced dynamically during runtime in
11490                          * the program array.
11491                          */
11492                         prog->cb_access = 1;
11493                         if (!allow_tail_call_in_subprogs(env))
11494                                 prog->aux->stack_depth = MAX_BPF_STACK;
11495                         prog->aux->max_pkt_offset = MAX_PACKET_OFF;
11496
11497                         /* mark bpf_tail_call as different opcode to avoid
11498                          * conditional branch in the interpeter for every normal
11499                          * call and to prevent accidental JITing by JIT compiler
11500                          * that doesn't support bpf_tail_call yet
11501                          */
11502                         insn->imm = 0;
11503                         insn->code = BPF_JMP | BPF_TAIL_CALL;
11504
11505                         aux = &env->insn_aux_data[i + delta];
11506                         if (env->bpf_capable && !expect_blinding &&
11507                             prog->jit_requested &&
11508                             !bpf_map_key_poisoned(aux) &&
11509                             !bpf_map_ptr_poisoned(aux) &&
11510                             !bpf_map_ptr_unpriv(aux)) {
11511                                 struct bpf_jit_poke_descriptor desc = {
11512                                         .reason = BPF_POKE_REASON_TAIL_CALL,
11513                                         .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
11514                                         .tail_call.key = bpf_map_key_immediate(aux),
11515                                         .insn_idx = i + delta,
11516                                 };
11517
11518                                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
11519                                 if (ret < 0) {
11520                                         verbose(env, "adding tail call poke descriptor failed\n");
11521                                         return ret;
11522                                 }
11523
11524                                 insn->imm = ret + 1;
11525                                 continue;
11526                         }
11527
11528                         if (!bpf_map_ptr_unpriv(aux))
11529                                 continue;
11530
11531                         /* instead of changing every JIT dealing with tail_call
11532                          * emit two extra insns:
11533                          * if (index >= max_entries) goto out;
11534                          * index &= array->index_mask;
11535                          * to avoid out-of-bounds cpu speculation
11536                          */
11537                         if (bpf_map_ptr_poisoned(aux)) {
11538                                 verbose(env, "tail_call abusing map_ptr\n");
11539                                 return -EINVAL;
11540                         }
11541
11542                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11543                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
11544                                                   map_ptr->max_entries, 2);
11545                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
11546                                                     container_of(map_ptr,
11547                                                                  struct bpf_array,
11548                                                                  map)->index_mask);
11549                         insn_buf[2] = *insn;
11550                         cnt = 3;
11551                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11552                         if (!new_prog)
11553                                 return -ENOMEM;
11554
11555                         delta    += cnt - 1;
11556                         env->prog = prog = new_prog;
11557                         insn      = new_prog->insnsi + i + delta;
11558                         continue;
11559                 }
11560
11561                 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
11562                  * and other inlining handlers are currently limited to 64 bit
11563                  * only.
11564                  */
11565                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
11566                     (insn->imm == BPF_FUNC_map_lookup_elem ||
11567                      insn->imm == BPF_FUNC_map_update_elem ||
11568                      insn->imm == BPF_FUNC_map_delete_elem ||
11569                      insn->imm == BPF_FUNC_map_push_elem   ||
11570                      insn->imm == BPF_FUNC_map_pop_elem    ||
11571                      insn->imm == BPF_FUNC_map_peek_elem)) {
11572                         aux = &env->insn_aux_data[i + delta];
11573                         if (bpf_map_ptr_poisoned(aux))
11574                                 goto patch_call_imm;
11575
11576                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11577                         ops = map_ptr->ops;
11578                         if (insn->imm == BPF_FUNC_map_lookup_elem &&
11579                             ops->map_gen_lookup) {
11580                                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
11581                                 if (cnt == -EOPNOTSUPP)
11582                                         goto patch_map_ops_generic;
11583                                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11584                                         verbose(env, "bpf verifier is misconfigured\n");
11585                                         return -EINVAL;
11586                                 }
11587
11588                                 new_prog = bpf_patch_insn_data(env, i + delta,
11589                                                                insn_buf, cnt);
11590                                 if (!new_prog)
11591                                         return -ENOMEM;
11592
11593                                 delta    += cnt - 1;
11594                                 env->prog = prog = new_prog;
11595                                 insn      = new_prog->insnsi + i + delta;
11596                                 continue;
11597                         }
11598
11599                         BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
11600                                      (void *(*)(struct bpf_map *map, void *key))NULL));
11601                         BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
11602                                      (int (*)(struct bpf_map *map, void *key))NULL));
11603                         BUILD_BUG_ON(!__same_type(ops->map_update_elem,
11604                                      (int (*)(struct bpf_map *map, void *key, void *value,
11605                                               u64 flags))NULL));
11606                         BUILD_BUG_ON(!__same_type(ops->map_push_elem,
11607                                      (int (*)(struct bpf_map *map, void *value,
11608                                               u64 flags))NULL));
11609                         BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
11610                                      (int (*)(struct bpf_map *map, void *value))NULL));
11611                         BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
11612                                      (int (*)(struct bpf_map *map, void *value))NULL));
11613 patch_map_ops_generic:
11614                         switch (insn->imm) {
11615                         case BPF_FUNC_map_lookup_elem:
11616                                 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
11617                                             __bpf_call_base;
11618                                 continue;
11619                         case BPF_FUNC_map_update_elem:
11620                                 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
11621                                             __bpf_call_base;
11622                                 continue;
11623                         case BPF_FUNC_map_delete_elem:
11624                                 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
11625                                             __bpf_call_base;
11626                                 continue;
11627                         case BPF_FUNC_map_push_elem:
11628                                 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
11629                                             __bpf_call_base;
11630                                 continue;
11631                         case BPF_FUNC_map_pop_elem:
11632                                 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
11633                                             __bpf_call_base;
11634                                 continue;
11635                         case BPF_FUNC_map_peek_elem:
11636                                 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
11637                                             __bpf_call_base;
11638                                 continue;
11639                         }
11640
11641                         goto patch_call_imm;
11642                 }
11643
11644                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
11645                     insn->imm == BPF_FUNC_jiffies64) {
11646                         struct bpf_insn ld_jiffies_addr[2] = {
11647                                 BPF_LD_IMM64(BPF_REG_0,
11648                                              (unsigned long)&jiffies),
11649                         };
11650
11651                         insn_buf[0] = ld_jiffies_addr[0];
11652                         insn_buf[1] = ld_jiffies_addr[1];
11653                         insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
11654                                                   BPF_REG_0, 0);
11655                         cnt = 3;
11656
11657                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
11658                                                        cnt);
11659                         if (!new_prog)
11660                                 return -ENOMEM;
11661
11662                         delta    += cnt - 1;
11663                         env->prog = prog = new_prog;
11664                         insn      = new_prog->insnsi + i + delta;
11665                         continue;
11666                 }
11667
11668 patch_call_imm:
11669                 fn = env->ops->get_func_proto(insn->imm, env->prog);
11670                 /* all functions that have prototype and verifier allowed
11671                  * programs to call them, must be real in-kernel functions
11672                  */
11673                 if (!fn->func) {
11674                         verbose(env,
11675                                 "kernel subsystem misconfigured func %s#%d\n",
11676                                 func_id_name(insn->imm), insn->imm);
11677                         return -EFAULT;
11678                 }
11679                 insn->imm = fn->func - __bpf_call_base;
11680         }
11681
11682         /* Since poke tab is now finalized, publish aux to tracker. */
11683         for (i = 0; i < prog->aux->size_poke_tab; i++) {
11684                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
11685                 if (!map_ptr->ops->map_poke_track ||
11686                     !map_ptr->ops->map_poke_untrack ||
11687                     !map_ptr->ops->map_poke_run) {
11688                         verbose(env, "bpf verifier is misconfigured\n");
11689                         return -EINVAL;
11690                 }
11691
11692                 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
11693                 if (ret < 0) {
11694                         verbose(env, "tracking tail call prog failed\n");
11695                         return ret;
11696                 }
11697         }
11698
11699         return 0;
11700 }
11701
11702 static void free_states(struct bpf_verifier_env *env)
11703 {
11704         struct bpf_verifier_state_list *sl, *sln;
11705         int i;
11706
11707         sl = env->free_list;
11708         while (sl) {
11709                 sln = sl->next;
11710                 free_verifier_state(&sl->state, false);
11711                 kfree(sl);
11712                 sl = sln;
11713         }
11714         env->free_list = NULL;
11715
11716         if (!env->explored_states)
11717                 return;
11718
11719         for (i = 0; i < state_htab_size(env); i++) {
11720                 sl = env->explored_states[i];
11721
11722                 while (sl) {
11723                         sln = sl->next;
11724                         free_verifier_state(&sl->state, false);
11725                         kfree(sl);
11726                         sl = sln;
11727                 }
11728                 env->explored_states[i] = NULL;
11729         }
11730 }
11731
11732 static int do_check_common(struct bpf_verifier_env *env, int subprog)
11733 {
11734         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11735         struct bpf_verifier_state *state;
11736         struct bpf_reg_state *regs;
11737         int ret, i;
11738
11739         env->prev_linfo = NULL;
11740         env->pass_cnt++;
11741
11742         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
11743         if (!state)
11744                 return -ENOMEM;
11745         state->curframe = 0;
11746         state->speculative = false;
11747         state->branches = 1;
11748         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
11749         if (!state->frame[0]) {
11750                 kfree(state);
11751                 return -ENOMEM;
11752         }
11753         env->cur_state = state;
11754         init_func_state(env, state->frame[0],
11755                         BPF_MAIN_FUNC /* callsite */,
11756                         0 /* frameno */,
11757                         subprog);
11758
11759         regs = state->frame[state->curframe]->regs;
11760         if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
11761                 ret = btf_prepare_func_args(env, subprog, regs);
11762                 if (ret)
11763                         goto out;
11764                 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
11765                         if (regs[i].type == PTR_TO_CTX)
11766                                 mark_reg_known_zero(env, regs, i);
11767                         else if (regs[i].type == SCALAR_VALUE)
11768                                 mark_reg_unknown(env, regs, i);
11769                 }
11770         } else {
11771                 /* 1st arg to a function */
11772                 regs[BPF_REG_1].type = PTR_TO_CTX;
11773                 mark_reg_known_zero(env, regs, BPF_REG_1);
11774                 ret = btf_check_func_arg_match(env, subprog, regs);
11775                 if (ret == -EFAULT)
11776                         /* unlikely verifier bug. abort.
11777                          * ret == 0 and ret < 0 are sadly acceptable for
11778                          * main() function due to backward compatibility.
11779                          * Like socket filter program may be written as:
11780                          * int bpf_prog(struct pt_regs *ctx)
11781                          * and never dereference that ctx in the program.
11782                          * 'struct pt_regs' is a type mismatch for socket
11783                          * filter that should be using 'struct __sk_buff'.
11784                          */
11785                         goto out;
11786         }
11787
11788         ret = do_check(env);
11789 out:
11790         /* check for NULL is necessary, since cur_state can be freed inside
11791          * do_check() under memory pressure.
11792          */
11793         if (env->cur_state) {
11794                 free_verifier_state(env->cur_state, true);
11795                 env->cur_state = NULL;
11796         }
11797         while (!pop_stack(env, NULL, NULL, false));
11798         if (!ret && pop_log)
11799                 bpf_vlog_reset(&env->log, 0);
11800         free_states(env);
11801         return ret;
11802 }
11803
11804 /* Verify all global functions in a BPF program one by one based on their BTF.
11805  * All global functions must pass verification. Otherwise the whole program is rejected.
11806  * Consider:
11807  * int bar(int);
11808  * int foo(int f)
11809  * {
11810  *    return bar(f);
11811  * }
11812  * int bar(int b)
11813  * {
11814  *    ...
11815  * }
11816  * foo() will be verified first for R1=any_scalar_value. During verification it
11817  * will be assumed that bar() already verified successfully and call to bar()
11818  * from foo() will be checked for type match only. Later bar() will be verified
11819  * independently to check that it's safe for R1=any_scalar_value.
11820  */
11821 static int do_check_subprogs(struct bpf_verifier_env *env)
11822 {
11823         struct bpf_prog_aux *aux = env->prog->aux;
11824         int i, ret;
11825
11826         if (!aux->func_info)
11827                 return 0;
11828
11829         for (i = 1; i < env->subprog_cnt; i++) {
11830                 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
11831                         continue;
11832                 env->insn_idx = env->subprog_info[i].start;
11833                 WARN_ON_ONCE(env->insn_idx == 0);
11834                 ret = do_check_common(env, i);
11835                 if (ret) {
11836                         return ret;
11837                 } else if (env->log.level & BPF_LOG_LEVEL) {
11838                         verbose(env,
11839                                 "Func#%d is safe for any args that match its prototype\n",
11840                                 i);
11841                 }
11842         }
11843         return 0;
11844 }
11845
11846 static int do_check_main(struct bpf_verifier_env *env)
11847 {
11848         int ret;
11849
11850         env->insn_idx = 0;
11851         ret = do_check_common(env, 0);
11852         if (!ret)
11853                 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
11854         return ret;
11855 }
11856
11857
11858 static void print_verification_stats(struct bpf_verifier_env *env)
11859 {
11860         int i;
11861
11862         if (env->log.level & BPF_LOG_STATS) {
11863                 verbose(env, "verification time %lld usec\n",
11864                         div_u64(env->verification_time, 1000));
11865                 verbose(env, "stack depth ");
11866                 for (i = 0; i < env->subprog_cnt; i++) {
11867                         u32 depth = env->subprog_info[i].stack_depth;
11868
11869                         verbose(env, "%d", depth);
11870                         if (i + 1 < env->subprog_cnt)
11871                                 verbose(env, "+");
11872                 }
11873                 verbose(env, "\n");
11874         }
11875         verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
11876                 "total_states %d peak_states %d mark_read %d\n",
11877                 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
11878                 env->max_states_per_insn, env->total_states,
11879                 env->peak_states, env->longest_mark_read_walk);
11880 }
11881
11882 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
11883 {
11884         const struct btf_type *t, *func_proto;
11885         const struct bpf_struct_ops *st_ops;
11886         const struct btf_member *member;
11887         struct bpf_prog *prog = env->prog;
11888         u32 btf_id, member_idx;
11889         const char *mname;
11890
11891         if (!prog->gpl_compatible) {
11892                 verbose(env, "struct ops programs must have a GPL compatible license\n");
11893                 return -EINVAL;
11894         }
11895
11896         btf_id = prog->aux->attach_btf_id;
11897         st_ops = bpf_struct_ops_find(btf_id);
11898         if (!st_ops) {
11899                 verbose(env, "attach_btf_id %u is not a supported struct\n",
11900                         btf_id);
11901                 return -ENOTSUPP;
11902         }
11903
11904         t = st_ops->type;
11905         member_idx = prog->expected_attach_type;
11906         if (member_idx >= btf_type_vlen(t)) {
11907                 verbose(env, "attach to invalid member idx %u of struct %s\n",
11908                         member_idx, st_ops->name);
11909                 return -EINVAL;
11910         }
11911
11912         member = &btf_type_member(t)[member_idx];
11913         mname = btf_name_by_offset(btf_vmlinux, member->name_off);
11914         func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
11915                                                NULL);
11916         if (!func_proto) {
11917                 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
11918                         mname, member_idx, st_ops->name);
11919                 return -EINVAL;
11920         }
11921
11922         if (st_ops->check_member) {
11923                 int err = st_ops->check_member(t, member);
11924
11925                 if (err) {
11926                         verbose(env, "attach to unsupported member %s of struct %s\n",
11927                                 mname, st_ops->name);
11928                         return err;
11929                 }
11930         }
11931
11932         prog->aux->attach_func_proto = func_proto;
11933         prog->aux->attach_func_name = mname;
11934         env->ops = st_ops->verifier_ops;
11935
11936         return 0;
11937 }
11938 #define SECURITY_PREFIX "security_"
11939
11940 static int check_attach_modify_return(unsigned long addr, const char *func_name)
11941 {
11942         if (within_error_injection_list(addr) ||
11943             !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
11944                 return 0;
11945
11946         return -EINVAL;
11947 }
11948
11949 /* non exhaustive list of sleepable bpf_lsm_*() functions */
11950 BTF_SET_START(btf_sleepable_lsm_hooks)
11951 #ifdef CONFIG_BPF_LSM
11952 BTF_ID(func, bpf_lsm_bprm_committed_creds)
11953 #else
11954 BTF_ID_UNUSED
11955 #endif
11956 BTF_SET_END(btf_sleepable_lsm_hooks)
11957
11958 static int check_sleepable_lsm_hook(u32 btf_id)
11959 {
11960         return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
11961 }
11962
11963 /* list of non-sleepable functions that are otherwise on
11964  * ALLOW_ERROR_INJECTION list
11965  */
11966 BTF_SET_START(btf_non_sleepable_error_inject)
11967 /* Three functions below can be called from sleepable and non-sleepable context.
11968  * Assume non-sleepable from bpf safety point of view.
11969  */
11970 BTF_ID(func, __add_to_page_cache_locked)
11971 BTF_ID(func, should_fail_alloc_page)
11972 BTF_ID(func, should_failslab)
11973 BTF_SET_END(btf_non_sleepable_error_inject)
11974
11975 static int check_non_sleepable_error_inject(u32 btf_id)
11976 {
11977         return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
11978 }
11979
11980 int bpf_check_attach_target(struct bpf_verifier_log *log,
11981                             const struct bpf_prog *prog,
11982                             const struct bpf_prog *tgt_prog,
11983                             u32 btf_id,
11984                             struct bpf_attach_target_info *tgt_info)
11985 {
11986         bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
11987         const char prefix[] = "btf_trace_";
11988         int ret = 0, subprog = -1, i;
11989         const struct btf_type *t;
11990         bool conservative = true;
11991         const char *tname;
11992         struct btf *btf;
11993         long addr = 0;
11994
11995         if (!btf_id) {
11996                 bpf_log(log, "Tracing programs must provide btf_id\n");
11997                 return -EINVAL;
11998         }
11999         btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
12000         if (!btf) {
12001                 bpf_log(log,
12002                         "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
12003                 return -EINVAL;
12004         }
12005         t = btf_type_by_id(btf, btf_id);
12006         if (!t) {
12007                 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
12008                 return -EINVAL;
12009         }
12010         tname = btf_name_by_offset(btf, t->name_off);
12011         if (!tname) {
12012                 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
12013                 return -EINVAL;
12014         }
12015         if (tgt_prog) {
12016                 struct bpf_prog_aux *aux = tgt_prog->aux;
12017
12018                 for (i = 0; i < aux->func_info_cnt; i++)
12019                         if (aux->func_info[i].type_id == btf_id) {
12020                                 subprog = i;
12021                                 break;
12022                         }
12023                 if (subprog == -1) {
12024                         bpf_log(log, "Subprog %s doesn't exist\n", tname);
12025                         return -EINVAL;
12026                 }
12027                 conservative = aux->func_info_aux[subprog].unreliable;
12028                 if (prog_extension) {
12029                         if (conservative) {
12030                                 bpf_log(log,
12031                                         "Cannot replace static functions\n");
12032                                 return -EINVAL;
12033                         }
12034                         if (!prog->jit_requested) {
12035                                 bpf_log(log,
12036                                         "Extension programs should be JITed\n");
12037                                 return -EINVAL;
12038                         }
12039                 }
12040                 if (!tgt_prog->jited) {
12041                         bpf_log(log, "Can attach to only JITed progs\n");
12042                         return -EINVAL;
12043                 }
12044                 if (tgt_prog->type == prog->type) {
12045                         /* Cannot fentry/fexit another fentry/fexit program.
12046                          * Cannot attach program extension to another extension.
12047                          * It's ok to attach fentry/fexit to extension program.
12048                          */
12049                         bpf_log(log, "Cannot recursively attach\n");
12050                         return -EINVAL;
12051                 }
12052                 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
12053                     prog_extension &&
12054                     (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
12055                      tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
12056                         /* Program extensions can extend all program types
12057                          * except fentry/fexit. The reason is the following.
12058                          * The fentry/fexit programs are used for performance
12059                          * analysis, stats and can be attached to any program
12060                          * type except themselves. When extension program is
12061                          * replacing XDP function it is necessary to allow
12062                          * performance analysis of all functions. Both original
12063                          * XDP program and its program extension. Hence
12064                          * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
12065                          * allowed. If extending of fentry/fexit was allowed it
12066                          * would be possible to create long call chain
12067                          * fentry->extension->fentry->extension beyond
12068                          * reasonable stack size. Hence extending fentry is not
12069                          * allowed.
12070                          */
12071                         bpf_log(log, "Cannot extend fentry/fexit\n");
12072                         return -EINVAL;
12073                 }
12074         } else {
12075                 if (prog_extension) {
12076                         bpf_log(log, "Cannot replace kernel functions\n");
12077                         return -EINVAL;
12078                 }
12079         }
12080
12081         switch (prog->expected_attach_type) {
12082         case BPF_TRACE_RAW_TP:
12083                 if (tgt_prog) {
12084                         bpf_log(log,
12085                                 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
12086                         return -EINVAL;
12087                 }
12088                 if (!btf_type_is_typedef(t)) {
12089                         bpf_log(log, "attach_btf_id %u is not a typedef\n",
12090                                 btf_id);
12091                         return -EINVAL;
12092                 }
12093                 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
12094                         bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
12095                                 btf_id, tname);
12096                         return -EINVAL;
12097                 }
12098                 tname += sizeof(prefix) - 1;
12099                 t = btf_type_by_id(btf, t->type);
12100                 if (!btf_type_is_ptr(t))
12101                         /* should never happen in valid vmlinux build */
12102                         return -EINVAL;
12103                 t = btf_type_by_id(btf, t->type);
12104                 if (!btf_type_is_func_proto(t))
12105                         /* should never happen in valid vmlinux build */
12106                         return -EINVAL;
12107
12108                 break;
12109         case BPF_TRACE_ITER:
12110                 if (!btf_type_is_func(t)) {
12111                         bpf_log(log, "attach_btf_id %u is not a function\n",
12112                                 btf_id);
12113                         return -EINVAL;
12114                 }
12115                 t = btf_type_by_id(btf, t->type);
12116                 if (!btf_type_is_func_proto(t))
12117                         return -EINVAL;
12118                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12119                 if (ret)
12120                         return ret;
12121                 break;
12122         default:
12123                 if (!prog_extension)
12124                         return -EINVAL;
12125                 fallthrough;
12126         case BPF_MODIFY_RETURN:
12127         case BPF_LSM_MAC:
12128         case BPF_TRACE_FENTRY:
12129         case BPF_TRACE_FEXIT:
12130                 if (!btf_type_is_func(t)) {
12131                         bpf_log(log, "attach_btf_id %u is not a function\n",
12132                                 btf_id);
12133                         return -EINVAL;
12134                 }
12135                 if (prog_extension &&
12136                     btf_check_type_match(log, prog, btf, t))
12137                         return -EINVAL;
12138                 t = btf_type_by_id(btf, t->type);
12139                 if (!btf_type_is_func_proto(t))
12140                         return -EINVAL;
12141
12142                 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
12143                     (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
12144                      prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
12145                         return -EINVAL;
12146
12147                 if (tgt_prog && conservative)
12148                         t = NULL;
12149
12150                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12151                 if (ret < 0)
12152                         return ret;
12153
12154                 if (tgt_prog) {
12155                         if (subprog == 0)
12156                                 addr = (long) tgt_prog->bpf_func;
12157                         else
12158                                 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
12159                 } else {
12160                         addr = kallsyms_lookup_name(tname);
12161                         if (!addr) {
12162                                 bpf_log(log,
12163                                         "The address of function %s cannot be found\n",
12164                                         tname);
12165                                 return -ENOENT;
12166                         }
12167                 }
12168
12169                 if (prog->aux->sleepable) {
12170                         ret = -EINVAL;
12171                         switch (prog->type) {
12172                         case BPF_PROG_TYPE_TRACING:
12173                                 /* fentry/fexit/fmod_ret progs can be sleepable only if they are
12174                                  * attached to ALLOW_ERROR_INJECTION and are not in denylist.
12175                                  */
12176                                 if (!check_non_sleepable_error_inject(btf_id) &&
12177                                     within_error_injection_list(addr))
12178                                         ret = 0;
12179                                 break;
12180                         case BPF_PROG_TYPE_LSM:
12181                                 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
12182                                  * Only some of them are sleepable.
12183                                  */
12184                                 if (check_sleepable_lsm_hook(btf_id))
12185                                         ret = 0;
12186                                 break;
12187                         default:
12188                                 break;
12189                         }
12190                         if (ret) {
12191                                 bpf_log(log, "%s is not sleepable\n", tname);
12192                                 return ret;
12193                         }
12194                 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
12195                         if (tgt_prog) {
12196                                 bpf_log(log, "can't modify return codes of BPF programs\n");
12197                                 return -EINVAL;
12198                         }
12199                         ret = check_attach_modify_return(addr, tname);
12200                         if (ret) {
12201                                 bpf_log(log, "%s() is not modifiable\n", tname);
12202                                 return ret;
12203                         }
12204                 }
12205
12206                 break;
12207         }
12208         tgt_info->tgt_addr = addr;
12209         tgt_info->tgt_name = tname;
12210         tgt_info->tgt_type = t;
12211         return 0;
12212 }
12213
12214 static int check_attach_btf_id(struct bpf_verifier_env *env)
12215 {
12216         struct bpf_prog *prog = env->prog;
12217         struct bpf_prog *tgt_prog = prog->aux->dst_prog;
12218         struct bpf_attach_target_info tgt_info = {};
12219         u32 btf_id = prog->aux->attach_btf_id;
12220         struct bpf_trampoline *tr;
12221         int ret;
12222         u64 key;
12223
12224         if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
12225             prog->type != BPF_PROG_TYPE_LSM) {
12226                 verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
12227                 return -EINVAL;
12228         }
12229
12230         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
12231                 return check_struct_ops_btf_id(env);
12232
12233         if (prog->type != BPF_PROG_TYPE_TRACING &&
12234             prog->type != BPF_PROG_TYPE_LSM &&
12235             prog->type != BPF_PROG_TYPE_EXT)
12236                 return 0;
12237
12238         ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
12239         if (ret)
12240                 return ret;
12241
12242         if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
12243                 /* to make freplace equivalent to their targets, they need to
12244                  * inherit env->ops and expected_attach_type for the rest of the
12245                  * verification
12246                  */
12247                 env->ops = bpf_verifier_ops[tgt_prog->type];
12248                 prog->expected_attach_type = tgt_prog->expected_attach_type;
12249         }
12250
12251         /* store info about the attachment target that will be used later */
12252         prog->aux->attach_func_proto = tgt_info.tgt_type;
12253         prog->aux->attach_func_name = tgt_info.tgt_name;
12254
12255         if (tgt_prog) {
12256                 prog->aux->saved_dst_prog_type = tgt_prog->type;
12257                 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
12258         }
12259
12260         if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
12261                 prog->aux->attach_btf_trace = true;
12262                 return 0;
12263         } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
12264                 if (!bpf_iter_prog_supported(prog))
12265                         return -EINVAL;
12266                 return 0;
12267         }
12268
12269         if (prog->type == BPF_PROG_TYPE_LSM) {
12270                 ret = bpf_lsm_verify_prog(&env->log, prog);
12271                 if (ret < 0)
12272                         return ret;
12273         }
12274
12275         key = bpf_trampoline_compute_key(tgt_prog, btf_id);
12276         tr = bpf_trampoline_get(key, &tgt_info);
12277         if (!tr)
12278                 return -ENOMEM;
12279
12280         prog->aux->dst_trampoline = tr;
12281         return 0;
12282 }
12283
12284 struct btf *bpf_get_btf_vmlinux(void)
12285 {
12286         if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
12287                 mutex_lock(&bpf_verifier_lock);
12288                 if (!btf_vmlinux)
12289                         btf_vmlinux = btf_parse_vmlinux();
12290                 mutex_unlock(&bpf_verifier_lock);
12291         }
12292         return btf_vmlinux;
12293 }
12294
12295 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
12296               union bpf_attr __user *uattr)
12297 {
12298         u64 start_time = ktime_get_ns();
12299         struct bpf_verifier_env *env;
12300         struct bpf_verifier_log *log;
12301         int i, len, ret = -EINVAL;
12302         bool is_priv;
12303
12304         /* no program is valid */
12305         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
12306                 return -EINVAL;
12307
12308         /* 'struct bpf_verifier_env' can be global, but since it's not small,
12309          * allocate/free it every time bpf_check() is called
12310          */
12311         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
12312         if (!env)
12313                 return -ENOMEM;
12314         log = &env->log;
12315
12316         len = (*prog)->len;
12317         env->insn_aux_data =
12318                 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
12319         ret = -ENOMEM;
12320         if (!env->insn_aux_data)
12321                 goto err_free_env;
12322         for (i = 0; i < len; i++)
12323                 env->insn_aux_data[i].orig_idx = i;
12324         env->prog = *prog;
12325         env->ops = bpf_verifier_ops[env->prog->type];
12326         is_priv = bpf_capable();
12327
12328         bpf_get_btf_vmlinux();
12329
12330         /* grab the mutex to protect few globals used by verifier */
12331         if (!is_priv)
12332                 mutex_lock(&bpf_verifier_lock);
12333
12334         if (attr->log_level || attr->log_buf || attr->log_size) {
12335                 /* user requested verbose verifier output
12336                  * and supplied buffer to store the verification trace
12337                  */
12338                 log->level = attr->log_level;
12339                 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
12340                 log->len_total = attr->log_size;
12341
12342                 ret = -EINVAL;
12343                 /* log attributes have to be sane */
12344                 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
12345                     !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
12346                         goto err_unlock;
12347         }
12348
12349         if (IS_ERR(btf_vmlinux)) {
12350                 /* Either gcc or pahole or kernel are broken. */
12351                 verbose(env, "in-kernel BTF is malformed\n");
12352                 ret = PTR_ERR(btf_vmlinux);
12353                 goto skip_full_check;
12354         }
12355
12356         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
12357         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
12358                 env->strict_alignment = true;
12359         if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
12360                 env->strict_alignment = false;
12361
12362         env->allow_ptr_leaks = bpf_allow_ptr_leaks();
12363         env->allow_uninit_stack = bpf_allow_uninit_stack();
12364         env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
12365         env->bypass_spec_v1 = bpf_bypass_spec_v1();
12366         env->bypass_spec_v4 = bpf_bypass_spec_v4();
12367         env->bpf_capable = bpf_capable();
12368
12369         if (is_priv)
12370                 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
12371
12372         env->explored_states = kvcalloc(state_htab_size(env),
12373                                        sizeof(struct bpf_verifier_state_list *),
12374                                        GFP_USER);
12375         ret = -ENOMEM;
12376         if (!env->explored_states)
12377                 goto skip_full_check;
12378
12379         ret = check_subprogs(env);
12380         if (ret < 0)
12381                 goto skip_full_check;
12382
12383         ret = check_btf_info(env, attr, uattr);
12384         if (ret < 0)
12385                 goto skip_full_check;
12386
12387         ret = check_attach_btf_id(env);
12388         if (ret)
12389                 goto skip_full_check;
12390
12391         ret = resolve_pseudo_ldimm64(env);
12392         if (ret < 0)
12393                 goto skip_full_check;
12394
12395         if (bpf_prog_is_dev_bound(env->prog->aux)) {
12396                 ret = bpf_prog_offload_verifier_prep(env->prog);
12397                 if (ret)
12398                         goto skip_full_check;
12399         }
12400
12401         ret = check_cfg(env);
12402         if (ret < 0)
12403                 goto skip_full_check;
12404
12405         ret = do_check_subprogs(env);
12406         ret = ret ?: do_check_main(env);
12407
12408         if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
12409                 ret = bpf_prog_offload_finalize(env);
12410
12411 skip_full_check:
12412         kvfree(env->explored_states);
12413
12414         if (ret == 0)
12415                 ret = check_max_stack_depth(env);
12416
12417         /* instruction rewrites happen after this point */
12418         if (is_priv) {
12419                 if (ret == 0)
12420                         opt_hard_wire_dead_code_branches(env);
12421                 if (ret == 0)
12422                         ret = opt_remove_dead_code(env);
12423                 if (ret == 0)
12424                         ret = opt_remove_nops(env);
12425         } else {
12426                 if (ret == 0)
12427                         sanitize_dead_code(env);
12428         }
12429
12430         if (ret == 0)
12431                 /* program is valid, convert *(u32*)(ctx + off) accesses */
12432                 ret = convert_ctx_accesses(env);
12433
12434         if (ret == 0)
12435                 ret = fixup_bpf_calls(env);
12436
12437         /* do 32-bit optimization after insn patching has done so those patched
12438          * insns could be handled correctly.
12439          */
12440         if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
12441                 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
12442                 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
12443                                                                      : false;
12444         }
12445
12446         if (ret == 0)
12447                 ret = fixup_call_args(env);
12448
12449         env->verification_time = ktime_get_ns() - start_time;
12450         print_verification_stats(env);
12451
12452         if (log->level && bpf_verifier_log_full(log))
12453                 ret = -ENOSPC;
12454         if (log->level && !log->ubuf) {
12455                 ret = -EFAULT;
12456                 goto err_release_maps;
12457         }
12458
12459         if (ret == 0 && env->used_map_cnt) {
12460                 /* if program passed verifier, update used_maps in bpf_prog_info */
12461                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
12462                                                           sizeof(env->used_maps[0]),
12463                                                           GFP_KERNEL);
12464
12465                 if (!env->prog->aux->used_maps) {
12466                         ret = -ENOMEM;
12467                         goto err_release_maps;
12468                 }
12469
12470                 memcpy(env->prog->aux->used_maps, env->used_maps,
12471                        sizeof(env->used_maps[0]) * env->used_map_cnt);
12472                 env->prog->aux->used_map_cnt = env->used_map_cnt;
12473
12474                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
12475                  * bpf_ld_imm64 instructions
12476                  */
12477                 convert_pseudo_ld_imm64(env);
12478         }
12479
12480         if (ret == 0)
12481                 adjust_btf_func(env);
12482
12483 err_release_maps:
12484         if (!env->prog->aux->used_maps)
12485                 /* if we didn't copy map pointers into bpf_prog_info, release
12486                  * them now. Otherwise free_used_maps() will release them.
12487                  */
12488                 release_maps(env);
12489
12490         /* extension progs temporarily inherit the attach_type of their targets
12491            for verification purposes, so set it back to zero before returning
12492          */
12493         if (env->prog->type == BPF_PROG_TYPE_EXT)
12494                 env->prog->expected_attach_type = 0;
12495
12496         *prog = env->prog;
12497 err_unlock:
12498         if (!is_priv)
12499                 mutex_unlock(&bpf_verifier_lock);
12500         vfree(env->insn_aux_data);
12501 err_free_env:
12502         kfree(env);
12503         return ret;
12504 }