linux/kernel/bpf/verifier.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3 * Copyright (c) 2016 Facebook
   4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5 */
   6#include <uapi/linux/btf.h>
   7#include <linux/kernel.h>
   8#include <linux/types.h>
   9#include <linux/slab.h>
  10#include <linux/bpf.h>
  11#include <linux/btf.h>
  12#include <linux/bpf_verifier.h>
  13#include <linux/filter.h>
  14#include <net/netlink.h>
  15#include <linux/file.h>
  16#include <linux/vmalloc.h>
  17#include <linux/stringify.h>
  18#include <linux/bsearch.h>
  19#include <linux/sort.h>
  20#include <linux/perf_event.h>
  21#include <linux/ctype.h>
  22#include <linux/error-injection.h>
  23#include <linux/bpf_lsm.h>
  24#include <linux/btf_ids.h>
  25
  26#include "disasm.h"
  27
  28static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  29#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
  30        [_id] = & _name ## _verifier_ops,
  31#define BPF_MAP_TYPE(_id, _ops)
  32#define BPF_LINK_TYPE(_id, _name)
  33#include <linux/bpf_types.h>
  34#undef BPF_PROG_TYPE
  35#undef BPF_MAP_TYPE
  36#undef BPF_LINK_TYPE
  37};
  38
  39/* bpf_check() is a static code analyzer that walks eBPF program
  40 * instruction by instruction and updates register/stack state.
  41 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  42 *
  43 * The first pass is depth-first-search to check that the program is a DAG.
  44 * It rejects the following programs:
  45 * - larger than BPF_MAXINSNS insns
  46 * - if loop is present (detected via back-edge)
  47 * - unreachable insns exist (the program must be one function, not a forest)
  48 * - out of bounds or malformed jumps
  49 * The second pass is all possible path descent from the 1st insn.
  50 * Since it's analyzing all paths through the program, the length of the
  51 * analysis is limited to 64k insn, which may be hit even if total number of
  52 * insn is less than 4K, but there are too many branches that change stack/regs.
  53 * Number of 'branches to be analyzed' is limited to 1k
  54 *
  55 * On entry to each instruction, each register has a type, and the instruction
  56 * changes the types of the registers depending on instruction semantics.
  57 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  58 * copied to R1.
  59 *
  60 * All registers are 64-bit.
  61 * R0 - return register
  62 * R1-R5 argument passing registers
  63 * R6-R9 callee saved registers
  64 * R10 - frame pointer read-only
  65 *
  66 * At the start of BPF program the register R1 contains a pointer to bpf_context
  67 * and has type PTR_TO_CTX.
  68 *
  69 * Verifier tracks arithmetic operations on pointers in case:
  70 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  71 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  72 * 1st insn copies R10 (the read-only frame pointer) into R1
  73 * and 2nd arithmetic instruction is pattern matched to recognize
  74 * that it wants to construct a pointer to some element within stack.
  75 * So after 2nd insn, the register R1 has type PTR_TO_STACK
  76 * (and -20 constant is saved for further stack bounds checking).
  77 * Meaning that this reg is a pointer to stack plus known immediate constant.
  78 *
  79 * Most of the time the registers have SCALAR_VALUE type, which
  80 * means the register has some value, but it's not a valid pointer.
  81 * (like pointer plus pointer becomes SCALAR_VALUE type)
  82 *
  83 * When verifier sees load or store instructions the type of base register
  84 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  85 * four pointer types recognized by the check_mem_access() function.
  86 *
  87 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  88 * and the range of [ptr, ptr + map's value_size) is accessible.
  89 *
  90 * Registers used to pass values to function calls are checked against
  91 * function argument constraints.
  92 *
  93 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  94 * It means that the register type passed to this function must be
  95 * PTR_TO_STACK and it will be used inside the function as
  96 * 'pointer to map element key'
  97 *
  98 * For example the argument constraints for bpf_map_lookup_elem():
  99 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 100 *   .arg1_type = ARG_CONST_MAP_PTR,
 101 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 102 *
 103 * ret_type says that this function returns 'pointer to map elem value or null'.
 104 * The function expects its 1st argument to be a const pointer to 'struct bpf_map'
 105 * and its 2nd argument to be a pointer to stack, which will be used inside
 106 * the helper function as a pointer to map element key.
 107 *
 108 * On the kernel side the helper function looks like:
 109 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 110 * {
 111 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 112 *    void *key = (void *) (unsigned long) r2;
 113 *    void *value;
 114 *
 115 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 116 *    [key, key + map->key_size) bytes are valid and were initialized on
 117 *    the stack of eBPF program.
 118 * }
 119 *
 120 * Corresponding eBPF program may look like:
 121 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is PTR_TO_STACK (copy of frame pointer R10)
 122 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 still has type PTR_TO_STACK, now pointing at fp-4
 123 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 124 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 125 * here verifier looks at prototype of map_lookup_elem() and sees:
 126 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 127 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 128 *
 129 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 130 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 131 * and were initialized prior to this call.
 132 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 133 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 134 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 135 * returns either pointer to map value or NULL.
 136 *
 137 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 138 * insn, the register holding that pointer in the true branch changes state to
 139 * PTR_TO_MAP_VALUE and the same register changes state to a known-zero
 140 * SCALAR_VALUE in the false branch. See check_cond_jmp_op().
 141 *
 142 * After the call R0 is set to return type of the function and registers R1-R5
 143 * are set to NOT_INIT to indicate that they are no longer readable.
 144 *
 145 * The following reference types represent a potential reference to a kernel
 146 * resource which, after first being allocated, must be checked and freed by
 147 * the BPF program:
 148 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 149 *
 150 * When the verifier sees a helper call return a reference type, it allocates a
 151 * pointer id for the reference and stores it in the current function state.
 152 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 153 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 154 * passes through a NULL-check conditional. For the branch wherein the state is
 155 * changed to a known-zero scalar, the verifier releases the reference.
 156 *
 157 * For each helper function that allocates a reference, such as
 158 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 159 * bpf_sk_release(). When a reference type passes into the release function,
 160 * the verifier also releases the reference. If any unchecked or unreleased
 161 * reference remains at the end of the program, the verifier rejects it.
 162 */
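
/* For illustration, a minimal sketch (not taken from the original examples
 * above) of the NULL-check pattern just described: after the
 * bpf_map_lookup_elem() call R0 has type PTR_TO_MAP_VALUE_OR_NULL, so the
 * program must test it before dereferencing:
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // if R0 == NULL, skip the access
 *    BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),  // fall-through: R0 is PTR_TO_MAP_VALUE here
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 *
 * In the taken branch R0 becomes a known-zero scalar, and any load or store
 * through it would be rejected by check_mem_access().
 */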
 163
 164/* verifier_state + insn_idx are pushed to stack when branch is encountered */
 165struct bpf_verifier_stack_elem {
 166        /* verifier state is 'st'
 167         * before processing instruction 'insn_idx'
 168         * and after processing instruction 'prev_insn_idx'
 169         */
 170        struct bpf_verifier_state st;
 171        int insn_idx;
 172        int prev_insn_idx;
 173        struct bpf_verifier_stack_elem *next;
 174        /* length of verifier log at the time this state was pushed on stack */
 175        u32 log_pos;
 176};
 177
 178#define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
 179#define BPF_COMPLEXITY_LIMIT_STATES     64
 180
 181#define BPF_MAP_KEY_POISON      (1ULL << 63)
 182#define BPF_MAP_KEY_SEEN        (1ULL << 62)
 183
 184#define BPF_MAP_PTR_UNPRIV      1UL
 185#define BPF_MAP_PTR_POISON      ((void *)((0xeB9FUL << 1) +     \
 186                                          POISON_POINTER_DELTA))
 187#define BPF_MAP_PTR(X)          ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
 188
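
/* A note on the encodings used by the helpers below: map_ptr_state stores the
 * struct bpf_map pointer with BPF_MAP_PTR_UNPRIV folded into bit 0 (pointers
 * are aligned, so that bit is otherwise unused), and BPF_MAP_PTR_POISON is a
 * sentinel value meaning the instruction is not tied to a single known map.
 * map_key_state keeps a constant key value in the low bits and uses the top
 * two bits as the BPF_MAP_KEY_POISON ("key is not a single constant") and
 * BPF_MAP_KEY_SEEN flags; e.g. recording a constant key of 4 stores
 * (4 | BPF_MAP_KEY_SEEN).
 */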
 189static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 190{
 191        return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
 192}
 193
 194static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 195{
 196        return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
 197}
 198
 199static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 200                              const struct bpf_map *map, bool unpriv)
 201{
 202        BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 203        unpriv |= bpf_map_ptr_unpriv(aux);
 204        aux->map_ptr_state = (unsigned long)map |
 205                             (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 206}
 207
 208static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
 209{
 210        return aux->map_key_state & BPF_MAP_KEY_POISON;
 211}
 212
 213static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
 214{
 215        return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
 216}
 217
 218static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
 219{
 220        return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
 221}
 222
 223static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 224{
 225        bool poisoned = bpf_map_key_poisoned(aux);
 226
 227        aux->map_key_state = state | BPF_MAP_KEY_SEEN |
 228                             (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 229}
 230
 231static bool bpf_pseudo_call(const struct bpf_insn *insn)
 232{
 233        return insn->code == (BPF_JMP | BPF_CALL) &&
 234               insn->src_reg == BPF_PSEUDO_CALL;
 235}
 236
 237static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 238{
 239        return insn->code == (BPF_JMP | BPF_CALL) &&
 240               insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 241}
 242
 243struct bpf_call_arg_meta {
 244        struct bpf_map *map_ptr;
 245        bool raw_mode;
 246        bool pkt_access;
 247        int regno;
 248        int access_size;
 249        int mem_size;
 250        u64 msize_max_value;
 251        int ref_obj_id;
 252        int map_uid;
 253        int func_id;
 254        struct btf *btf;
 255        u32 btf_id;
 256        struct btf *ret_btf;
 257        u32 ret_btf_id;
 258        u32 subprogno;
 259};
 260
 261struct btf *btf_vmlinux;
 262
 263static DEFINE_MUTEX(bpf_verifier_lock);
 264
 265static const struct bpf_line_info *
 266find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 267{
 268        const struct bpf_line_info *linfo;
 269        const struct bpf_prog *prog;
 270        u32 i, nr_linfo;
 271
 272        prog = env->prog;
 273        nr_linfo = prog->aux->nr_linfo;
 274
 275        if (!nr_linfo || insn_off >= prog->len)
 276                return NULL;
 277
 278        linfo = prog->aux->linfo;
 279        for (i = 1; i < nr_linfo; i++)
 280                if (insn_off < linfo[i].insn_off)
 281                        break;
 282
 283        return &linfo[i - 1];
 284}
 285
 286void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 287                       va_list args)
 288{
 289        unsigned int n;
 290
 291        n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 292
 293        WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 294                  "verifier log line truncated - local buffer too short\n");
 295
 296        n = min(log->len_total - log->len_used - 1, n);
 297        log->kbuf[n] = '\0';
 298
 299        if (log->level == BPF_LOG_KERNEL) {
 300                pr_err("BPF:%s\n", log->kbuf);
 301                return;
 302        }
 303        if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 304                log->len_used += n;
 305        else
 306                log->ubuf = NULL;
 307}
 308
 309static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
 310{
 311        char zero = 0;
 312
 313        if (!bpf_verifier_log_needed(log))
 314                return;
 315
 316        log->len_used = new_pos;
 317        if (put_user(zero, log->ubuf + new_pos))
 318                log->ubuf = NULL;
 319}
 320
 321/* log_level controls verbosity level of eBPF verifier.
 322 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 323 * so the user can figure out what's wrong with the program
 324 */
 325__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 326                                           const char *fmt, ...)
 327{
 328        va_list args;
 329
 330        if (!bpf_verifier_log_needed(&env->log))
 331                return;
 332
 333        va_start(args, fmt);
 334        bpf_verifier_vlog(&env->log, fmt, args);
 335        va_end(args);
 336}
 337EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 338
 339__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 340{
 341        struct bpf_verifier_env *env = private_data;
 342        va_list args;
 343
 344        if (!bpf_verifier_log_needed(&env->log))
 345                return;
 346
 347        va_start(args, fmt);
 348        bpf_verifier_vlog(&env->log, fmt, args);
 349        va_end(args);
 350}
 351
 352__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 353                            const char *fmt, ...)
 354{
 355        va_list args;
 356
 357        if (!bpf_verifier_log_needed(log))
 358                return;
 359
 360        va_start(args, fmt);
 361        bpf_verifier_vlog(log, fmt, args);
 362        va_end(args);
 363}
 364
 365static const char *ltrim(const char *s)
 366{
 367        while (isspace(*s))
 368                s++;
 369
 370        return s;
 371}
 372
 373__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 374                                         u32 insn_off,
 375                                         const char *prefix_fmt, ...)
 376{
 377        const struct bpf_line_info *linfo;
 378
 379        if (!bpf_verifier_log_needed(&env->log))
 380                return;
 381
 382        linfo = find_linfo(env, insn_off);
 383        if (!linfo || linfo == env->prev_linfo)
 384                return;
 385
 386        if (prefix_fmt) {
 387                va_list args;
 388
 389                va_start(args, prefix_fmt);
 390                bpf_verifier_vlog(&env->log, prefix_fmt, args);
 391                va_end(args);
 392        }
 393
 394        verbose(env, "%s\n",
 395                ltrim(btf_name_by_offset(env->prog->aux->btf,
 396                                         linfo->line_off)));
 397
 398        env->prev_linfo = linfo;
 399}
 400
 401static void verbose_invalid_scalar(struct bpf_verifier_env *env,
 402                                   struct bpf_reg_state *reg,
 403                                   struct tnum *range, const char *ctx,
 404                                   const char *reg_name)
 405{
 406        char tn_buf[48];
 407
 408        verbose(env, "At %s the register %s ", ctx, reg_name);
 409        if (!tnum_is_unknown(reg->var_off)) {
 410                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 411                verbose(env, "has value %s", tn_buf);
 412        } else {
 413                verbose(env, "has unknown scalar value");
 414        }
 415        tnum_strn(tn_buf, sizeof(tn_buf), *range);
 416        verbose(env, " should have been in %s\n", tn_buf);
 417}
 418
 419static bool type_is_pkt_pointer(enum bpf_reg_type type)
 420{
 421        return type == PTR_TO_PACKET ||
 422               type == PTR_TO_PACKET_META;
 423}
 424
 425static bool type_is_sk_pointer(enum bpf_reg_type type)
 426{
 427        return type == PTR_TO_SOCKET ||
 428                type == PTR_TO_SOCK_COMMON ||
 429                type == PTR_TO_TCP_SOCK ||
 430                type == PTR_TO_XDP_SOCK;
 431}
 432
 433static bool reg_type_not_null(enum bpf_reg_type type)
 434{
 435        return type == PTR_TO_SOCKET ||
 436                type == PTR_TO_TCP_SOCK ||
 437                type == PTR_TO_MAP_VALUE ||
 438                type == PTR_TO_MAP_KEY ||
 439                type == PTR_TO_SOCK_COMMON;
 440}
 441
 442static bool reg_type_may_be_null(enum bpf_reg_type type)
 443{
 444        return type == PTR_TO_MAP_VALUE_OR_NULL ||
 445               type == PTR_TO_SOCKET_OR_NULL ||
 446               type == PTR_TO_SOCK_COMMON_OR_NULL ||
 447               type == PTR_TO_TCP_SOCK_OR_NULL ||
 448               type == PTR_TO_BTF_ID_OR_NULL ||
 449               type == PTR_TO_MEM_OR_NULL ||
 450               type == PTR_TO_RDONLY_BUF_OR_NULL ||
 451               type == PTR_TO_RDWR_BUF_OR_NULL;
 452}
 453
 454static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 455{
 456        return reg->type == PTR_TO_MAP_VALUE &&
 457                map_value_has_spin_lock(reg->map_ptr);
 458}
 459
 460static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 461{
 462        return type == PTR_TO_SOCKET ||
 463                type == PTR_TO_SOCKET_OR_NULL ||
 464                type == PTR_TO_TCP_SOCK ||
 465                type == PTR_TO_TCP_SOCK_OR_NULL ||
 466                type == PTR_TO_MEM ||
 467                type == PTR_TO_MEM_OR_NULL;
 468}
 469
 470static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 471{
 472        return type == ARG_PTR_TO_SOCK_COMMON;
 473}
 474
 475static bool arg_type_may_be_null(enum bpf_arg_type type)
 476{
 477        return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
 478               type == ARG_PTR_TO_MEM_OR_NULL ||
 479               type == ARG_PTR_TO_CTX_OR_NULL ||
 480               type == ARG_PTR_TO_SOCKET_OR_NULL ||
 481               type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
 482               type == ARG_PTR_TO_STACK_OR_NULL;
 483}
 484
 485/* Determine whether the function releases some resources allocated by another
 486 * function call. The first reference type argument will be assumed to be
 487 * released by release_reference().
 488 */
 489static bool is_release_function(enum bpf_func_id func_id)
 490{
 491        return func_id == BPF_FUNC_sk_release ||
 492               func_id == BPF_FUNC_ringbuf_submit ||
 493               func_id == BPF_FUNC_ringbuf_discard;
 494}
 495
 496static bool may_be_acquire_function(enum bpf_func_id func_id)
 497{
 498        return func_id == BPF_FUNC_sk_lookup_tcp ||
 499                func_id == BPF_FUNC_sk_lookup_udp ||
 500                func_id == BPF_FUNC_skc_lookup_tcp ||
 501                func_id == BPF_FUNC_map_lookup_elem ||
 502                func_id == BPF_FUNC_ringbuf_reserve;
 503}
 504
 505static bool is_acquire_function(enum bpf_func_id func_id,
 506                                const struct bpf_map *map)
 507{
 508        enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
 509
 510        if (func_id == BPF_FUNC_sk_lookup_tcp ||
 511            func_id == BPF_FUNC_sk_lookup_udp ||
 512            func_id == BPF_FUNC_skc_lookup_tcp ||
 513            func_id == BPF_FUNC_ringbuf_reserve)
 514                return true;
 515
 516        if (func_id == BPF_FUNC_map_lookup_elem &&
 517            (map_type == BPF_MAP_TYPE_SOCKMAP ||
 518             map_type == BPF_MAP_TYPE_SOCKHASH))
 519                return true;
 520
 521        return false;
 522}
 523
 524static bool is_ptr_cast_function(enum bpf_func_id func_id)
 525{
 526        return func_id == BPF_FUNC_tcp_sock ||
 527                func_id == BPF_FUNC_sk_fullsock ||
 528                func_id == BPF_FUNC_skc_to_tcp_sock ||
 529                func_id == BPF_FUNC_skc_to_tcp6_sock ||
 530                func_id == BPF_FUNC_skc_to_udp6_sock ||
 531                func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
 532                func_id == BPF_FUNC_skc_to_tcp_request_sock;
 533}
 534
 535static bool is_cmpxchg_insn(const struct bpf_insn *insn)
 536{
 537        return BPF_CLASS(insn->code) == BPF_STX &&
 538               BPF_MODE(insn->code) == BPF_ATOMIC &&
 539               insn->imm == BPF_CMPXCHG;
 540}
 541
 542/* string representation of 'enum bpf_reg_type' */
 543static const char * const reg_type_str[] = {
 544        [NOT_INIT]              = "?",
 545        [SCALAR_VALUE]          = "inv",
 546        [PTR_TO_CTX]            = "ctx",
 547        [CONST_PTR_TO_MAP]      = "map_ptr",
 548        [PTR_TO_MAP_VALUE]      = "map_value",
 549        [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
 550        [PTR_TO_STACK]          = "fp",
 551        [PTR_TO_PACKET]         = "pkt",
 552        [PTR_TO_PACKET_META]    = "pkt_meta",
 553        [PTR_TO_PACKET_END]     = "pkt_end",
 554        [PTR_TO_FLOW_KEYS]      = "flow_keys",
 555        [PTR_TO_SOCKET]         = "sock",
 556        [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 557        [PTR_TO_SOCK_COMMON]    = "sock_common",
 558        [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 559        [PTR_TO_TCP_SOCK]       = "tcp_sock",
 560        [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 561        [PTR_TO_TP_BUFFER]      = "tp_buffer",
 562        [PTR_TO_XDP_SOCK]       = "xdp_sock",
 563        [PTR_TO_BTF_ID]         = "ptr_",
 564        [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
 565        [PTR_TO_PERCPU_BTF_ID]  = "percpu_ptr_",
 566        [PTR_TO_MEM]            = "mem",
 567        [PTR_TO_MEM_OR_NULL]    = "mem_or_null",
 568        [PTR_TO_RDONLY_BUF]     = "rdonly_buf",
 569        [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
 570        [PTR_TO_RDWR_BUF]       = "rdwr_buf",
 571        [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
 572        [PTR_TO_FUNC]           = "func",
 573        [PTR_TO_MAP_KEY]        = "map_key",
 574};
 575
 576static char slot_type_char[] = {
 577        [STACK_INVALID] = '?',
 578        [STACK_SPILL]   = 'r',
 579        [STACK_MISC]    = 'm',
 580        [STACK_ZERO]    = '0',
 581};
 582
 583static void print_liveness(struct bpf_verifier_env *env,
 584                           enum bpf_reg_liveness live)
 585{
 586        if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 587            verbose(env, "_");
 588        if (live & REG_LIVE_READ)
 589                verbose(env, "r");
 590        if (live & REG_LIVE_WRITTEN)
 591                verbose(env, "w");
 592        if (live & REG_LIVE_DONE)
 593                verbose(env, "D");
 594}
 595
 596static struct bpf_func_state *func(struct bpf_verifier_env *env,
 597                                   const struct bpf_reg_state *reg)
 598{
 599        struct bpf_verifier_state *cur = env->cur_state;
 600
 601        return cur->frame[reg->frameno];
 602}
 603
 604static const char *kernel_type_name(const struct btf *btf, u32 id)
 605{
 606        return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
 607}
 608
 609/* The reg state of a pointer or a bounded scalar was saved when
 610 * it was spilled to the stack.
 611 */
 612static bool is_spilled_reg(const struct bpf_stack_state *stack)
 613{
 614        return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
 615}
 616
 617static void scrub_spilled_slot(u8 *stype)
 618{
 619        if (*stype != STACK_INVALID)
 620                *stype = STACK_MISC;
 621}
 622
 623static void print_verifier_state(struct bpf_verifier_env *env,
 624                                 const struct bpf_func_state *state)
 625{
 626        const struct bpf_reg_state *reg;
 627        enum bpf_reg_type t;
 628        int i;
 629
 630        if (state->frameno)
 631                verbose(env, " frame%d:", state->frameno);
 632        for (i = 0; i < MAX_BPF_REG; i++) {
 633                reg = &state->regs[i];
 634                t = reg->type;
 635                if (t == NOT_INIT)
 636                        continue;
 637                verbose(env, " R%d", i);
 638                print_liveness(env, reg->live);
 639                verbose(env, "=%s", reg_type_str[t]);
 640                if (t == SCALAR_VALUE && reg->precise)
 641                        verbose(env, "P");
 642                if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 643                    tnum_is_const(reg->var_off)) {
 644                        /* reg->off should be 0 for SCALAR_VALUE */
 645                        verbose(env, "%lld", reg->var_off.value + reg->off);
 646                } else {
 647                        if (t == PTR_TO_BTF_ID ||
 648                            t == PTR_TO_BTF_ID_OR_NULL ||
 649                            t == PTR_TO_PERCPU_BTF_ID)
 650                                verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
 651                        verbose(env, "(id=%d", reg->id);
 652                        if (reg_type_may_be_refcounted_or_null(t))
 653                                verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
 654                        if (t != SCALAR_VALUE)
 655                                verbose(env, ",off=%d", reg->off);
 656                        if (type_is_pkt_pointer(t))
 657                                verbose(env, ",r=%d", reg->range);
 658                        else if (t == CONST_PTR_TO_MAP ||
 659                                 t == PTR_TO_MAP_KEY ||
 660                                 t == PTR_TO_MAP_VALUE ||
 661                                 t == PTR_TO_MAP_VALUE_OR_NULL)
 662                                verbose(env, ",ks=%d,vs=%d",
 663                                        reg->map_ptr->key_size,
 664                                        reg->map_ptr->value_size);
 665                        if (tnum_is_const(reg->var_off)) {
 666                                /* Typically an immediate SCALAR_VALUE, but
 667                                 * could be a pointer whose offset is too big
 668                                 * for reg->off
 669                                 */
 670                                verbose(env, ",imm=%llx", reg->var_off.value);
 671                        } else {
 672                                if (reg->smin_value != reg->umin_value &&
 673                                    reg->smin_value != S64_MIN)
 674                                        verbose(env, ",smin_value=%lld",
 675                                                (long long)reg->smin_value);
 676                                if (reg->smax_value != reg->umax_value &&
 677                                    reg->smax_value != S64_MAX)
 678                                        verbose(env, ",smax_value=%lld",
 679                                                (long long)reg->smax_value);
 680                                if (reg->umin_value != 0)
 681                                        verbose(env, ",umin_value=%llu",
 682                                                (unsigned long long)reg->umin_value);
 683                                if (reg->umax_value != U64_MAX)
 684                                        verbose(env, ",umax_value=%llu",
 685                                                (unsigned long long)reg->umax_value);
 686                                if (!tnum_is_unknown(reg->var_off)) {
 687                                        char tn_buf[48];
 688
 689                                        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 690                                        verbose(env, ",var_off=%s", tn_buf);
 691                                }
 692                                if (reg->s32_min_value != reg->smin_value &&
 693                                    reg->s32_min_value != S32_MIN)
 694                                        verbose(env, ",s32_min_value=%d",
 695                                                (int)(reg->s32_min_value));
 696                                if (reg->s32_max_value != reg->smax_value &&
 697                                    reg->s32_max_value != S32_MAX)
 698                                        verbose(env, ",s32_max_value=%d",
 699                                                (int)(reg->s32_max_value));
 700                                if (reg->u32_min_value != reg->umin_value &&
 701                                    reg->u32_min_value != U32_MIN)
 702                                        verbose(env, ",u32_min_value=%d",
 703                                                (int)(reg->u32_min_value));
 704                                if (reg->u32_max_value != reg->umax_value &&
 705                                    reg->u32_max_value != U32_MAX)
 706                                        verbose(env, ",u32_max_value=%d",
 707                                                (int)(reg->u32_max_value));
 708                        }
 709                        verbose(env, ")");
 710                }
 711        }
 712        for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 713                char types_buf[BPF_REG_SIZE + 1];
 714                bool valid = false;
 715                int j;
 716
 717                for (j = 0; j < BPF_REG_SIZE; j++) {
 718                        if (state->stack[i].slot_type[j] != STACK_INVALID)
 719                                valid = true;
 720                        types_buf[j] = slot_type_char[
 721                                        state->stack[i].slot_type[j]];
 722                }
 723                types_buf[BPF_REG_SIZE] = 0;
 724                if (!valid)
 725                        continue;
 726                verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 727                print_liveness(env, state->stack[i].spilled_ptr.live);
 728                if (is_spilled_reg(&state->stack[i])) {
 729                        reg = &state->stack[i].spilled_ptr;
 730                        t = reg->type;
 731                        verbose(env, "=%s", reg_type_str[t]);
 732                        if (t == SCALAR_VALUE && reg->precise)
 733                                verbose(env, "P");
 734                        if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
 735                                verbose(env, "%lld", reg->var_off.value + reg->off);
 736                } else {
 737                        verbose(env, "=%s", types_buf);
 738                }
 739        }
 740        if (state->acquired_refs && state->refs[0].id) {
 741                verbose(env, " refs=%d", state->refs[0].id);
 742                for (i = 1; i < state->acquired_refs; i++)
 743                        if (state->refs[i].id)
 744                                verbose(env, ",%d", state->refs[i].id);
 745        }
 746        if (state->in_callback_fn)
 747                verbose(env, " cb");
 748        if (state->in_async_callback_fn)
 749                verbose(env, " async_cb");
 750        verbose(env, "\n");
 751}
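
/* For illustration, a state line produced by the function above looks
 * roughly like (exact fields depend on the register types and bounds):
 *
 *   R0_w=inv42 R1=ctx(id=0,off=0,imm=0) R6_w=map_value(id=0,off=0,ks=4,vs=8,imm=0) R10=fp0 fp-8_w=mmmm????
 *
 * i.e. register name, liveness marks from print_liveness() (r/w/D), the
 * reg_type_str[] name, then either a constant value or the tracked
 * (id, off, bounds) details; stack slots are printed per byte using
 * slot_type_char[].
 */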
 752
 753/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 754 * small to hold src. This is different from krealloc since we don't want to preserve
 755 * the contents of dst.
 756 *
 757 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 758 * not be allocated.
 759 */
 760static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
 761{
 762        size_t bytes;
 763
 764        if (ZERO_OR_NULL_PTR(src))
 765                goto out;
 766
 767        if (unlikely(check_mul_overflow(n, size, &bytes)))
 768                return NULL;
 769
 770        if (ksize(dst) < bytes) {
 771                kfree(dst);
 772                dst = kmalloc_track_caller(bytes, flags);
 773                if (!dst)
 774                        return NULL;
 775        }
 776
 777        memcpy(dst, src, bytes);
 778out:
 779        return dst ? dst : ZERO_SIZE_PTR;
 780}
 781
 782/* resize an array from old_n items to new_n items. the array is reallocated if it's too
 783 * small to hold new_n items. new items are zeroed out if the array grows.
 784 *
 785 * Contrary to krealloc_array, does not free arr if new_n is zero.
 786 */
 787static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
 788{
 789        if (!new_n || old_n == new_n)
 790                goto out;
 791
 792        arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
 793        if (!arr)
 794                return NULL;
 795
 796        if (new_n > old_n)
 797                memset(arr + old_n * size, 0, (new_n - old_n) * size);
 798
 799out:
 800        return arr ? arr : ZERO_SIZE_PTR;
 801}
 802
 803static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 804{
 805        dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
 806                               sizeof(struct bpf_reference_state), GFP_KERNEL);
 807        if (!dst->refs)
 808                return -ENOMEM;
 809
 810        dst->acquired_refs = src->acquired_refs;
 811        return 0;
 812}
 813
 814static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 815{
 816        size_t n = src->allocated_stack / BPF_REG_SIZE;
 817
 818        dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
 819                                GFP_KERNEL);
 820        if (!dst->stack)
 821                return -ENOMEM;
 822
 823        dst->allocated_stack = src->allocated_stack;
 824        return 0;
 825}
 826
 827static int resize_reference_state(struct bpf_func_state *state, size_t n)
 828{
 829        state->refs = realloc_array(state->refs, state->acquired_refs, n,
 830                                    sizeof(struct bpf_reference_state));
 831        if (!state->refs)
 832                return -ENOMEM;
 833
 834        state->acquired_refs = n;
 835        return 0;
 836}
 837
 838static int grow_stack_state(struct bpf_func_state *state, int size)
 839{
 840        size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
 841
 842        if (old_n >= n)
 843                return 0;
 844
 845        state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
 846        if (!state->stack)
 847                return -ENOMEM;
 848
 849        state->allocated_stack = size;
 850        return 0;
 851}
 852
 853/* Acquire a pointer id from the env and update the state->refs to include
 854 * this new pointer reference.
 856 * On success, returns a valid pointer id to associate with the register.
 856 * On failure, returns a negative errno.
 857 */
 858static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 859{
 860        struct bpf_func_state *state = cur_func(env);
 861        int new_ofs = state->acquired_refs;
 862        int id, err;
 863
 864        err = resize_reference_state(state, state->acquired_refs + 1);
 865        if (err)
 866                return err;
 867        id = ++env->id_gen;
 868        state->refs[new_ofs].id = id;
 869        state->refs[new_ofs].insn_idx = insn_idx;
 870
 871        return id;
 872}
 873
 874/* release function corresponding to acquire_reference_state(). Idempotent. */
 875static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 876{
 877        int i, last_idx;
 878
 879        last_idx = state->acquired_refs - 1;
 880        for (i = 0; i < state->acquired_refs; i++) {
 881                if (state->refs[i].id == ptr_id) {
 882                        if (last_idx && i != last_idx)
 883                                memcpy(&state->refs[i], &state->refs[last_idx],
 884                                       sizeof(*state->refs));
 885                        memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 886                        state->acquired_refs--;
 887                        return 0;
 888                }
 889        }
 890        return -EINVAL;
 891}
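
/* Taken together, a referenced pointer is expected to go through the
 * following lifecycle (sketch, using helpers named in is_acquire_function()
 * and is_release_function()):
 *
 *   sk = bpf_sk_lookup_tcp(...);  // acquire_reference_state() records a new id
 *   if (sk)                       // NULL check, as described in the file header
 *           bpf_sk_release(sk);   // release_reference_state() drops that id
 *
 * Any id still recorded in state->refs when the program exits causes the
 * verifier to reject the program for leaking a reference.
 */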
 892
 893static void free_func_state(struct bpf_func_state *state)
 894{
 895        if (!state)
 896                return;
 897        kfree(state->refs);
 898        kfree(state->stack);
 899        kfree(state);
 900}
 901
 902static void clear_jmp_history(struct bpf_verifier_state *state)
 903{
 904        kfree(state->jmp_history);
 905        state->jmp_history = NULL;
 906        state->jmp_history_cnt = 0;
 907}
 908
 909static void free_verifier_state(struct bpf_verifier_state *state,
 910                                bool free_self)
 911{
 912        int i;
 913
 914        for (i = 0; i <= state->curframe; i++) {
 915                free_func_state(state->frame[i]);
 916                state->frame[i] = NULL;
 917        }
 918        clear_jmp_history(state);
 919        if (free_self)
 920                kfree(state);
 921}
 922
 923/* copy verifier state from src to dst growing dst stack space
 924 * when necessary to accommodate larger src stack
 925 */
 926static int copy_func_state(struct bpf_func_state *dst,
 927                           const struct bpf_func_state *src)
 928{
 929        int err;
 930
 931        memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 932        err = copy_reference_state(dst, src);
 933        if (err)
 934                return err;
 935        return copy_stack_state(dst, src);
 936}
 937
 938static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 939                               const struct bpf_verifier_state *src)
 940{
 941        struct bpf_func_state *dst;
 942        int i, err;
 943
 944        dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
 945                                            src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
 946                                            GFP_USER);
 947        if (!dst_state->jmp_history)
 948                return -ENOMEM;
 949        dst_state->jmp_history_cnt = src->jmp_history_cnt;
 950
 951        /* if dst has more stack frames than src, free them */
 952        for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 953                free_func_state(dst_state->frame[i]);
 954                dst_state->frame[i] = NULL;
 955        }
 956        dst_state->speculative = src->speculative;
 957        dst_state->curframe = src->curframe;
 958        dst_state->active_spin_lock = src->active_spin_lock;
 959        dst_state->branches = src->branches;
 960        dst_state->parent = src->parent;
 961        dst_state->first_insn_idx = src->first_insn_idx;
 962        dst_state->last_insn_idx = src->last_insn_idx;
 963        for (i = 0; i <= src->curframe; i++) {
 964                dst = dst_state->frame[i];
 965                if (!dst) {
 966                        dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 967                        if (!dst)
 968                                return -ENOMEM;
 969                        dst_state->frame[i] = dst;
 970                }
 971                err = copy_func_state(dst, src->frame[i]);
 972                if (err)
 973                        return err;
 974        }
 975        return 0;
 976}
 977
 978static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 979{
 980        while (st) {
 981                u32 br = --st->branches;
 982
 983                /* WARN_ON(br > 1) technically makes sense here,
 984                 * but see comment in push_stack(), hence:
 985                 */
 986                WARN_ONCE((int)br < 0,
 987                          "BUG update_branch_counts:branches_to_explore=%d\n",
 988                          br);
 989                if (br)
 990                        break;
 991                st = st->parent;
 992        }
 993}
 994
 995static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 996                     int *insn_idx, bool pop_log)
 997{
 998        struct bpf_verifier_state *cur = env->cur_state;
 999        struct bpf_verifier_stack_elem *elem, *head = env->head;
1000        int err;
1001
1002        if (env->head == NULL)
1003                return -ENOENT;
1004
1005        if (cur) {
1006                err = copy_verifier_state(cur, &head->st);
1007                if (err)
1008                        return err;
1009        }
1010        if (pop_log)
1011                bpf_vlog_reset(&env->log, head->log_pos);
1012        if (insn_idx)
1013                *insn_idx = head->insn_idx;
1014        if (prev_insn_idx)
1015                *prev_insn_idx = head->prev_insn_idx;
1016        elem = head->next;
1017        free_verifier_state(&head->st, false);
1018        kfree(head);
1019        env->head = elem;
1020        env->stack_size--;
1021        return 0;
1022}
1023
1024static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1025                                             int insn_idx, int prev_insn_idx,
1026                                             bool speculative)
1027{
1028        struct bpf_verifier_state *cur = env->cur_state;
1029        struct bpf_verifier_stack_elem *elem;
1030        int err;
1031
1032        elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1033        if (!elem)
1034                goto err;
1035
1036        elem->insn_idx = insn_idx;
1037        elem->prev_insn_idx = prev_insn_idx;
1038        elem->next = env->head;
1039        elem->log_pos = env->log.len_used;
1040        env->head = elem;
1041        env->stack_size++;
1042        err = copy_verifier_state(&elem->st, cur);
1043        if (err)
1044                goto err;
1045        elem->st.speculative |= speculative;
1046        if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1047                verbose(env, "The sequence of %d jumps is too complex.\n",
1048                        env->stack_size);
1049                goto err;
1050        }
1051        if (elem->st.parent) {
1052                ++elem->st.parent->branches;
1053                /* WARN_ON(branches > 2) technically makes sense here,
1054                 * but
1055                 * 1. speculative states will bump 'branches' for non-branch
1056                 * instructions
1057                 * 2. is_state_visited() heuristics may decide not to create
1058                 * a new state for a sequence of branches and all such current
1059                 * and cloned states will be pointing to a single parent state
1060                 * which might have large 'branches' count.
1061                 */
1062        }
1063        return &elem->st;
1064err:
1065        free_verifier_state(env->cur_state, true);
1066        env->cur_state = NULL;
1067        /* pop all elements and return */
1068        while (!pop_stack(env, NULL, NULL, false));
1069        return NULL;
1070}
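
/* Together, push_stack() and pop_stack() implement the depth-first
 * exploration described in the file header: when a conditional jump is
 * verified, the not-yet-taken outcome is pushed here and explored later,
 * while verification continues down the other path, and
 * BPF_COMPLEXITY_LIMIT_JMP_SEQ bounds how many branches may be pending.
 */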
1071
1072#define CALLER_SAVED_REGS 6
1073static const int caller_saved[CALLER_SAVED_REGS] = {
1074        BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1075};
1076
1077static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1078                                struct bpf_reg_state *reg);
1079
1080/* This helper doesn't clear reg->id */
1081static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1082{
1083        reg->var_off = tnum_const(imm);
1084        reg->smin_value = (s64)imm;
1085        reg->smax_value = (s64)imm;
1086        reg->umin_value = imm;
1087        reg->umax_value = imm;
1088
1089        reg->s32_min_value = (s32)imm;
1090        reg->s32_max_value = (s32)imm;
1091        reg->u32_min_value = (u32)imm;
1092        reg->u32_max_value = (u32)imm;
1093}
1094
1095/* Mark the unknown part of a register (variable offset or scalar value) as
1096 * known to have the value @imm.
1097 */
1098static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1099{
1100        /* Clear id, off, and union(map_ptr, range) */
1101        memset(((u8 *)reg) + sizeof(reg->type), 0,
1102               offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1103        ___mark_reg_known(reg, imm);
1104}
1105
1106static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1107{
1108        reg->var_off = tnum_const_subreg(reg->var_off, imm);
1109        reg->s32_min_value = (s32)imm;
1110        reg->s32_max_value = (s32)imm;
1111        reg->u32_min_value = (u32)imm;
1112        reg->u32_max_value = (u32)imm;
1113}
1114
1115/* Mark the 'variable offset' part of a register as zero.  This should be
1116 * used only on registers holding a pointer type.
1117 */
1118static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1119{
1120        __mark_reg_known(reg, 0);
1121}
1122
1123static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1124{
1125        __mark_reg_known(reg, 0);
1126        reg->type = SCALAR_VALUE;
1127}
1128
1129static void mark_reg_known_zero(struct bpf_verifier_env *env,
1130                                struct bpf_reg_state *regs, u32 regno)
1131{
1132        if (WARN_ON(regno >= MAX_BPF_REG)) {
1133                verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1134                /* Something bad happened, let's kill all regs */
1135                for (regno = 0; regno < MAX_BPF_REG; regno++)
1136                        __mark_reg_not_init(env, regs + regno);
1137                return;
1138        }
1139        __mark_reg_known_zero(regs + regno);
1140}
1141
1142static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1143{
1144        switch (reg->type) {
1145        case PTR_TO_MAP_VALUE_OR_NULL: {
1146                const struct bpf_map *map = reg->map_ptr;
1147
1148                if (map->inner_map_meta) {
1149                        reg->type = CONST_PTR_TO_MAP;
1150                        reg->map_ptr = map->inner_map_meta;
1151                        /* transfer reg's id, which is unique for every map_lookup_elem,
1152                         * as the UID of the inner map.
1153                         */
1154                        if (map_value_has_timer(map->inner_map_meta))
1155                                reg->map_uid = reg->id;
1156                } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1157                        reg->type = PTR_TO_XDP_SOCK;
1158                } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1159                           map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1160                        reg->type = PTR_TO_SOCKET;
1161                } else {
1162                        reg->type = PTR_TO_MAP_VALUE;
1163                }
1164                break;
1165        }
1166        case PTR_TO_SOCKET_OR_NULL:
1167                reg->type = PTR_TO_SOCKET;
1168                break;
1169        case PTR_TO_SOCK_COMMON_OR_NULL:
1170                reg->type = PTR_TO_SOCK_COMMON;
1171                break;
1172        case PTR_TO_TCP_SOCK_OR_NULL:
1173                reg->type = PTR_TO_TCP_SOCK;
1174                break;
1175        case PTR_TO_BTF_ID_OR_NULL:
1176                reg->type = PTR_TO_BTF_ID;
1177                break;
1178        case PTR_TO_MEM_OR_NULL:
1179                reg->type = PTR_TO_MEM;
1180                break;
1181        case PTR_TO_RDONLY_BUF_OR_NULL:
1182                reg->type = PTR_TO_RDONLY_BUF;
1183                break;
1184        case PTR_TO_RDWR_BUF_OR_NULL:
1185                reg->type = PTR_TO_RDWR_BUF;
1186                break;
1187        default:
1188                WARN_ONCE(1, "unknown nullable register type");
1189        }
1190}
1191
1192static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1193{
1194        return type_is_pkt_pointer(reg->type);
1195}
1196
1197static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1198{
1199        return reg_is_pkt_pointer(reg) ||
1200               reg->type == PTR_TO_PACKET_END;
1201}
1202
1203/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1204static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1205                                    enum bpf_reg_type which)
1206{
1207        /* The register can already have a range from prior markings.
1208         * This is fine as long as it hasn't been advanced from its
1209         * origin.
1210         */
1211        return reg->type == which &&
1212               reg->id == 0 &&
1213               reg->off == 0 &&
1214               tnum_equals_const(reg->var_off, 0);
1215}
1216
1217/* Reset the min/max bounds of a register */
1218static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1219{
1220        reg->smin_value = S64_MIN;
1221        reg->smax_value = S64_MAX;
1222        reg->umin_value = 0;
1223        reg->umax_value = U64_MAX;
1224
1225        reg->s32_min_value = S32_MIN;
1226        reg->s32_max_value = S32_MAX;
1227        reg->u32_min_value = 0;
1228        reg->u32_max_value = U32_MAX;
1229}
1230
1231static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1232{
1233        reg->smin_value = S64_MIN;
1234        reg->smax_value = S64_MAX;
1235        reg->umin_value = 0;
1236        reg->umax_value = U64_MAX;
1237}
1238
1239static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1240{
1241        reg->s32_min_value = S32_MIN;
1242        reg->s32_max_value = S32_MAX;
1243        reg->u32_min_value = 0;
1244        reg->u32_max_value = U32_MAX;
1245}
1246
1247static void __update_reg32_bounds(struct bpf_reg_state *reg)
1248{
1249        struct tnum var32_off = tnum_subreg(reg->var_off);
1250
1251        /* min signed is max(sign bit) | min(other bits) */
1252        reg->s32_min_value = max_t(s32, reg->s32_min_value,
1253                        var32_off.value | (var32_off.mask & S32_MIN));
1254        /* max signed is min(sign bit) | max(other bits) */
1255        reg->s32_max_value = min_t(s32, reg->s32_max_value,
1256                        var32_off.value | (var32_off.mask & S32_MAX));
1257        reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1258        reg->u32_max_value = min(reg->u32_max_value,
1259                                 (u32)(var32_off.value | var32_off.mask));
1260}
1261
1262static void __update_reg64_bounds(struct bpf_reg_state *reg)
1263{
1264        /* min signed is max(sign bit) | min(other bits) */
1265        reg->smin_value = max_t(s64, reg->smin_value,
1266                                reg->var_off.value | (reg->var_off.mask & S64_MIN));
1267        /* max signed is min(sign bit) | max(other bits) */
1268        reg->smax_value = min_t(s64, reg->smax_value,
1269                                reg->var_off.value | (reg->var_off.mask & S64_MAX));
1270        reg->umin_value = max(reg->umin_value, reg->var_off.value);
1271        reg->umax_value = min(reg->umax_value,
1272                              reg->var_off.value | reg->var_off.mask);
1273}
1274
1275static void __update_reg_bounds(struct bpf_reg_state *reg)
1276{
1277        __update_reg32_bounds(reg);
1278        __update_reg64_bounds(reg);
1279}
1280
1281/* Uses signed min/max values to inform unsigned, and vice-versa */
1282static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1283{
1284        /* Learn sign from signed bounds.
1285         * If we cannot cross the sign boundary, then signed and unsigned bounds
1286         * are the same, so combine.  This works even in the negative case, e.g.
1287         * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1288         */
1289        if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1290                reg->s32_min_value = reg->u32_min_value =
1291                        max_t(u32, reg->s32_min_value, reg->u32_min_value);
1292                reg->s32_max_value = reg->u32_max_value =
1293                        min_t(u32, reg->s32_max_value, reg->u32_max_value);
1294                return;
1295        }
1296        /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1297         * boundary, so we must be careful.
1298         */
1299        if ((s32)reg->u32_max_value >= 0) {
1300                /* Positive.  We can't learn anything from the smin, but smax
1301                 * is positive, hence safe.
1302                 */
1303                reg->s32_min_value = reg->u32_min_value;
1304                reg->s32_max_value = reg->u32_max_value =
1305                        min_t(u32, reg->s32_max_value, reg->u32_max_value);
1306        } else if ((s32)reg->u32_min_value < 0) {
1307                /* Negative.  We can't learn anything from the smax, but smin
1308                 * is negative, hence safe.
1309                 */
1310                reg->s32_min_value = reg->u32_min_value =
1311                        max_t(u32, reg->s32_min_value, reg->u32_min_value);
1312                reg->s32_max_value = reg->u32_max_value;
1313        }
1314}
1315
1316static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1317{
1318        /* Learn sign from signed bounds.
1319         * If we cannot cross the sign boundary, then signed and unsigned bounds
1320         * are the same, so combine.  This works even in the negative case, e.g.
1321         * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1322         */
1323        if (reg->smin_value >= 0 || reg->smax_value < 0) {
1324                reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1325                                                          reg->umin_value);
1326                reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1327                                                          reg->umax_value);
1328                return;
1329        }
1330        /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1331         * boundary, so we must be careful.
1332         */
1333        if ((s64)reg->umax_value >= 0) {
1334                /* Positive.  We can't learn anything from the smin, but smax
1335                 * is positive, hence safe.
1336                 */
1337                reg->smin_value = reg->umin_value;
1338                reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1339                                                          reg->umax_value);
1340        } else if ((s64)reg->umin_value < 0) {
1341                /* Negative.  We can't learn anything from the smax, but smin
1342                 * is negative, hence safe.
1343                 */
1344                reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1345                                                          reg->umin_value);
1346                reg->smax_value = reg->umax_value;
1347        }
1348}
1349
1350static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1351{
1352        __reg32_deduce_bounds(reg);
1353        __reg64_deduce_bounds(reg);
1354}
1355
1356/* Attempts to improve var_off based on unsigned min/max information */
1357static void __reg_bound_offset(struct bpf_reg_state *reg)
1358{
1359        struct tnum var64_off = tnum_intersect(reg->var_off,
1360                                               tnum_range(reg->umin_value,
1361                                                          reg->umax_value));
1362        struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1363                                                tnum_range(reg->u32_min_value,
1364                                                           reg->u32_max_value));
1365
1366        reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1367}
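
/* A worked example of the intersection above (illustrative numbers): if
 * var_off is (value=0x0, mask=0xff), i.e. only the low byte is unknown, and
 * the unsigned bounds have been narrowed to [0, 0x3f], then
 * tnum_range(0, 0x3f) is (value=0x0, mask=0x3f) and the intersection shrinks
 * var_off to (value=0x0, mask=0x3f), proving bits 6 and 7 are zero.
 */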
1368
1369static bool __reg32_bound_s64(s32 a)
1370{
1371        return a >= 0 && a <= S32_MAX;
1372}
1373
1374static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1375{
1376        reg->umin_value = reg->u32_min_value;
1377        reg->umax_value = reg->u32_max_value;
1378
1379        /* Attempt to pull the 32-bit signed bounds into the 64-bit bounds; they
1380         * must be positive, otherwise fall back to worst-case bounds and refine
1381         * later from the tnum.
1382         */
1383        if (__reg32_bound_s64(reg->s32_min_value) &&
1384            __reg32_bound_s64(reg->s32_max_value)) {
1385                reg->smin_value = reg->s32_min_value;
1386                reg->smax_value = reg->s32_max_value;
1387        } else {
1388                reg->smin_value = 0;
1389                reg->smax_value = U32_MAX;
1390        }
1391}
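
/* Editorial example (not in the original source): with u32 bounds [3, 7] the
 * u64 bounds above simply become [3, 7].  If the s32 bounds were [-1, 7],
 * __reg32_bound_s64(-1) fails, so the s64 signed bounds are conservatively
 * set to [0, U32_MAX] and later tightened from the tnum by the caller below.
 */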
1392
1393static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1394{
1395        /* special case when the 64-bit register has its upper 32 bits zeroed.
1396         * Typically happens after a zext or <<32, >>32 sequence, allowing us
1397         * to use the 32-bit bounds directly.
1398         */
1399        if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1400                __reg_assign_32_into_64(reg);
1401        } else {
1402                /* Otherwise the best we can do is push lower 32bit known and
1403                 * unknown bits into register (var_off set from jmp logic)
1404                 * then learn as much as possible from the 64-bit tnum
1405                 * known and unknown bits. The previous smin/smax bounds are
1406                 * invalid here because of jmp32 compare so mark them unknown
1407                 * so they do not impact tnum bounds calculation.
1408                 */
1409                __mark_reg64_unbounded(reg);
1410                __update_reg_bounds(reg);
1411        }
1412
1413        /* Intersecting with the old var_off might have improved our bounds
1414         * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1415         * then new var_off is (0; 0x7f...fc) which improves our umax.
1416         */
1417        __reg_deduce_bounds(reg);
1418        __reg_bound_offset(reg);
1419        __update_reg_bounds(reg);
1420}
1421
1422static bool __reg64_bound_s32(s64 a)
1423{
1424        return a >= S32_MIN && a <= S32_MAX;
1425}
1426
1427static bool __reg64_bound_u32(u64 a)
1428{
1429        return a >= U32_MIN && a <= U32_MAX;
1430}
1431
1432static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1433{
1434        __mark_reg32_unbounded(reg);
1435
1436        if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1437                reg->s32_min_value = (s32)reg->smin_value;
1438                reg->s32_max_value = (s32)reg->smax_value;
1439        }
1440        if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1441                reg->u32_min_value = (u32)reg->umin_value;
1442                reg->u32_max_value = (u32)reg->umax_value;
1443        }
1444
1445        /* Intersecting with the old var_off might have improved our bounds
1446         * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1447         * then new var_off is (0; 0x7f...fc) which improves our umax.
1448         */
1449        __reg_deduce_bounds(reg);
1450        __reg_bound_offset(reg);
1451        __update_reg_bounds(reg);
1452}
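
/* Editorial example (not in the original source): if the 64-bit bounds are
 * smin_value = -2, smax_value = 100, umin_value = 0, umax_value = U64_MAX,
 * then both signed bounds fit in s32, so s32_min/s32_max become [-2, 100],
 * while umax_value does not fit in u32, so the u32 bounds stay at the
 * worst case [0, U32_MAX] set by __mark_reg32_unbounded() above.
 */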
1453
1454/* Mark a register as having a completely unknown (scalar) value. */
1455static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1456                               struct bpf_reg_state *reg)
1457{
1458        /*
1459         * Clear type, id, off, and union(map_ptr, range) and
1460         * padding between 'type' and union
1461         */
1462        memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1463        reg->type = SCALAR_VALUE;
1464        reg->var_off = tnum_unknown;
1465        reg->frameno = 0;
1466        reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1467        __mark_reg_unbounded(reg);
1468}
1469
1470static void mark_reg_unknown(struct bpf_verifier_env *env,
1471                             struct bpf_reg_state *regs, u32 regno)
1472{
1473        if (WARN_ON(regno >= MAX_BPF_REG)) {
1474                verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1475                /* Something bad happened, let's kill all regs except FP */
1476                for (regno = 0; regno < BPF_REG_FP; regno++)
1477                        __mark_reg_not_init(env, regs + regno);
1478                return;
1479        }
1480        __mark_reg_unknown(env, regs + regno);
1481}
1482
1483static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1484                                struct bpf_reg_state *reg)
1485{
1486        __mark_reg_unknown(env, reg);
1487        reg->type = NOT_INIT;
1488}
1489
1490static void mark_reg_not_init(struct bpf_verifier_env *env,
1491                              struct bpf_reg_state *regs, u32 regno)
1492{
1493        if (WARN_ON(regno >= MAX_BPF_REG)) {
1494                verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1495                /* Something bad happened, let's kill all regs except FP */
1496                for (regno = 0; regno < BPF_REG_FP; regno++)
1497                        __mark_reg_not_init(env, regs + regno);
1498                return;
1499        }
1500        __mark_reg_not_init(env, regs + regno);
1501}
1502
1503static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1504                            struct bpf_reg_state *regs, u32 regno,
1505                            enum bpf_reg_type reg_type,
1506                            struct btf *btf, u32 btf_id)
1507{
1508        if (reg_type == SCALAR_VALUE) {
1509                mark_reg_unknown(env, regs, regno);
1510                return;
1511        }
1512        mark_reg_known_zero(env, regs, regno);
1513        regs[regno].type = PTR_TO_BTF_ID;
1514        regs[regno].btf = btf;
1515        regs[regno].btf_id = btf_id;
1516}
1517
1518#define DEF_NOT_SUBREG  (0)
1519static void init_reg_state(struct bpf_verifier_env *env,
1520                           struct bpf_func_state *state)
1521{
1522        struct bpf_reg_state *regs = state->regs;
1523        int i;
1524
1525        for (i = 0; i < MAX_BPF_REG; i++) {
1526                mark_reg_not_init(env, regs, i);
1527                regs[i].live = REG_LIVE_NONE;
1528                regs[i].parent = NULL;
1529                regs[i].subreg_def = DEF_NOT_SUBREG;
1530        }
1531
1532        /* frame pointer */
1533        regs[BPF_REG_FP].type = PTR_TO_STACK;
1534        mark_reg_known_zero(env, regs, BPF_REG_FP);
1535        regs[BPF_REG_FP].frameno = state->frameno;
1536}
1537
1538#define BPF_MAIN_FUNC (-1)
1539static void init_func_state(struct bpf_verifier_env *env,
1540                            struct bpf_func_state *state,
1541                            int callsite, int frameno, int subprogno)
1542{
1543        state->callsite = callsite;
1544        state->frameno = frameno;
1545        state->subprogno = subprogno;
1546        init_reg_state(env, state);
1547}
1548
1549/* Similar to push_stack(), but for async callbacks */
1550static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1551                                                int insn_idx, int prev_insn_idx,
1552                                                int subprog)
1553{
1554        struct bpf_verifier_stack_elem *elem;
1555        struct bpf_func_state *frame;
1556
1557        elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1558        if (!elem)
1559                goto err;
1560
1561        elem->insn_idx = insn_idx;
1562        elem->prev_insn_idx = prev_insn_idx;
1563        elem->next = env->head;
1564        elem->log_pos = env->log.len_used;
1565        env->head = elem;
1566        env->stack_size++;
1567        if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1568                verbose(env,
1569                        "The sequence of %d jumps is too complex for async cb.\n",
1570                        env->stack_size);
1571                goto err;
1572        }
1573        /* Unlike push_stack() do not copy_verifier_state().
1574         * The caller state doesn't matter.
1575         * This is an async callback. It starts with a fresh stack.
1576         * Initialize it similarly to do_check_common().
1577         */
1578        elem->st.branches = 1;
1579        frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1580        if (!frame)
1581                goto err;
1582        init_func_state(env, frame,
1583                        BPF_MAIN_FUNC /* callsite */,
1584                        0 /* frameno within this callchain */,
1585                        subprog /* subprog number within this prog */);
1586        elem->st.frame[0] = frame;
1587        return &elem->st;
1588err:
1589        free_verifier_state(env->cur_state, true);
1590        env->cur_state = NULL;
1591        /* pop all elements and return */
1592        while (!pop_stack(env, NULL, NULL, false));
1593        return NULL;
1594}
1595
1596
1597enum reg_arg_type {
1598        SRC_OP,         /* register is used as source operand */
1599        DST_OP,         /* register is used as destination operand */
1600        DST_OP_NO_MARK  /* same as above, check only, don't mark */
1601};
1602
1603static int cmp_subprogs(const void *a, const void *b)
1604{
1605        return ((struct bpf_subprog_info *)a)->start -
1606               ((struct bpf_subprog_info *)b)->start;
1607}
1608
1609static int find_subprog(struct bpf_verifier_env *env, int off)
1610{
1611        struct bpf_subprog_info *p;
1612
1613        p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1614                    sizeof(env->subprog_info[0]), cmp_subprogs);
1615        if (!p)
1616                return -ENOENT;
1617        return p - env->subprog_info;
1618
1619}
1620
1621static int add_subprog(struct bpf_verifier_env *env, int off)
1622{
1623        int insn_cnt = env->prog->len;
1624        int ret;
1625
1626        if (off >= insn_cnt || off < 0) {
1627                verbose(env, "call to invalid destination\n");
1628                return -EINVAL;
1629        }
1630        ret = find_subprog(env, off);
1631        if (ret >= 0)
1632                return ret;
1633        if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1634                verbose(env, "too many subprograms\n");
1635                return -E2BIG;
1636        }
1637        /* determine subprog starts. The end is one before the next starts */
1638        env->subprog_info[env->subprog_cnt++].start = off;
1639        sort(env->subprog_info, env->subprog_cnt,
1640             sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1641        return env->subprog_cnt - 1;
1642}
1643
1644#define MAX_KFUNC_DESCS 256
1645#define MAX_KFUNC_BTFS  256
1646
1647struct bpf_kfunc_desc {
1648        struct btf_func_model func_model;
1649        u32 func_id;
1650        s32 imm;
1651        u16 offset;
1652};
1653
1654struct bpf_kfunc_btf {
1655        struct btf *btf;
1656        struct module *module;
1657        u16 offset;
1658};
1659
1660struct bpf_kfunc_desc_tab {
1661        struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1662        u32 nr_descs;
1663};
1664
1665struct bpf_kfunc_btf_tab {
1666        struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1667        u32 nr_descs;
1668};
1669
1670static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1671{
1672        const struct bpf_kfunc_desc *d0 = a;
1673        const struct bpf_kfunc_desc *d1 = b;
1674
1675        /* func_id is not greater than BTF_MAX_TYPE */
1676        return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1677}
1678
1679static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1680{
1681        const struct bpf_kfunc_btf *d0 = a;
1682        const struct bpf_kfunc_btf *d1 = b;
1683
1684        return d0->offset - d1->offset;
1685}
1686
1687static const struct bpf_kfunc_desc *
1688find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1689{
1690        struct bpf_kfunc_desc desc = {
1691                .func_id = func_id,
1692                .offset = offset,
1693        };
1694        struct bpf_kfunc_desc_tab *tab;
1695
1696        tab = prog->aux->kfunc_tab;
1697        return bsearch(&desc, tab->descs, tab->nr_descs,
1698                       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1699}
1700
1701static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
1702                                         s16 offset, struct module **btf_modp)
1703{
1704        struct bpf_kfunc_btf kf_btf = { .offset = offset };
1705        struct bpf_kfunc_btf_tab *tab;
1706        struct bpf_kfunc_btf *b;
1707        struct module *mod;
1708        struct btf *btf;
1709        int btf_fd;
1710
1711        tab = env->prog->aux->kfunc_btf_tab;
1712        b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1713                    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1714        if (!b) {
1715                if (tab->nr_descs == MAX_KFUNC_BTFS) {
1716                        verbose(env, "too many different module BTFs\n");
1717                        return ERR_PTR(-E2BIG);
1718                }
1719
1720                if (bpfptr_is_null(env->fd_array)) {
1721                        verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1722                        return ERR_PTR(-EPROTO);
1723                }
1724
1725                if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1726                                            offset * sizeof(btf_fd),
1727                                            sizeof(btf_fd)))
1728                        return ERR_PTR(-EFAULT);
1729
1730                btf = btf_get_by_fd(btf_fd);
1731                if (IS_ERR(btf)) {
1732                        verbose(env, "invalid module BTF fd specified\n");
1733                        return btf;
1734                }
1735
1736                if (!btf_is_module(btf)) {
1737                        verbose(env, "BTF fd for kfunc is not a module BTF\n");
1738                        btf_put(btf);
1739                        return ERR_PTR(-EINVAL);
1740                }
1741
1742                mod = btf_try_get_module(btf);
1743                if (!mod) {
1744                        btf_put(btf);
1745                        return ERR_PTR(-ENXIO);
1746                }
1747
1748                b = &tab->descs[tab->nr_descs++];
1749                b->btf = btf;
1750                b->module = mod;
1751                b->offset = offset;
1752
1753                sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1754                     kfunc_btf_cmp_by_off, NULL);
1755        }
1756        if (btf_modp)
1757                *btf_modp = b->module;
1758        return b->btf;
1759}
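
/* Editorial note (not part of the original source): for a call to a kfunc
 * living in a kernel module, insn->off carries an index into the fd_array
 * supplied at load time.  E.g. offset == 2 makes the code above read
 * fd_array[2], turn that fd into a module BTF via btf_get_by_fd(), take a
 * reference on the owning module, and cache the pair in kfunc_btf_tab so
 * later lookups for the same offset hit the bsearch() fast path.
 */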
1760
1761void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1762{
1763        if (!tab)
1764                return;
1765
1766        while (tab->nr_descs--) {
1767                module_put(tab->descs[tab->nr_descs].module);
1768                btf_put(tab->descs[tab->nr_descs].btf);
1769        }
1770        kfree(tab);
1771}
1772
1773static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
1774                                       u32 func_id, s16 offset,
1775                                       struct module **btf_modp)
1776{
1777        if (offset) {
1778                if (offset < 0) {
1779                        /* In the future, this could be allowed in order to increase the
1780                         * limit of the fd index into fd_array, interpreted as a u16.
1781                         */
1782                        verbose(env, "negative offset disallowed for kernel module function call\n");
1783                        return ERR_PTR(-EINVAL);
1784                }
1785
1786                return __find_kfunc_desc_btf(env, offset, btf_modp);
1787        }
1788        return btf_vmlinux ?: ERR_PTR(-ENOENT);
1789}
1790
1791static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
1792{
1793        const struct btf_type *func, *func_proto;
1794        struct bpf_kfunc_btf_tab *btf_tab;
1795        struct bpf_kfunc_desc_tab *tab;
1796        struct bpf_prog_aux *prog_aux;
1797        struct bpf_kfunc_desc *desc;
1798        const char *func_name;
1799        struct btf *desc_btf;
1800        unsigned long addr;
1801        int err;
1802
1803        prog_aux = env->prog->aux;
1804        tab = prog_aux->kfunc_tab;
1805        btf_tab = prog_aux->kfunc_btf_tab;
1806        if (!tab) {
1807                if (!btf_vmlinux) {
1808                        verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
1809                        return -ENOTSUPP;
1810                }
1811
1812                if (!env->prog->jit_requested) {
1813                        verbose(env, "JIT is required for calling kernel function\n");
1814                        return -ENOTSUPP;
1815                }
1816
1817                if (!bpf_jit_supports_kfunc_call()) {
1818                        verbose(env, "JIT does not support calling kernel function\n");
1819                        return -ENOTSUPP;
1820                }
1821
1822                if (!env->prog->gpl_compatible) {
1823                        verbose(env, "cannot call kernel function from non-GPL compatible program\n");
1824                        return -EINVAL;
1825                }
1826
1827                tab = kzalloc(sizeof(*tab), GFP_KERNEL);
1828                if (!tab)
1829                        return -ENOMEM;
1830                prog_aux->kfunc_tab = tab;
1831        }
1832
1833        /* func_id == 0 is always invalid, but instead of returning an error, be
1834         * conservative and wait until the code elimination pass before returning
1835         * an error, so that BPF programs loaded from userspace may contain such
1836         * invalid calls as long as they are pruned out.  It is also required
1837         * that the offset be left untouched (zero) for such calls.
1838         */
1839        if (!func_id && !offset)
1840                return 0;
1841
1842        if (!btf_tab && offset) {
1843                btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
1844                if (!btf_tab)
1845                        return -ENOMEM;
1846                prog_aux->kfunc_btf_tab = btf_tab;
1847        }
1848
1849        desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
1850        if (IS_ERR(desc_btf)) {
1851                verbose(env, "failed to find BTF for kernel function\n");
1852                return PTR_ERR(desc_btf);
1853        }
1854
1855        if (find_kfunc_desc(env->prog, func_id, offset))
1856                return 0;
1857
1858        if (tab->nr_descs == MAX_KFUNC_DESCS) {
1859                verbose(env, "too many different kernel function calls\n");
1860                return -E2BIG;
1861        }
1862
1863        func = btf_type_by_id(desc_btf, func_id);
1864        if (!func || !btf_type_is_func(func)) {
1865                verbose(env, "kernel btf_id %u is not a function\n",
1866                        func_id);
1867                return -EINVAL;
1868        }
1869        func_proto = btf_type_by_id(desc_btf, func->type);
1870        if (!func_proto || !btf_type_is_func_proto(func_proto)) {
1871                verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
1872                        func_id);
1873                return -EINVAL;
1874        }
1875
1876        func_name = btf_name_by_offset(desc_btf, func->name_off);
1877        addr = kallsyms_lookup_name(func_name);
1878        if (!addr) {
1879                verbose(env, "cannot find address for kernel function %s\n",
1880                        func_name);
1881                return -EINVAL;
1882        }
1883
1884        desc = &tab->descs[tab->nr_descs++];
1885        desc->func_id = func_id;
1886        desc->imm = BPF_CALL_IMM(addr);
1887        desc->offset = offset;
1888        err = btf_distill_func_proto(&env->log, desc_btf,
1889                                     func_proto, func_name,
1890                                     &desc->func_model);
1891        if (!err)
1892                sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1893                     kfunc_desc_cmp_by_id_off, NULL);
1894        return err;
1895}
1896
1897static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
1898{
1899        const struct bpf_kfunc_desc *d0 = a;
1900        const struct bpf_kfunc_desc *d1 = b;
1901
1902        if (d0->imm > d1->imm)
1903                return 1;
1904        else if (d0->imm < d1->imm)
1905                return -1;
1906        return 0;
1907}
1908
1909static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
1910{
1911        struct bpf_kfunc_desc_tab *tab;
1912
1913        tab = prog->aux->kfunc_tab;
1914        if (!tab)
1915                return;
1916
1917        sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1918             kfunc_desc_cmp_by_imm, NULL);
1919}
1920
1921bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
1922{
1923        return !!prog->aux->kfunc_tab;
1924}
1925
1926const struct btf_func_model *
1927bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
1928                         const struct bpf_insn *insn)
1929{
1930        const struct bpf_kfunc_desc desc = {
1931                .imm = insn->imm,
1932        };
1933        const struct bpf_kfunc_desc *res;
1934        struct bpf_kfunc_desc_tab *tab;
1935
1936        tab = prog->aux->kfunc_tab;
1937        res = bsearch(&desc, tab->descs, tab->nr_descs,
1938                      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
1939
1940        return res ? &res->func_model : NULL;
1941}
1942
1943static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
1944{
1945        struct bpf_subprog_info *subprog = env->subprog_info;
1946        struct bpf_insn *insn = env->prog->insnsi;
1947        int i, ret, insn_cnt = env->prog->len;
1948
1949        /* Add entry function. */
1950        ret = add_subprog(env, 0);
1951        if (ret)
1952                return ret;
1953
1954        for (i = 0; i < insn_cnt; i++, insn++) {
1955                if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
1956                    !bpf_pseudo_kfunc_call(insn))
1957                        continue;
1958
1959                if (!env->bpf_capable) {
1960                        verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1961                        return -EPERM;
1962                }
1963
1964                if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
1965                        ret = add_subprog(env, i + insn->imm + 1);
1966                else
1967                        ret = add_kfunc_call(env, insn->imm, insn->off);
1968
1969                if (ret < 0)
1970                        return ret;
1971        }
1972
1973        /* Add a fake 'exit' subprog which could simplify subprog iteration
1974         * logic. 'subprog_cnt' should not be increased.
1975         */
1976        subprog[env->subprog_cnt].start = insn_cnt;
1977
1978        if (env->log.level & BPF_LOG_LEVEL2)
1979                for (i = 0; i < env->subprog_cnt; i++)
1980                        verbose(env, "func#%d @%d\n", i, subprog[i].start);
1981
1982        return 0;
1983}
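
/* Editorial example (not in the original source): call targets are encoded
 * relative to the calling insn, so for a program where insn 4 is
 *   4: (85) call pc+3
 * the callee starts at insn 4 + 3 + 1 = 8 and add_subprog(env, 8) records
 * that boundary; subprog 0 (the entry function) then spans [0, 8) and the
 * fake 'exit' entry added above marks the end of the last real subprog.
 */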
1984
1985static int check_subprogs(struct bpf_verifier_env *env)
1986{
1987        int i, subprog_start, subprog_end, off, cur_subprog = 0;
1988        struct bpf_subprog_info *subprog = env->subprog_info;
1989        struct bpf_insn *insn = env->prog->insnsi;
1990        int insn_cnt = env->prog->len;
1991
1992        /* now check that all jumps are within the same subprog */
1993        subprog_start = subprog[cur_subprog].start;
1994        subprog_end = subprog[cur_subprog + 1].start;
1995        for (i = 0; i < insn_cnt; i++) {
1996                u8 code = insn[i].code;
1997
1998                if (code == (BPF_JMP | BPF_CALL) &&
1999                    insn[i].imm == BPF_FUNC_tail_call &&
2000                    insn[i].src_reg != BPF_PSEUDO_CALL)
2001                        subprog[cur_subprog].has_tail_call = true;
2002                if (BPF_CLASS(code) == BPF_LD &&
2003                    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2004                        subprog[cur_subprog].has_ld_abs = true;
2005                if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2006                        goto next;
2007                if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2008                        goto next;
2009                off = i + insn[i].off + 1;
2010                if (off < subprog_start || off >= subprog_end) {
2011                        verbose(env, "jump out of range from insn %d to %d\n", i, off);
2012                        return -EINVAL;
2013                }
2014next:
2015                if (i == subprog_end - 1) {
2016                        /* to avoid fall-through from one subprog into another
2017                         * the last insn of the subprog should be either exit
2018                         * or unconditional jump back
2019                         */
2020                        if (code != (BPF_JMP | BPF_EXIT) &&
2021                            code != (BPF_JMP | BPF_JA)) {
2022                                verbose(env, "last insn is not an exit or jmp\n");
2023                                return -EINVAL;
2024                        }
2025                        subprog_start = subprog_end;
2026                        cur_subprog++;
2027                        if (cur_subprog < env->subprog_cnt)
2028                                subprog_end = subprog[cur_subprog + 1].start;
2029                }
2030        }
2031        return 0;
2032}
2033
2034/* Parentage chain of this register (or stack slot) should take care of all
2035 * issues like callee-saved registers, stack slot allocation time, etc.
2036 */
2037static int mark_reg_read(struct bpf_verifier_env *env,
2038                         const struct bpf_reg_state *state,
2039                         struct bpf_reg_state *parent, u8 flag)
2040{
2041        bool writes = parent == state->parent; /* Observe write marks */
2042        int cnt = 0;
2043
2044        while (parent) {
2045                /* if read wasn't screened by an earlier write ... */
2046                if (writes && state->live & REG_LIVE_WRITTEN)
2047                        break;
2048                if (parent->live & REG_LIVE_DONE) {
2049                        verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2050                                reg_type_str[parent->type],
2051                                parent->var_off.value, parent->off);
2052                        return -EFAULT;
2053                }
2054                /* The first condition is more likely to be true than the
2055                 * second, so check it first.
2056                 */
2057                if ((parent->live & REG_LIVE_READ) == flag ||
2058                    parent->live & REG_LIVE_READ64)
2059                        /* The parentage chain never changes and
2060                         * this parent was already marked as LIVE_READ.
2061                         * There is no need to keep walking the chain again and
2062                         * keep re-marking all parents as LIVE_READ.
2063                         * This case happens when the same register is read
2064                         * multiple times without writes into it in-between.
2065                         * Also, if parent has the stronger REG_LIVE_READ64 set,
2066                         * then no need to set the weak REG_LIVE_READ32.
2067                         */
2068                        break;
2069                /* ... then we depend on parent's value */
2070                parent->live |= flag;
2071                /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2072                if (flag == REG_LIVE_READ64)
2073                        parent->live &= ~REG_LIVE_READ32;
2074                state = parent;
2075                parent = state->parent;
2076                writes = true;
2077                cnt++;
2078        }
2079
2080        if (env->longest_mark_read_walk < cnt)
2081                env->longest_mark_read_walk = cnt;
2082        return 0;
2083}
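
/* Editorial example (not in the original source): if the state chain is
 * grandparent -> parent -> cur and the program reads r6 in cur, the walk
 * above sets REG_LIVE_READ64 (or READ32) on r6 in parent, then in
 * grandparent, stopping early either at a state whose r6 carries
 * REG_LIVE_WRITTEN (the read is screened by that write) or at a parent
 * that already has an equal or stronger read mark.
 */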
2084
2085/* This function is supposed to be used by the following 32-bit optimization
2086 * code only. It returns TRUE if the source or destination register operates
2087 * on 64 bits, otherwise it returns FALSE.
2088 */
2089static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2090                     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2091{
2092        u8 code, class, op;
2093
2094        code = insn->code;
2095        class = BPF_CLASS(code);
2096        op = BPF_OP(code);
2097        if (class == BPF_JMP) {
2098                /* BPF_EXIT for "main" will reach here. Return TRUE
2099                 * conservatively.
2100                 */
2101                if (op == BPF_EXIT)
2102                        return true;
2103                if (op == BPF_CALL) {
2104                        /* BPF to BPF call will reach here because of the marking of
2105                         * caller-saved clobbers with DST_OP_NO_MARK, for which we
2106                         * don't care about the register def because they are already
2107                         * marked as NOT_INIT anyway.
2108                         */
2109                        if (insn->src_reg == BPF_PSEUDO_CALL)
2110                                return false;
2111                        /* Helper call will reach here because of arg type
2112                         * check, conservatively return TRUE.
2113                         */
2114                        if (t == SRC_OP)
2115                                return true;
2116
2117                        return false;
2118                }
2119        }
2120
2121        if (class == BPF_ALU64 || class == BPF_JMP ||
2122            /* BPF_END always uses the BPF_ALU class. */
2123            (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2124                return true;
2125
2126        if (class == BPF_ALU || class == BPF_JMP32)
2127                return false;
2128
2129        if (class == BPF_LDX) {
2130                if (t != SRC_OP)
2131                        return BPF_SIZE(code) == BPF_DW;
2132                /* LDX source must be ptr. */
2133                return true;
2134        }
2135
2136        if (class == BPF_STX) {
2137                /* BPF_STX (including atomic variants) has multiple source
2138                 * operands, one of which is a ptr. Check whether the caller is
2139                 * asking about it.
2140                 */
2141                if (t == SRC_OP && reg->type != SCALAR_VALUE)
2142                        return true;
2143                return BPF_SIZE(code) == BPF_DW;
2144        }
2145
2146        if (class == BPF_LD) {
2147                u8 mode = BPF_MODE(code);
2148
2149                /* LD_IMM64 */
2150                if (mode == BPF_IMM)
2151                        return true;
2152
2153                /* Both LD_IND and LD_ABS return 32-bit data. */
2154                if (t != SRC_OP)
2155                        return false;
2156
2157                /* Implicit ctx ptr. */
2158                if (regno == BPF_REG_6)
2159                        return true;
2160
2161                /* Explicit source could be any width. */
2162                return true;
2163        }
2164
2165        if (class == BPF_ST)
2166                /* The only source register for BPF_ST is a ptr. */
2167                return true;
2168
2169        /* Conservatively return true by default. */
2170        return true;
2171}
2172
2173/* Return the regno defined by the insn, or -1. */
2174static int insn_def_regno(const struct bpf_insn *insn)
2175{
2176        switch (BPF_CLASS(insn->code)) {
2177        case BPF_JMP:
2178        case BPF_JMP32:
2179        case BPF_ST:
2180                return -1;
2181        case BPF_STX:
2182                if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2183                    (insn->imm & BPF_FETCH)) {
2184                        if (insn->imm == BPF_CMPXCHG)
2185                                return BPF_REG_0;
2186                        else
2187                                return insn->src_reg;
2188                } else {
2189                        return -1;
2190                }
2191        default:
2192                return insn->dst_reg;
2193        }
2194}
2195
2196/* Return TRUE if INSN has defined any 32-bit value explicitly. */
2197static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2198{
2199        int dst_reg = insn_def_regno(insn);
2200
2201        if (dst_reg == -1)
2202                return false;
2203
2204        return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2205}
2206
2207static void mark_insn_zext(struct bpf_verifier_env *env,
2208                           struct bpf_reg_state *reg)
2209{
2210        s32 def_idx = reg->subreg_def;
2211
2212        if (def_idx == DEF_NOT_SUBREG)
2213                return;
2214
2215        env->insn_aux_data[def_idx - 1].zext_dst = true;
2216        /* The dst will be zero extended, so it won't be a sub-register anymore. */
2217        reg->subreg_def = DEF_NOT_SUBREG;
2218}
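
/* Editorial example (not in the original source): a 32-bit ALU insn such as
 *   5: (bc) w1 = w2
 * records subreg_def = 6 for r1 in check_reg_arg() below.  If r1 is later
 * read as a full 64-bit value, mark_insn_zext() flags insn 5 via zext_dst so
 * that a zero-extension can be inserted for it, and clears subreg_def again.
 */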
2219
2220static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2221                         enum reg_arg_type t)
2222{
2223        struct bpf_verifier_state *vstate = env->cur_state;
2224        struct bpf_func_state *state = vstate->frame[vstate->curframe];
2225        struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2226        struct bpf_reg_state *reg, *regs = state->regs;
2227        bool rw64;
2228
2229        if (regno >= MAX_BPF_REG) {
2230                verbose(env, "R%d is invalid\n", regno);
2231                return -EINVAL;
2232        }
2233
2234        reg = &regs[regno];
2235        rw64 = is_reg64(env, insn, regno, reg, t);
2236        if (t == SRC_OP) {
2237                /* check whether register used as source operand can be read */
2238                if (reg->type == NOT_INIT) {
2239                        verbose(env, "R%d !read_ok\n", regno);
2240                        return -EACCES;
2241                }
2242                /* We don't need to worry about FP liveness because it's read-only */
2243                if (regno == BPF_REG_FP)
2244                        return 0;
2245
2246                if (rw64)
2247                        mark_insn_zext(env, reg);
2248
2249                return mark_reg_read(env, reg, reg->parent,
2250                                     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2251        } else {
2252                /* check whether register used as dest operand can be written to */
2253                if (regno == BPF_REG_FP) {
2254                        verbose(env, "frame pointer is read only\n");
2255                        return -EACCES;
2256                }
2257                reg->live |= REG_LIVE_WRITTEN;
2258                reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2259                if (t == DST_OP)
2260                        mark_reg_unknown(env, regs, regno);
2261        }
2262        return 0;
2263}
2264
2265/* for any branch, call, exit record the history of jmps in the given state */
2266static int push_jmp_history(struct bpf_verifier_env *env,
2267                            struct bpf_verifier_state *cur)
2268{
2269        u32 cnt = cur->jmp_history_cnt;
2270        struct bpf_idx_pair *p;
2271
2272        cnt++;
2273        p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2274        if (!p)
2275                return -ENOMEM;
2276        p[cnt - 1].idx = env->insn_idx;
2277        p[cnt - 1].prev_idx = env->prev_insn_idx;
2278        cur->jmp_history = p;
2279        cur->jmp_history_cnt = cnt;
2280        return 0;
2281}
2282
2283/* Backtrack one insn at a time. If idx is not at the top of the recorded
2284 * history then the previous instruction came from straight-line execution.
2285 */
2286static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2287                             u32 *history)
2288{
2289        u32 cnt = *history;
2290
2291        if (cnt && st->jmp_history[cnt - 1].idx == i) {
2292                i = st->jmp_history[cnt - 1].prev_idx;
2293                (*history)--;
2294        } else {
2295                i--;
2296        }
2297        return i;
2298}
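
/* Editorial note (not part of the original source): walking backwards from
 * insn i, get_prev_insn_idx() returns i - 1 for straight-line code; only
 * when the top entry of jmp_history records a jump that landed on i does it
 * follow prev_idx instead, consuming that history entry, so the backtracking
 * loop in __mark_chain_precision() retraces the exact path this state took.
 */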
2299
2300static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2301{
2302        const struct btf_type *func;
2303        struct btf *desc_btf;
2304
2305        if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2306                return NULL;
2307
2308        desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
2309        if (IS_ERR(desc_btf))
2310                return "<error>";
2311
2312        func = btf_type_by_id(desc_btf, insn->imm);
2313        return btf_name_by_offset(desc_btf, func->name_off);
2314}
2315
2316/* For a given verifier state backtrack_insn() is called from the last insn to
2317 * the first insn. Its purpose is to compute a bitmask of registers and
2318 * stack slots that need precision in the parent verifier state.
2319 */
2320static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2321                          u32 *reg_mask, u64 *stack_mask)
2322{
2323        const struct bpf_insn_cbs cbs = {
2324                .cb_call        = disasm_kfunc_name,
2325                .cb_print       = verbose,
2326                .private_data   = env,
2327        };
2328        struct bpf_insn *insn = env->prog->insnsi + idx;
2329        u8 class = BPF_CLASS(insn->code);
2330        u8 opcode = BPF_OP(insn->code);
2331        u8 mode = BPF_MODE(insn->code);
2332        u32 dreg = 1u << insn->dst_reg;
2333        u32 sreg = 1u << insn->src_reg;
2334        u32 spi;
2335
2336        if (insn->code == 0)
2337                return 0;
2338        if (env->log.level & BPF_LOG_LEVEL) {
2339                verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2340                verbose(env, "%d: ", idx);
2341                print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2342        }
2343
2344        if (class == BPF_ALU || class == BPF_ALU64) {
2345                if (!(*reg_mask & dreg))
2346                        return 0;
2347                if (opcode == BPF_MOV) {
2348                        if (BPF_SRC(insn->code) == BPF_X) {
2349                                /* dreg = sreg
2350                                 * dreg needs precision after this insn
2351                                 * sreg needs precision before this insn
2352                                 */
2353                                *reg_mask &= ~dreg;
2354                                *reg_mask |= sreg;
2355                        } else {
2356                                /* dreg = K
2357                                 * dreg needs precision after this insn.
2358                                 * Corresponding register is already marked
2359                                 * as precise=true in this verifier state.
2360                                 * No further markings in parent are necessary
2361                                 */
2362                                *reg_mask &= ~dreg;
2363                        }
2364                } else {
2365                        if (BPF_SRC(insn->code) == BPF_X) {
2366                                /* dreg += sreg
2367                                 * both dreg and sreg need precision
2368                                 * before this insn
2369                                 */
2370                                *reg_mask |= sreg;
2371                        } /* else dreg += K
2372                           * dreg still needs precision before this insn
2373                           */
2374                }
2375        } else if (class == BPF_LDX) {
2376                if (!(*reg_mask & dreg))
2377                        return 0;
2378                *reg_mask &= ~dreg;
2379
2380                /* scalars can only be spilled into stack w/o losing precision.
2381                 * Load from any other memory can be zero extended.
2382                 * The desire to keep that precision is already indicated
2383                 * by 'precise' mark in corresponding register of this state.
2384                 * No further tracking necessary.
2385                 */
2386                if (insn->src_reg != BPF_REG_FP)
2387                        return 0;
2388
2389                /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2390                 * that [fp - off] slot contains scalar that needs to be
2391                 * tracked with precision
2392                 */
2393                spi = (-insn->off - 1) / BPF_REG_SIZE;
2394                if (spi >= 64) {
2395                        verbose(env, "BUG spi %d\n", spi);
2396                        WARN_ONCE(1, "verifier backtracking bug");
2397                        return -EFAULT;
2398                }
2399                *stack_mask |= 1ull << spi;
2400        } else if (class == BPF_STX || class == BPF_ST) {
2401                if (*reg_mask & dreg)
2402                        /* stx & st shouldn't be using _scalar_ dst_reg
2403                         * to access memory. It means backtracking
2404                         * encountered a case of pointer subtraction.
2405                         */
2406                        return -ENOTSUPP;
2407                /* scalars can only be spilled into stack */
2408                if (insn->dst_reg != BPF_REG_FP)
2409                        return 0;
2410                spi = (-insn->off - 1) / BPF_REG_SIZE;
2411                if (spi >= 64) {
2412                        verbose(env, "BUG spi %d\n", spi);
2413                        WARN_ONCE(1, "verifier backtracking bug");
2414                        return -EFAULT;
2415                }
2416                if (!(*stack_mask & (1ull << spi)))
2417                        return 0;
2418                *stack_mask &= ~(1ull << spi);
2419                if (class == BPF_STX)
2420                        *reg_mask |= sreg;
2421        } else if (class == BPF_JMP || class == BPF_JMP32) {
2422                if (opcode == BPF_CALL) {
2423                        if (insn->src_reg == BPF_PSEUDO_CALL)
2424                                return -ENOTSUPP;
2425                        /* regular helper call sets R0 */
2426                        *reg_mask &= ~1;
2427                        if (*reg_mask & 0x3f) {
2428                                /* if backtracking was looking for registers R1-R5
2429                                 * they should have been found already.
2430                                 */
2431                                verbose(env, "BUG regs %x\n", *reg_mask);
2432                                WARN_ONCE(1, "verifier backtracking bug");
2433                                return -EFAULT;
2434                        }
2435                } else if (opcode == BPF_EXIT) {
2436                        return -ENOTSUPP;
2437                }
2438        } else if (class == BPF_LD) {
2439                if (!(*reg_mask & dreg))
2440                        return 0;
2441                *reg_mask &= ~dreg;
2442                /* It's ld_imm64 or ld_abs or ld_ind.
2443                 * For ld_imm64 no further tracking of precision
2444                 * into parent is necessary
2445                 */
2446                if (mode == BPF_IND || mode == BPF_ABS)
2447                        /* to be analyzed */
2448                        return -ENOTSUPP;
2449        }
2450        return 0;
2451}
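
/* Editorial example (not in the original source) of how backtrack_insn()
 * evolves reg_mask/stack_mask.  Suppose precision is requested for r2 right
 * after:
 *   0: (b7) r0 = 5
 *   1: (bf) r1 = r0
 *   2: (7b) *(u64 *)(r10 -8) = r1
 *   3: (79) r2 = *(u64 *)(r10 -8)
 * Starting with reg_mask = {r2}, insn 3 (LDX from fp-8) clears r2 and sets
 * bit 0 of stack_mask; insn 2 (STX to fp-8) clears that stack bit and sets
 * r1; insn 1 (mov) moves the mark from r1 to r0; insn 0 (r0 = K) clears r0,
 * leaving both masks empty: the constant 5 is the precise origin of r2.
 */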
2452
2453/* the scalar precision tracking algorithm:
2454 * . at the start all registers have precise=false.
2455 * . scalar ranges are tracked as normal through alu and jmp insns.
2456 * . once the precise value of a scalar register is used in:
2457 *   .  ptr + scalar alu
2458 *   . if (scalar cond K|scalar)
2459 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2460 *   backtrack through the verifier states and mark all registers and
2461 *   stack slots with spilled constants that were used to compute these
2462 *   scalar registers as precise.
2463 * . during state pruning two registers (or spilled stack slots)
2464 *   are equivalent if both are not precise.
2465 *
2466 * Note the verifier cannot simply walk register parentage chain,
2467 * since many different registers and stack slots could have been
2468 * used to compute single precise scalar.
2469 *
2470 * The approach of starting with precise=true for all registers and then
2471 * backtracking to mark a register as not precise when the verifier detects
2472 * that the program doesn't care about the specific value (e.g., when a helper
2473 * takes a register as an ARG_ANYTHING parameter) is not safe.
2474 *
2475 * It's ok to walk single parentage chain of the verifier states.
2476 * It's possible that this backtracking will go all the way till 1st insn.
2477 * All other branches will be explored for needing precision later.
2478 *
2479 * The backtracking needs to deal with cases like:
2480 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2481 * r9 -= r8
2482 * r5 = r9
2483 * if r5 > 0x79f goto pc+7
2484 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2485 * r5 += 1
2486 * ...
2487 * call bpf_perf_event_output#25
2488 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2489 *
2490 * and this case:
2491 * r6 = 1
2492 * call foo // uses callee's r6 inside to compute r0
2493 * r0 += r6
2494 * if r0 == 0 goto
2495 *
2496 * to track above reg_mask/stack_mask needs to be independent for each frame.
2497 *
2498 * Also if parent's curframe > frame where backtracking started,
2499 * the verifier needs to mark registers in both frames, otherwise callees
2500 * may incorrectly prune callers. This is similar to
2501 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2502 *
2503 * For now backtracking falls back into conservative marking.
2504 */
2505static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2506                                     struct bpf_verifier_state *st)
2507{
2508        struct bpf_func_state *func;
2509        struct bpf_reg_state *reg;
2510        int i, j;
2511
2512        /* big hammer: mark all scalars precise in this path.
2513         * pop_stack may still get !precise scalars.
2514         */
2515        for (; st; st = st->parent)
2516                for (i = 0; i <= st->curframe; i++) {
2517                        func = st->frame[i];
2518                        for (j = 0; j < BPF_REG_FP; j++) {
2519                                reg = &func->regs[j];
2520                                if (reg->type != SCALAR_VALUE)
2521                                        continue;
2522                                reg->precise = true;
2523                        }
2524                        for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2525                                if (!is_spilled_reg(&func->stack[j]))
2526                                        continue;
2527                                reg = &func->stack[j].spilled_ptr;
2528                                if (reg->type != SCALAR_VALUE)
2529                                        continue;
2530                                reg->precise = true;
2531                        }
2532                }
2533}
2534
2535static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2536                                  int spi)
2537{
2538        struct bpf_verifier_state *st = env->cur_state;
2539        int first_idx = st->first_insn_idx;
2540        int last_idx = env->insn_idx;
2541        struct bpf_func_state *func;
2542        struct bpf_reg_state *reg;
2543        u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2544        u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2545        bool skip_first = true;
2546        bool new_marks = false;
2547        int i, err;
2548
2549        if (!env->bpf_capable)
2550                return 0;
2551
2552        func = st->frame[st->curframe];
2553        if (regno >= 0) {
2554                reg = &func->regs[regno];
2555                if (reg->type != SCALAR_VALUE) {
2556                        WARN_ONCE(1, "backtracing misuse");
2557                        return -EFAULT;
2558                }
2559                if (!reg->precise)
2560                        new_marks = true;
2561                else
2562                        reg_mask = 0;
2563                reg->precise = true;
2564        }
2565
2566        while (spi >= 0) {
2567                if (!is_spilled_reg(&func->stack[spi])) {
2568                        stack_mask = 0;
2569                        break;
2570                }
2571                reg = &func->stack[spi].spilled_ptr;
2572                if (reg->type != SCALAR_VALUE) {
2573                        stack_mask = 0;
2574                        break;
2575                }
2576                if (!reg->precise)
2577                        new_marks = true;
2578                else
2579                        stack_mask = 0;
2580                reg->precise = true;
2581                break;
2582        }
2583
2584        if (!new_marks)
2585                return 0;
2586        if (!reg_mask && !stack_mask)
2587                return 0;
2588        for (;;) {
2589                DECLARE_BITMAP(mask, 64);
2590                u32 history = st->jmp_history_cnt;
2591
2592                if (env->log.level & BPF_LOG_LEVEL)
2593                        verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2594                for (i = last_idx;;) {
2595                        if (skip_first) {
2596                                err = 0;
2597                                skip_first = false;
2598                        } else {
2599                                err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2600                        }
2601                        if (err == -ENOTSUPP) {
2602                                mark_all_scalars_precise(env, st);
2603                                return 0;
2604                        } else if (err) {
2605                                return err;
2606                        }
2607                        if (!reg_mask && !stack_mask)
2608                                /* Found assignment(s) into tracked register in this state.
2609                                 * Since this state is already marked, just return.
2610                                 * Nothing to be tracked further in the parent state.
2611                                 */
2612                                return 0;
2613                        if (i == first_idx)
2614                                break;
2615                        i = get_prev_insn_idx(st, i, &history);
2616                        if (i >= env->prog->len) {
2617                                /* This can happen if backtracking reached insn 0
2618                                 * and there are still bits set in reg_mask or
2619                                 * stack_mask to backtrack.
2620                                 * It means the backtracking missed the spot where a
2621                                 * particular register was initialized with a constant.
2622                                 */
2623                                verbose(env, "BUG backtracking idx %d\n", i);
2624                                WARN_ONCE(1, "verifier backtracking bug");
2625                                return -EFAULT;
2626                        }
2627                }
2628                st = st->parent;
2629                if (!st)
2630                        break;
2631
2632                new_marks = false;
2633                func = st->frame[st->curframe];
2634                bitmap_from_u64(mask, reg_mask);
2635                for_each_set_bit(i, mask, 32) {
2636                        reg = &func->regs[i];
2637                        if (reg->type != SCALAR_VALUE) {
2638                                reg_mask &= ~(1u << i);
2639                                continue;
2640                        }
2641                        if (!reg->precise)
2642                                new_marks = true;
2643                        reg->precise = true;
2644                }
2645
2646                bitmap_from_u64(mask, stack_mask);
2647                for_each_set_bit(i, mask, 64) {
2648                        if (i >= func->allocated_stack / BPF_REG_SIZE) {
2649                                /* the sequence of instructions:
2650                                 * 2: (bf) r3 = r10
2651                                 * 3: (7b) *(u64 *)(r3 -8) = r0
2652                                 * 4: (79) r4 = *(u64 *)(r10 -8)
2653                                 * doesn't contain jmps. It's backtracked
2654                                 * as a single block.
2655                                 * During backtracking insn 3 is not recognized as
2656                                 * stack access, so at the end of backtracking
2657                                 * stack slot fp-8 is still marked in stack_mask.
2658                                 * However the parent state may not have accessed
2659                                 * fp-8 and it's "unallocated" stack space.
2660                                 * In such case fallback to conservative.
2661                                 */
2662                                mark_all_scalars_precise(env, st);
2663                                return 0;
2664                        }
2665
2666                        if (!is_spilled_reg(&func->stack[i])) {
2667                                stack_mask &= ~(1ull << i);
2668                                continue;
2669                        }
2670                        reg = &func->stack[i].spilled_ptr;
2671                        if (reg->type != SCALAR_VALUE) {
2672                                stack_mask &= ~(1ull << i);
2673                                continue;
2674                        }
2675                        if (!reg->precise)
2676                                new_marks = true;
2677                        reg->precise = true;
2678                }
2679                if (env->log.level & BPF_LOG_LEVEL) {
2680                        print_verifier_state(env, func);
2681                        verbose(env, "parent %s regs=%x stack=%llx marks\n",
2682                                new_marks ? "didn't have" : "already had",
2683                                reg_mask, stack_mask);
2684                }
2685
2686                if (!reg_mask && !stack_mask)
2687                        break;
2688                if (!new_marks)
2689                        break;
2690
2691                last_idx = st->last_insn_idx;
2692                first_idx = st->first_insn_idx;
2693        }
2694        return 0;
2695}
2696
2697static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2698{
2699        return __mark_chain_precision(env, regno, -1);
2700}
2701
2702static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2703{
2704        return __mark_chain_precision(env, -1, spi);
2705}
2706
2707static bool is_spillable_regtype(enum bpf_reg_type type)
2708{
2709        switch (type) {
2710        case PTR_TO_MAP_VALUE:
2711        case PTR_TO_MAP_VALUE_OR_NULL:
2712        case PTR_TO_STACK:
2713        case PTR_TO_CTX:
2714        case PTR_TO_PACKET:
2715        case PTR_TO_PACKET_META:
2716        case PTR_TO_PACKET_END:
2717        case PTR_TO_FLOW_KEYS:
2718        case CONST_PTR_TO_MAP:
2719        case PTR_TO_SOCKET:
2720        case PTR_TO_SOCKET_OR_NULL:
2721        case PTR_TO_SOCK_COMMON:
2722        case PTR_TO_SOCK_COMMON_OR_NULL:
2723        case PTR_TO_TCP_SOCK:
2724        case PTR_TO_TCP_SOCK_OR_NULL:
2725        case PTR_TO_XDP_SOCK:
2726        case PTR_TO_BTF_ID:
2727        case PTR_TO_BTF_ID_OR_NULL:
2728        case PTR_TO_RDONLY_BUF:
2729        case PTR_TO_RDONLY_BUF_OR_NULL:
2730        case PTR_TO_RDWR_BUF:
2731        case PTR_TO_RDWR_BUF_OR_NULL:
2732        case PTR_TO_PERCPU_BTF_ID:
2733        case PTR_TO_MEM:
2734        case PTR_TO_MEM_OR_NULL:
2735        case PTR_TO_FUNC:
2736        case PTR_TO_MAP_KEY:
2737                return true;
2738        default:
2739                return false;
2740        }
2741}
2742
2743/* Does this register contain a constant zero? */
2744static bool register_is_null(struct bpf_reg_state *reg)
2745{
2746        return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2747}
2748
2749static bool register_is_const(struct bpf_reg_state *reg)
2750{
2751        return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2752}
2753
2754static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2755{
2756        return tnum_is_unknown(reg->var_off) &&
2757               reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2758               reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2759               reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2760               reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2761}
2762
2763static bool register_is_bounded(struct bpf_reg_state *reg)
2764{
2765        return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2766}
2767
2768static bool __is_pointer_value(bool allow_ptr_leaks,
2769                               const struct bpf_reg_state *reg)
2770{
2771        if (allow_ptr_leaks)
2772                return false;
2773
2774        return reg->type != SCALAR_VALUE;
2775}
2776
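    /* Copy the spilled register state into the stack slot and mark the
     * affected slot_type bytes. For example (illustrative size), a 4-byte
     * spill marks slot_type[7..4] as STACK_SPILL and scrubs slot_type[3..0]
     * via scrub_spilled_slot(); only a full BPF_REG_SIZE spill sets
     * REG_LIVE_WRITTEN.
     */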
2777static void save_register_state(struct bpf_func_state *state,
2778                                int spi, struct bpf_reg_state *reg,
2779                                int size)
2780{
2781        int i;
2782
2783        state->stack[spi].spilled_ptr = *reg;
2784        if (size == BPF_REG_SIZE)
2785                state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2786
2787        for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2788                state->stack[spi].slot_type[i - 1] = STACK_SPILL;
2789
2790        /* size < 8 bytes spill */
2791        for (; i; i--)
2792                scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
2793}
2794
2795/* check_stack_{read,write}_fixed_off functions track spill/fill of registers;
2796 * stack boundary and alignment are checked in check_mem_access()
2797 */
2798static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2799                                       /* stack frame we're writing to */
2800                                       struct bpf_func_state *state,
2801                                       int off, int size, int value_regno,
2802                                       int insn_idx)
2803{
2804        struct bpf_func_state *cur; /* state of the current function */
2805        int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2806        u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2807        struct bpf_reg_state *reg = NULL;
2808
2809        err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
2810        if (err)
2811                return err;
2812        /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2813         * so it's aligned access and [off, off + size) are within stack limits
2814         */
2815        if (!env->allow_ptr_leaks &&
2816            state->stack[spi].slot_type[0] == STACK_SPILL &&
2817            size != BPF_REG_SIZE) {
2818                verbose(env, "attempt to corrupt spilled pointer on stack\n");
2819                return -EACCES;
2820        }
2821
2822        cur = env->cur_state->frame[env->cur_state->curframe];
2823        if (value_regno >= 0)
2824                reg = &cur->regs[value_regno];
2825        if (!env->bypass_spec_v4) {
2826                bool sanitize = reg && is_spillable_regtype(reg->type);
2827
2828                for (i = 0; i < size; i++) {
2829                        if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2830                                sanitize = true;
2831                                break;
2832                        }
2833                }
2834
2835                if (sanitize)
2836                        env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2837        }
2838
2839        if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
2840            !register_is_null(reg) && env->bpf_capable) {
2841                if (dst_reg != BPF_REG_FP) {
2842                        /* The backtracking logic can only recognize explicit
2843                         * stack slot address like [fp - 8]. Other spill of
2844                         * scalar via different register has to be conservative.
2845                         * Backtrack from here and mark all registers as precise
2846                         * that contributed into 'reg' being a constant.
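                             *
                             * A hypothetical spill sequence such as
                             *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                             *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
                             *   BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0),
                             * stores through R2 rather than [fp - 8] directly,
                             * so dst_reg != BPF_REG_FP and the precision marks
                             * are requested eagerly here.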
2847                         */
2848                        err = mark_chain_precision(env, value_regno);
2849                        if (err)
2850                                return err;
2851                }
2852                save_register_state(state, spi, reg, size);
2853        } else if (reg && is_spillable_regtype(reg->type)) {
2854                /* register containing pointer is being spilled into stack */
2855                if (size != BPF_REG_SIZE) {
2856                        verbose_linfo(env, insn_idx, "; ");
2857                        verbose(env, "invalid size of register spill\n");
2858                        return -EACCES;
2859                }
2860                if (state != cur && reg->type == PTR_TO_STACK) {
2861                        verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2862                        return -EINVAL;
2863                }
2864                save_register_state(state, spi, reg, size);
2865        } else {
2866                u8 type = STACK_MISC;
2867
2868                /* regular write of data into stack destroys any spilled ptr */
2869                state->stack[spi].spilled_ptr.type = NOT_INIT;
2870                /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2871                if (is_spilled_reg(&state->stack[spi]))
2872                        for (i = 0; i < BPF_REG_SIZE; i++)
2873                                scrub_spilled_slot(&state->stack[spi].slot_type[i]);
2874
2875                /* only mark the slot as written if all 8 bytes were written
2876                 * otherwise read propagation may incorrectly stop too soon
2877                 * when stack slots are partially written.
2878                 * This heuristic means that read propagation will be
2879                 * conservative, since it will add reg_live_read marks
2880                 * to stack slots all the way to the first state when a program
2881                 * writes+reads less than 8 bytes
2882                 */
2883                if (size == BPF_REG_SIZE)
2884                        state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2885
2886                /* when we zero initialize stack slots mark them as such */
2887                if (reg && register_is_null(reg)) {
2888                        /* backtracking doesn't work for STACK_ZERO yet. */
2889                        err = mark_chain_precision(env, value_regno);
2890                        if (err)
2891                                return err;
2892                        type = STACK_ZERO;
2893                }
2894
2895                /* Mark slots affected by this stack write. */
2896                for (i = 0; i < size; i++)
2897                        state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2898                                type;
2899        }
2900        return 0;
2901}
2902
2903/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2904 * known to contain a variable offset.
2905 * This function checks whether the write is permitted and conservatively
2906 * tracks the effects of the write, considering that each stack slot in the
2907 * dynamic range is potentially written to.
2908 *
2909 * 'off' includes 'regno->off'.
2910 * 'value_regno' can be -1, meaning that an unknown value is being written to
2911 * the stack.
2912 *
2913 * Spilled pointers in range are not marked as written because we don't know
2914 * what's going to be actually written. This means that read propagation for
2915 * future reads cannot be terminated by this write.
2916 *
2917 * For privileged programs, uninitialized stack slots are considered
2918 * initialized by this write (even though we don't know exactly what offsets
2919 * are going to be written to). The idea is that we don't want the verifier to
2920 * reject future reads that access slots written to through variable offsets.
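     *
     * For example (illustrative numbers), if the pointer's variable offset
     * is in [-16, -8], 'off' is 0 and 'size' is 4, then every stack byte in
     * [fp-16, fp-5] is treated as potentially written.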
2921 */
2922static int check_stack_write_var_off(struct bpf_verifier_env *env,
2923                                     /* func where register points to */
2924                                     struct bpf_func_state *state,
2925                                     int ptr_regno, int off, int size,
2926                                     int value_regno, int insn_idx)
2927{
2928        struct bpf_func_state *cur; /* state of the current function */
2929        int min_off, max_off;
2930        int i, err;
2931        struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2932        bool writing_zero = false;
2933        /* set if the fact that we're writing a zero is used to let any
2934         * stack slots remain STACK_ZERO
2935         */
2936        bool zero_used = false;
2937
2938        cur = env->cur_state->frame[env->cur_state->curframe];
2939        ptr_reg = &cur->regs[ptr_regno];
2940        min_off = ptr_reg->smin_value + off;
2941        max_off = ptr_reg->smax_value + off + size;
2942        if (value_regno >= 0)
2943                value_reg = &cur->regs[value_regno];
2944        if (value_reg && register_is_null(value_reg))
2945                writing_zero = true;
2946
2947        err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
2948        if (err)
2949                return err;
2950
2951
2952        /* Variable offset writes destroy any spilled pointers in range. */
2953        for (i = min_off; i < max_off; i++) {
2954                u8 new_type, *stype;
2955                int slot, spi;
2956
2957                slot = -i - 1;
2958                spi = slot / BPF_REG_SIZE;
2959                stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2960
2961                if (!env->allow_ptr_leaks
2962                                && *stype != NOT_INIT
2963                                && *stype != SCALAR_VALUE) {
2964                         * Reject the write if there are spilled pointers in
2965                         * range. If we didn't reject here, the ptr status
2966                         * would be erased below (even though not all slots are
2967                         * actually overwritten), possibly opening the door to
2968                         * leaks.
2969                         */
2970                        verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
2971                                insn_idx, i);
2972                        return -EINVAL;
2973                }
2974
2975                /* Erase all spilled pointers. */
2976                state->stack[spi].spilled_ptr.type = NOT_INIT;
2977
2978                /* Update the slot type. */
2979                new_type = STACK_MISC;
2980                if (writing_zero && *stype == STACK_ZERO) {
2981                        new_type = STACK_ZERO;
2982                        zero_used = true;
2983                }
2984                /* If the slot is STACK_INVALID, we check whether it's OK to
2985                 * pretend that it will be initialized by this write. The slot
2986                 * might not actually be written to, and so if we mark it as
2987                 * initialized future reads might leak uninitialized memory.
2988                 * For privileged programs, we will accept such reads to slots
2989                 * that may or may not be written because, if we're reject
2990                 * that may or may not be written because, if we were to reject
2991                 */
2992                if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2993                        verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
2994                                        insn_idx, i);
2995                        return -EINVAL;
2996                }
2997                *stype = new_type;
2998        }
2999        if (zero_used) {
3000                /* backtracking doesn't work for STACK_ZERO yet. */
3001                err = mark_chain_precision(env, value_regno);
3002                if (err)
3003                        return err;
3004        }
3005        return 0;
3006}
3007
3008/* When register 'dst_regno' is assigned some values from stack[min_off,
3009 * max_off), we set the register's type according to the types of the
3010 * respective stack slots. If all the stack values are known to be zeros, then
3011 * so is the destination reg. Otherwise, the register is considered to be
3012 * SCALAR. This function does not deal with register filling; the caller must
3013 * ensure that all spilled registers in the stack range have been marked as
3014 * read.
3015 */
3016static void mark_reg_stack_read(struct bpf_verifier_env *env,
3017                                /* func where src register points to */
3018                                struct bpf_func_state *ptr_state,
3019                                int min_off, int max_off, int dst_regno)
3020{
3021        struct bpf_verifier_state *vstate = env->cur_state;
3022        struct bpf_func_state *state = vstate->frame[vstate->curframe];
3023        int i, slot, spi;
3024        u8 *stype;
3025        int zeros = 0;
3026
3027        for (i = min_off; i < max_off; i++) {
3028                slot = -i - 1;
3029                spi = slot / BPF_REG_SIZE;
3030                stype = ptr_state->stack[spi].slot_type;
3031                if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3032                        break;
3033                zeros++;
3034        }
3035        if (zeros == max_off - min_off) {
3036                /* any access_size read into register is zero extended,
3037                 * so the whole register == const_zero
3038                 */
3039                __mark_reg_const_zero(&state->regs[dst_regno]);
3040                /* backtracking doesn't support STACK_ZERO yet,
3041                 * so mark it precise here, so that later
3042                 * backtracking can stop here.
3043                 * Backtracking may not need this if this register
3044                 * doesn't participate in pointer adjustment.
3045                 * Forward propagation of precise flag is not
3046                 * necessary either. This mark is only to stop
3047                 * backtracking. Any register that contributed
3048                 * to const 0 was marked precise before spill.
3049                 */
3050                state->regs[dst_regno].precise = true;
3051        } else {
3052                /* have read misc data from the stack */
3053                mark_reg_unknown(env, state->regs, dst_regno);
3054        }
3055        state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3056}
3057
3058/* Read the stack at 'off' and put the results into the register indicated by
3059 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3060 * spilled reg.
3061 *
3062 * 'dst_regno' can be -1, meaning that the read value is not going to a
3063 * register.
3064 *
3065 * The access is assumed to be within the current stack bounds.
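     *
     * For example, a 4-byte read of an 8-byte spill is only allowed when the
     * spilled value is a scalar; the destination register then becomes an
     * unknown scalar, while a narrow read of a spilled pointer is rejected.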
3066 */
3067static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3068                                      /* func where src register points to */
3069                                      struct bpf_func_state *reg_state,
3070                                      int off, int size, int dst_regno)
3071{
3072        struct bpf_verifier_state *vstate = env->cur_state;
3073        struct bpf_func_state *state = vstate->frame[vstate->curframe];
3074        int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3075        struct bpf_reg_state *reg;
3076        u8 *stype, type;
3077
3078        stype = reg_state->stack[spi].slot_type;
3079        reg = &reg_state->stack[spi].spilled_ptr;
3080
3081        if (is_spilled_reg(&reg_state->stack[spi])) {
3082                u8 spill_size = 1;
3083
3084                for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3085                        spill_size++;
3086
3087                if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3088                        if (reg->type != SCALAR_VALUE) {
3089                                verbose_linfo(env, env->insn_idx, "; ");
3090                                verbose(env, "invalid size of register fill\n");
3091                                return -EACCES;
3092                        }
3093
3094                        mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3095                        if (dst_regno < 0)
3096                                return 0;
3097
3098                        if (!(off % BPF_REG_SIZE) && size == spill_size) {
3099                                /* The earlier check_reg_arg() has decided the
3100                                 * subreg_def for this insn.  Save it first.
3101                                 */
3102                                s32 subreg_def = state->regs[dst_regno].subreg_def;
3103
3104                                state->regs[dst_regno] = *reg;
3105                                state->regs[dst_regno].subreg_def = subreg_def;
3106                        } else {
3107                                for (i = 0; i < size; i++) {
3108                                        type = stype[(slot - i) % BPF_REG_SIZE];
3109                                        if (type == STACK_SPILL)
3110                                                continue;
3111                                        if (type == STACK_MISC)
3112                                                continue;
3113                                        verbose(env, "invalid read from stack off %d+%d size %d\n",
3114                                                off, i, size);
3115                                        return -EACCES;
3116                                }
3117                                mark_reg_unknown(env, state->regs, dst_regno);
3118                        }
3119                        state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3120                        return 0;
3121                }
3122
3123                if (dst_regno >= 0) {
3124                        /* restore register state from stack */
3125                        state->regs[dst_regno] = *reg;
3126                        /* mark reg as written since spilled pointer state likely
3127                         * has its liveness marks cleared by is_state_visited()
3128                         * which resets stack/reg liveness for state transitions
3129                         */
3130                        state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3131                } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3132                        /* If dst_regno==-1, the caller is asking us whether
3133                         * it is acceptable to use this value as a SCALAR_VALUE
3134                         * (e.g. for XADD).
3135                         * We must not allow unprivileged callers to do that
3136                         * with spilled pointers.
3137                         */
3138                        verbose(env, "leaking pointer from stack off %d\n",
3139                                off);
3140                        return -EACCES;
3141                }
3142                mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3143        } else {
3144                for (i = 0; i < size; i++) {
3145                        type = stype[(slot - i) % BPF_REG_SIZE];
3146                        if (type == STACK_MISC)
3147                                continue;
3148                        if (type == STACK_ZERO)
3149                                continue;
3150                        verbose(env, "invalid read from stack off %d+%d size %d\n",
3151                                off, i, size);
3152                        return -EACCES;
3153                }
3154                mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3155                if (dst_regno >= 0)
3156                        mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3157        }
3158        return 0;
3159}
3160
3161enum stack_access_src {
3162        ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
3163        ACCESS_HELPER = 2,  /* the access is performed by a helper */
3164};
3165
3166static int check_stack_range_initialized(struct bpf_verifier_env *env,
3167                                         int regno, int off, int access_size,
3168                                         bool zero_size_allowed,
3169                                         enum stack_access_src type,
3170                                         struct bpf_call_arg_meta *meta);
3171
3172static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3173{
3174        return cur_regs(env) + regno;
3175}
3176
3177/* Read the stack at 'ptr_regno + off' and put the result into the register
3178 * 'dst_regno'.
3179 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
3180 * but not its variable offset.
3181 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3182 *
3183 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3184 * filling registers (i.e. reads of spilled register cannot be detected when
3185 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3186 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3187 * offset; for a fixed offset check_stack_read_fixed_off should be used
3188 * instead.
3189 */
3190static int check_stack_read_var_off(struct bpf_verifier_env *env,
3191                                    int ptr_regno, int off, int size, int dst_regno)
3192{
3193        /* The state of the source register. */
3194        struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3195        struct bpf_func_state *ptr_state = func(env, reg);
3196        int err;
3197        int min_off, max_off;
3198
3199        /* Note that we pass a NULL meta, so raw access will not be permitted.
3200         */
3201        err = check_stack_range_initialized(env, ptr_regno, off, size,
3202                                            false, ACCESS_DIRECT, NULL);
3203        if (err)
3204                return err;
3205
3206        min_off = reg->smin_value + off;
3207        max_off = reg->smax_value + off;
3208        mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3209        return 0;
3210}
3211
3212/* check_stack_read dispatches to check_stack_read_fixed_off or
3213 * check_stack_read_var_off.
3214 *
3215 * The caller must ensure that the offset falls within the allocated stack
3216 * bounds.
3217 *
3218 * 'dst_regno' is a register which will receive the value from the stack. It
3219 * can be -1, meaning that the read value is not going to a register.
3220 */
3221static int check_stack_read(struct bpf_verifier_env *env,
3222                            int ptr_regno, int off, int size,
3223                            int dst_regno)
3224{
3225        struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3226        struct bpf_func_state *state = func(env, reg);
3227        int err;
3228        /* Some accesses are only permitted with a static offset. */
3229        bool var_off = !tnum_is_const(reg->var_off);
3230
3231        /* The offset is required to be static when reads don't go to a
3232         * register, in order to not leak pointers (see
3233         * check_stack_read_fixed_off).
3234         */
3235        if (dst_regno < 0 && var_off) {
3236                char tn_buf[48];
3237
3238                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3239                verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3240                        tn_buf, off, size);
3241                return -EACCES;
3242        }
3243        /* Variable offset is prohibited for unprivileged mode for simplicity
3244         * since it requires corresponding support in Spectre masking for stack
3245         * ALU. See also retrieve_ptr_limit().
3246         */
3247        if (!env->bypass_spec_v1 && var_off) {
3248                char tn_buf[48];
3249
3250                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3251                verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3252                                ptr_regno, tn_buf);
3253                return -EACCES;
3254        }
3255
3256        if (!var_off) {
3257                off += reg->var_off.value;
3258                err = check_stack_read_fixed_off(env, state, off, size,
3259                                                 dst_regno);
3260        } else {
3261                /* Variable offset stack reads need more conservative handling
3262                 * than fixed offset ones. Note that dst_regno >= 0 on this
3263                 * branch.
3264                 */
3265                err = check_stack_read_var_off(env, ptr_regno, off, size,
3266                                               dst_regno);
3267        }
3268        return err;
3269}
3270
3271
3272/* check_stack_write dispatches to check_stack_write_fixed_off or
3273 * check_stack_write_var_off.
3274 *
3275 * 'ptr_regno' is the register used as a pointer into the stack.
3276 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3277 * 'value_regno' is the register whose value we're writing to the stack. It can
3278 * be -1, meaning that we're not writing from a register.
3279 *
3280 * The caller must ensure that the offset falls within the maximum stack size.
3281 */
3282static int check_stack_write(struct bpf_verifier_env *env,
3283                             int ptr_regno, int off, int size,
3284                             int value_regno, int insn_idx)
3285{
3286        struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3287        struct bpf_func_state *state = func(env, reg);
3288        int err;
3289
3290        if (tnum_is_const(reg->var_off)) {
3291                off += reg->var_off.value;
3292                err = check_stack_write_fixed_off(env, state, off, size,
3293                                                  value_regno, insn_idx);
3294        } else {
3295                /* Variable offset stack writes need more conservative handling
3296                 * than fixed offset ones.
3297                 */
3298                err = check_stack_write_var_off(env, state,
3299                                                ptr_regno, off, size,
3300                                                value_regno, insn_idx);
3301        }
3302        return err;
3303}
3304
3305static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3306                                 int off, int size, enum bpf_access_type type)
3307{
3308        struct bpf_reg_state *regs = cur_regs(env);
3309        struct bpf_map *map = regs[regno].map_ptr;
3310        u32 cap = bpf_map_flags_to_cap(map);
3311
3312        if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3313                verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3314                        map->value_size, off, size);
3315                return -EACCES;
3316        }
3317
3318        if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3319                verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3320                        map->value_size, off, size);
3321                return -EACCES;
3322        }
3323
3324        return 0;
3325}
3326
3327/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3328static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3329                              int off, int size, u32 mem_size,
3330                              bool zero_size_allowed)
3331{
3332        bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3333        struct bpf_reg_state *reg;
3334
3335        if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3336                return 0;
3337
3338        reg = &cur_regs(env)[regno];
3339        switch (reg->type) {
3340        case PTR_TO_MAP_KEY:
3341                verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3342                        mem_size, off, size);
3343                break;
3344        case PTR_TO_MAP_VALUE:
3345                verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3346                        mem_size, off, size);
3347                break;
3348        case PTR_TO_PACKET:
3349        case PTR_TO_PACKET_META:
3350        case PTR_TO_PACKET_END:
3351                verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3352                        off, size, regno, reg->id, off, mem_size);
3353                break;
3354        case PTR_TO_MEM:
3355        default:
3356                verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3357                        mem_size, off, size);
3358        }
3359
3360        return -EACCES;
3361}
3362
3363/* check read/write into a memory region with possible variable offset */
3364static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3365                                   int off, int size, u32 mem_size,
3366                                   bool zero_size_allowed)
3367{
3368        struct bpf_verifier_state *vstate = env->cur_state;
3369        struct bpf_func_state *state = vstate->frame[vstate->curframe];
3370        struct bpf_reg_state *reg = &state->regs[regno];
3371        int err;
3372
3373        /* We may have adjusted the register pointing to memory region, so we
3374         * need to try adding each of min_value and max_value to off
3375         * to make sure our theoretical access will be safe.
3376         */
3377        if (env->log.level & BPF_LOG_LEVEL)
3378                print_verifier_state(env, state);
3379
3380        /* The minimum value is only important with signed
3381         * comparisons where we can't assume the floor of a
3382         * value is 0.  If we are using signed variables for our
3383         * indexes we need to make sure that whatever we use
3384         * will have a set floor within our range.
3385         */
3386        if (reg->smin_value < 0 &&
3387            (reg->smin_value == S64_MIN ||
3388             (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3389              reg->smin_value + off < 0)) {
3390                verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3391                        regno);
3392                return -EACCES;
3393        }
3394        err = __check_mem_access(env, regno, reg->smin_value + off, size,
3395                                 mem_size, zero_size_allowed);
3396        if (err) {
3397                verbose(env, "R%d min value is outside of the allowed memory range\n",
3398                        regno);
3399                return err;
3400        }
3401
3402        /* If we haven't set a max value then we need to bail since we can't be
3403         * sure we won't do bad things.
3404         * If reg->umax_value + off could overflow, treat that as unbounded too.
3405         */
3406        if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3407                verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3408                        regno);
3409                return -EACCES;
3410        }
3411        err = __check_mem_access(env, regno, reg->umax_value + off, size,
3412                                 mem_size, zero_size_allowed);
3413        if (err) {
3414                verbose(env, "R%d max value is outside of the allowed memory range\n",
3415                        regno);
3416                return err;
3417        }
3418
3419        return 0;
3420}
3421
3422/* check read/write into a map element with possible variable offset */
3423static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3424                            int off, int size, bool zero_size_allowed)
3425{
3426        struct bpf_verifier_state *vstate = env->cur_state;
3427        struct bpf_func_state *state = vstate->frame[vstate->curframe];
3428        struct bpf_reg_state *reg = &state->regs[regno];
3429        struct bpf_map *map = reg->map_ptr;
3430        int err;
3431
3432        err = check_mem_region_access(env, regno, off, size, map->value_size,
3433                                      zero_size_allowed);
3434        if (err)
3435                return err;
3436
3437        if (map_value_has_spin_lock(map)) {
3438                u32 lock = map->spin_lock_off;
3439
3440                /* if any part of struct bpf_spin_lock can be touched by
3441                 * load/store reject this program.
3442                 * To check that [x1, x2) overlaps with [y1, y2)
3443                 * it is sufficient to check x1 < y2 && y1 < x2.
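                     *
                     * For example (illustrative offsets), with the lock at
                     * value offset 16, a fixed 4-byte access at offset 14
                     * covers bytes [14, 18), overlaps [16, 20) and is
                     * rejected.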
3444                 */
3445                if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3446                     lock < reg->umax_value + off + size) {
3447                        verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3448                        return -EACCES;
3449                }
3450        }
3451        if (map_value_has_timer(map)) {
3452                u32 t = map->timer_off;
3453
3454                if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3455                     t < reg->umax_value + off + size) {
3456                        verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3457                        return -EACCES;
3458                }
3459        }
3460        return err;
3461}
3462
3463#define MAX_PACKET_OFF 0xffff
3464
3465static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
3466{
3467        return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
3468}
3469
3470static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3471                                       const struct bpf_call_arg_meta *meta,
3472                                       enum bpf_access_type t)
3473{
3474        enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3475
3476        switch (prog_type) {
3477        /* Program types with only direct read access go here! */
3478        case BPF_PROG_TYPE_LWT_IN:
3479        case BPF_PROG_TYPE_LWT_OUT:
3480        case BPF_PROG_TYPE_LWT_SEG6LOCAL:
3481        case BPF_PROG_TYPE_SK_REUSEPORT:
3482        case BPF_PROG_TYPE_FLOW_DISSECTOR:
3483        case BPF_PROG_TYPE_CGROUP_SKB:
3484                if (t == BPF_WRITE)
3485                        return false;
3486                fallthrough;
3487
3488        /* Program types with direct read + write access go here! */
3489        case BPF_PROG_TYPE_SCHED_CLS:
3490        case BPF_PROG_TYPE_SCHED_ACT:
3491        case BPF_PROG_TYPE_XDP:
3492        case BPF_PROG_TYPE_LWT_XMIT:
3493        case BPF_PROG_TYPE_SK_SKB:
3494        case BPF_PROG_TYPE_SK_MSG:
3495                if (meta)
3496                        return meta->pkt_access;
3497
3498                env->seen_direct_write = true;
3499                return true;
3500
3501        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3502                if (t == BPF_WRITE)
3503                        env->seen_direct_write = true;
3504
3505                return true;
3506
3507        default:
3508                return false;
3509        }
3510}
3511
3512static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
3513                               int size, bool zero_size_allowed)
3514{
3515        struct bpf_reg_state *regs = cur_regs(env);
3516        struct bpf_reg_state *reg = &regs[regno];
3517        int err;
3518
3519        /* We may have added a variable offset to the packet pointer; but any
3520         * reg->range we have comes after that.  We are only checking the fixed
3521         * offset.
3522         */
3523
3524        /* We don't allow negative numbers, because we aren't tracking enough
3525         * detail to prove they're safe.
3526         */
3527        if (reg->smin_value < 0) {
3528                verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3529                        regno);
3530                return -EACCES;
3531        }
3532
3533        err = reg->range < 0 ? -EINVAL :
3534              __check_mem_access(env, regno, off, size, reg->range,
3535                                 zero_size_allowed);
3536        if (err) {
3537                verbose(env, "R%d offset is outside of the packet\n", regno);
3538                return err;
3539        }
3540
3541        /* __check_mem_access has made sure "off + size - 1" is within u16.
3542         * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3543         * otherwise find_good_pkt_pointers would have refused to set range info
3544         * and __check_mem_access would have rejected this pkt access.
3545         * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3546         */
3547        env->prog->aux->max_pkt_offset =
3548                max_t(u32, env->prog->aux->max_pkt_offset,
3549                      off + reg->umax_value + size - 1);
3550
3551        return err;
3552}
3553
3554/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3555static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3556                            enum bpf_access_type t, enum bpf_reg_type *reg_type,
3557                            struct btf **btf, u32 *btf_id)
3558{
3559        struct bpf_insn_access_aux info = {
3560                .reg_type = *reg_type,
3561                .log = &env->log,
3562        };
3563
3564        if (env->ops->is_valid_access &&
3565            env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3566                /* A non zero info.ctx_field_size indicates that this field is a
3567                 * candidate for later verifier transformation to load the whole
3568                 * field and then apply a mask when accessed with a narrower
3569                 * access than actual ctx access size. A zero info.ctx_field_size
3570                 * will only allow for whole field access and rejects any other
3571                 * type of narrower access.
3572                 */
3573                *reg_type = info.reg_type;
3574
3575                if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) {
3576                        *btf = info.btf;
3577                        *btf_id = info.btf_id;
3578                } else {
3579                        env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3580                }
3581                /* remember the offset of last byte accessed in ctx */
3582                if (env->prog->aux->max_ctx_offset < off + size)
3583                        env->prog->aux->max_ctx_offset = off + size;
3584                return 0;
3585        }
3586
3587        verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3588        return -EACCES;
3589}
3590
3591static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3592                                  int size)
3593{
3594        if (size < 0 || off < 0 ||
3595            (u64)off + size > sizeof(struct bpf_flow_keys)) {
3596                verbose(env, "invalid access to flow keys off=%d size=%d\n",
3597                        off, size);
3598                return -EACCES;
3599        }
3600        return 0;
3601}
3602
3603static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3604                             u32 regno, int off, int size,
3605                             enum bpf_access_type t)
3606{
3607        struct bpf_reg_state *regs = cur_regs(env);
3608        struct bpf_reg_state *reg = &regs[regno];
3609        struct bpf_insn_access_aux info = {};
3610        bool valid;
3611
3612        if (reg->smin_value < 0) {
3613                verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3614                        regno);
3615                return -EACCES;
3616        }
3617
3618        switch (reg->type) {
3619        case PTR_TO_SOCK_COMMON:
3620                valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3621                break;
3622        case PTR_TO_SOCKET:
3623                valid = bpf_sock_is_valid_access(off, size, t, &info);
3624                break;
3625        case PTR_TO_TCP_SOCK:
3626                valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3627                break;
3628        case PTR_TO_XDP_SOCK:
3629                valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3630                break;
3631        default:
3632                valid = false;
3633        }
3634
3635
3636        if (valid) {
3637                env->insn_aux_data[insn_idx].ctx_field_size =
3638                        info.ctx_field_size;
3639                return 0;
3640        }
3641
3642        verbose(env, "R%d invalid %s access off=%d size=%d\n",
3643                regno, reg_type_str[reg->type], off, size);
3644
3645        return -EACCES;
3646}
3647
3648static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3649{
3650        return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3651}
3652
3653static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3654{
3655        const struct bpf_reg_state *reg = reg_state(env, regno);
3656
3657        return reg->type == PTR_TO_CTX;
3658}
3659
3660static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3661{
3662        const struct bpf_reg_state *reg = reg_state(env, regno);
3663
3664        return type_is_sk_pointer(reg->type);
3665}
3666
3667static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3668{
3669        const struct bpf_reg_state *reg = reg_state(env, regno);
3670
3671        return type_is_pkt_pointer(reg->type);
3672}
3673
3674static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3675{
3676        const struct bpf_reg_state *reg = reg_state(env, regno);
3677
3678        /* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
3679        return reg->type == PTR_TO_FLOW_KEYS;
3680}
3681
3682static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3683                                   const struct bpf_reg_state *reg,
3684                                   int off, int size, bool strict)
3685{
3686        struct tnum reg_off;
3687        int ip_align;
3688
3689        /* Byte size accesses are always allowed. */
3690        if (!strict || size == 1)
3691                return 0;
3692
3693        /* For platforms that do not have a Kconfig enabling
3694         * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3695         * NET_IP_ALIGN is universally set to '2'.  And on platforms
3696         * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3697         * to this code only in strict mode where we want to emulate
3698         * the NET_IP_ALIGN==2 checking.  Therefore use an
3699         * unconditional IP align value of '2'.
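             *
             * For example (illustrative offsets, strict mode), a 4-byte load
             * at off 14 with reg->off == 0 and a known-zero var_off checks
             * 2 + 0 + 14 == 16, which is 4-byte aligned and accepted; the
             * same load at off 13 would be rejected.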
3700         */
3701        ip_align = 2;
3702
3703        reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3704        if (!tnum_is_aligned(reg_off, size)) {
3705                char tn_buf[48];
3706
3707                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3708                verbose(env,
3709                        "misaligned packet access off %d+%s+%d+%d size %d\n",
3710                        ip_align, tn_buf, reg->off, off, size);
3711                return -EACCES;
3712        }
3713
3714        return 0;
3715}
3716
3717static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3718                                       const struct bpf_reg_state *reg,
3719                                       const char *pointer_desc,
3720                                       int off, int size, bool strict)
3721{
3722        struct tnum reg_off;
3723
3724        /* Byte size accesses are always allowed. */
3725        if (!strict || size == 1)
3726                return 0;
3727
3728        reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3729        if (!tnum_is_aligned(reg_off, size)) {
3730                char tn_buf[48];
3731
3732                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3733                verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3734                        pointer_desc, tn_buf, reg->off, off, size);
3735                return -EACCES;
3736        }
3737
3738        return 0;
3739}
3740
3741static int check_ptr_alignment(struct bpf_verifier_env *env,
3742                               const struct bpf_reg_state *reg, int off,
3743                               int size, bool strict_alignment_once)
3744{
3745        bool strict = env->strict_alignment || strict_alignment_once;
3746        const char *pointer_desc = "";
3747
3748        switch (reg->type) {
3749        case PTR_TO_PACKET:
3750        case PTR_TO_PACKET_META:
3751                /* Special case, because of NET_IP_ALIGN. Given metadata sits
3752                 * right in front, treat it the very same way.
3753                 */
3754                return check_pkt_ptr_alignment(env, reg, off, size, strict);
3755        case PTR_TO_FLOW_KEYS:
3756                pointer_desc = "flow keys ";
3757                break;
3758        case PTR_TO_MAP_KEY:
3759                pointer_desc = "key ";
3760                break;
3761        case PTR_TO_MAP_VALUE:
3762                pointer_desc = "value ";
3763                break;
3764        case PTR_TO_CTX:
3765                pointer_desc = "context ";
3766                break;
3767        case PTR_TO_STACK:
3768                pointer_desc = "stack ";
3769                /* The stack spill tracking logic in check_stack_write_fixed_off()
3770                 * and check_stack_read_fixed_off() relies on stack accesses being
3771                 * aligned.
3772                 */
3773                strict = true;
3774                break;
3775        case PTR_TO_SOCKET:
3776                pointer_desc = "sock ";
3777                break;
3778        case PTR_TO_SOCK_COMMON:
3779                pointer_desc = "sock_common ";
3780                break;
3781        case PTR_TO_TCP_SOCK:
3782                pointer_desc = "tcp_sock ";
3783                break;
3784        case PTR_TO_XDP_SOCK:
3785                pointer_desc = "xdp_sock ";
3786                break;
3787        default:
3788                break;
3789        }
3790        return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3791                                           strict);
3792}
3793
3794static int update_stack_depth(struct bpf_verifier_env *env,
3795                              const struct bpf_func_state *func,
3796                              int off)
3797{
3798        u16 stack = env->subprog_info[func->subprogno].stack_depth;
3799
3800        if (stack >= -off)
3801                return 0;
3802
3803        /* update known max for given subprogram */
3804        env->subprog_info[func->subprogno].stack_depth = -off;
3805        return 0;
3806}
3807
3808/* Starting from the main bpf function, walk all instructions of the function
3809 * and recursively walk all callees that the given function can call.
3810 * Ignore jump and exit insns.
3811 * Since recursion is prevented by check_cfg() this algorithm
3812 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
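     *
     * For example (illustrative depth), a subprog with stack_depth 100
     * contributes round_up(100, 32) == 128 bytes to the combined depth that
     * is checked against MAX_BPF_STACK.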
3813 */
3814static int check_max_stack_depth(struct bpf_verifier_env *env)
3815{
3816        int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3817        struct bpf_subprog_info *subprog = env->subprog_info;
3818        struct bpf_insn *insn = env->prog->insnsi;
3819        bool tail_call_reachable = false;
3820        int ret_insn[MAX_CALL_FRAMES];
3821        int ret_prog[MAX_CALL_FRAMES];
3822        int j;
3823
3824process_func:
3825        /* protect against potential stack overflow that might happen when
3826         * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3827         * depth in such a case down to 256 so that the worst case scenario
3828         * would result in an 8k stack size (32, the tail call limit, * 256 =
3829         * 8k).
3830         *
3831         * To get the idea what might happen, see an example:
3832         * func1 -> sub rsp, 128
3833         *  subfunc1 -> sub rsp, 256
3834         *  tailcall1 -> add rsp, 256
3835         *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3836         *   subfunc2 -> sub rsp, 64
3837         *   subfunc22 -> sub rsp, 128
3838         *   tailcall2 -> add rsp, 128
3839         *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3840         *
3841         * a tail call will unwind the current stack frame but it will not get rid
3842         * of the caller's stack, as shown in the example above.
3843         */
3844        if (idx && subprog[idx].has_tail_call && depth >= 256) {
3845                verbose(env,
3846                        "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3847                        depth);
3848                return -EACCES;
3849        }
3850        /* round up to 32 bytes, since this is the granularity
3851         * of the interpreter stack size
3852         */
3853        depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3854        if (depth > MAX_BPF_STACK) {
3855                verbose(env, "combined stack size of %d calls is %d. Too large\n",
3856                        frame + 1, depth);
3857                return -EACCES;
3858        }
3859continue_func:
3860        subprog_end = subprog[idx + 1].start;
3861        for (; i < subprog_end; i++) {
3862                int next_insn;
3863
3864                if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
3865                        continue;
3866                /* remember insn and function to return to */
3867                ret_insn[frame] = i + 1;
3868                ret_prog[frame] = idx;
3869
3870                /* find the callee */
3871                next_insn = i + insn[i].imm + 1;
3872                idx = find_subprog(env, next_insn);
3873                if (idx < 0) {
3874                        WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3875                                  next_insn);
3876                        return -EFAULT;
3877                }
3878                if (subprog[idx].is_async_cb) {
3879                        if (subprog[idx].has_tail_call) {
3880                                verbose(env, "verifier bug. subprog has tail_call and async cb\n");
3881                                return -EFAULT;
3882                        }
3883                        /* async callbacks don't increase bpf prog stack size */
3884                        continue;
3885                }
3886                i = next_insn;
3887
3888                if (subprog[idx].has_tail_call)
3889                        tail_call_reachable = true;
3890
3891                frame++;
3892                if (frame >= MAX_CALL_FRAMES) {
3893                        verbose(env, "the call stack of %d frames is too deep !\n",
3894                                frame);
3895                        return -E2BIG;
3896                }
3897                goto process_func;
3898        }
3899        /* if a tail call was detected across bpf2bpf calls then mark each of the
3900         * currently present subprog frames as tail call reachable subprogs;
3901         * this info will be utilized by the JIT so that the tail call counter
3902         * is preserved throughout bpf2bpf calls combined with tailcalls
3903         */
3904        if (tail_call_reachable)
3905                for (j = 0; j < frame; j++)
3906                        subprog[ret_prog[j]].tail_call_reachable = true;
3907        if (subprog[0].tail_call_reachable)
3908                env->prog->aux->tail_call_reachable = true;
3909
3910        /* end of for() loop means the last insn of the 'subprog'
3911         * was reached. Doesn't matter whether it was JA or EXIT
3912         */
3913        if (frame == 0)
3914                return 0;
3915        depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3916        frame--;
3917        i = ret_insn[frame];
3918        idx = ret_prog[frame];
3919        goto continue_func;
3920}
3921
3922#ifndef CONFIG_BPF_JIT_ALWAYS_ON
3923static int get_callee_stack_depth(struct bpf_verifier_env *env,
3924                                  const struct bpf_insn *insn, int idx)
3925{
3926        int start = idx + insn->imm + 1, subprog;
3927
3928        subprog = find_subprog(env, start);
3929        if (subprog < 0) {
3930                WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3931                          start);
3932                return -EFAULT;
3933        }
3934        return env->subprog_info[subprog].stack_depth;
3935}
3936#endif
3937
3938int check_ctx_reg(struct bpf_verifier_env *env,
3939                  const struct bpf_reg_state *reg, int regno)
3940{
3941        /* Access to ctx or passing it to a helper is only allowed in
3942         * its original, unmodified form.
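             *
             * For example, after something like 'r1 += 8' on the ctx pointer,
             * reg->off becomes 8 and the dereference is rejected below as a
             * "dereference of modified ctx ptr".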
3943         */
3944
3945        if (reg->off) {
3946                verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
3947                        regno, reg->off);
3948                return -EACCES;
3949        }
3950
3951        if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3952                char tn_buf[48];
3953
3954                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3955                verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
3956                return -EACCES;
3957        }
3958
3959        return 0;
3960}
3961
3962static int __check_buffer_access(struct bpf_verifier_env *env,
3963                                 const char *buf_info,
3964                                 const struct bpf_reg_state *reg,
3965                                 int regno, int off, int size)
3966{
3967        if (off < 0) {
3968                verbose(env,
3969                        "R%d invalid %s buffer access: off=%d, size=%d\n",
3970                        regno, buf_info, off, size);
3971                return -EACCES;
3972        }
3973        if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3974                char tn_buf[48];
3975
3976                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3977                verbose(env,
3978                        "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
3979                        regno, off, tn_buf);
3980                return -EACCES;
3981        }
3982
3983        return 0;
3984}
3985
3986static int check_tp_buffer_access(struct bpf_verifier_env *env,
3987                                  const struct bpf_reg_state *reg,
3988                                  int regno, int off, int size)
3989{
3990        int err;
3991
3992        err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3993        if (err)
3994                return err;
3995
3996        if (off + size > env->prog->aux->max_tp_access)
3997                env->prog->aux->max_tp_access = off + size;
3998
3999        return 0;
4000}
4001
4002static int check_buffer_access(struct bpf_verifier_env *env,
4003                               const struct bpf_reg_state *reg,
4004                               int regno, int off, int size,
4005                               bool zero_size_allowed,
4006                               const char *buf_info,
4007                               u32 *max_access)
4008{
4009        int err;
4010
4011        err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4012        if (err)
4013                return err;
4014
4015        if (off + size > *max_access)
4016                *max_access = off + size;
4017
4018        return 0;
4019}
4020
4021/* BPF architecture zero extends alu32 ops into 64-bit registers */
4022static void zext_32_to_64(struct bpf_reg_state *reg)
4023{
4024        reg->var_off = tnum_subreg(reg->var_off);
4025        __reg_assign_32_into_64(reg);
4026}
4027
4028/* truncate register to smaller size (in bytes)
4029 * must be called with size < BPF_REG_SIZE
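     *
     * For example (illustrative bounds), with size == 2 the mask is 0xffff;
     * if umin_value == 0x10005 and umax_value == 0x1000a the common high
     * bits are masked off and the bounds become [5, 0xa], otherwise they
     * widen to [0, 0xffff].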
4030 */
4031static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4032{
4033        u64 mask;
4034
4035        /* clear high bits in bit representation */
4036        reg->var_off = tnum_cast(reg->var_off, size);
4037
4038        /* fix arithmetic bounds */
4039        mask = ((u64)1 << (size * 8)) - 1;
4040        if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4041                reg->umin_value &= mask;
4042                reg->umax_value &= mask;
4043        } else {
4044                reg->umin_value = 0;
4045                reg->umax_value = mask;
4046        }
4047        reg->smin_value = reg->umin_value;
4048        reg->smax_value = reg->umax_value;
4049
4050        /* If the size is smaller than the 32-bit register, the 32-bit
4051         * values are truncated as well, so push the 64-bit bounds into the
4052         * 32-bit bounds. Sizes below 32 bits were already truncated above.
4053         */
4054        if (size >= 4)
4055                return;
4056        __reg_combine_64_into_32(reg);
4057}
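
/* Worked example (illustrative only): a register known to be in
 * [0x100, 0x1ff] that is coerced to size == 1 has identical high bits in
 * both bounds (0x100 & ~0xff == 0x1ff & ~0xff), so the bounds simply become
 * [0x00, 0xff].  For a range like [0xff, 0x100] the high bits differ and the
 * bounds are reset to the full [0x00, 0xff] byte range instead.
 */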
4058
4059static bool bpf_map_is_rdonly(const struct bpf_map *map)
4060{
4061        /* A map is considered read-only if the following conditions are true:
4062         *
4063         * 1) The BPF program side cannot change any of the map content. The
4064         *    BPF_F_RDONLY_PROG flag was set at map creation time and stays in
4065         *    effect for the lifetime of the map.
4066         * 2) The map value(s) have been initialized from user space by a
4067         *    loader and then "frozen", such that no new map update/delete
4068         *    operations from syscall side are possible for the rest of
4069         *    the map's lifetime from that point onwards.
4070         * 3) Any parallel/pending map update/delete operations from syscall
4071         *    side have been completed. Only after that point, it's safe to
4072         *    assume that map value(s) are immutable.
4073         */
4074        return (map->map_flags & BPF_F_RDONLY_PROG) &&
4075               READ_ONCE(map->frozen) &&
4076               !bpf_map_write_active(map);
4077}
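
/* Illustrative user-space sketch (assumes libbpf-style helpers such as
 * bpf_map_create() and bpf_map_freeze(); not part of this file): a map
 * typically becomes read-only in the above sense by being created with
 * BPF_F_RDONLY_PROG, populated, and then frozen:
 *
 *     LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_RDONLY_PROG);
 *     int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "cfg",
 *                             sizeof(__u32), sizeof(__u64), 1, &opts);
 *     __u32 key = 0;
 *     __u64 val = 42;
 *
 *     bpf_map_update_elem(fd, &key, &val, BPF_ANY);
 *     bpf_map_freeze(fd);     // no further syscall-side updates
 */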
4078
4079static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4080{
4081        void *ptr;
4082        u64 addr;
4083        int err;
4084
4085        err = map->ops->map_direct_value_addr(map, &addr, off);
4086        if (err)
4087                return err;
4088        ptr = (void *)(long)addr + off;
4089
4090        switch (size) {
4091        case sizeof(u8):
4092                *val = (u64)*(u8 *)ptr;
4093                break;
4094        case sizeof(u16):
4095                *val = (u64)*(u16 *)ptr;
4096                break;
4097        case sizeof(u32):
4098                *val = (u64)*(u32 *)ptr;
4099                break;
4100        case sizeof(u64):
4101                *val = *(u64 *)ptr;
4102                break;
4103        default:
4104                return -EINVAL;
4105        }
4106        return 0;
4107}
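
/* Illustrative example (not part of the verifier code): bpf_map_direct_read()
 * is what lets loads from frozen read-only maps become known constants.  For
 * a global 'const volatile int debug_level = 0;' that libbpf places in a
 * frozen .rodata array map, a load of debug_level at a constant offset yields
 * the constant 0 in the register state (assuming it is still 0 at load time),
 * so a subsequent 'if (debug_level) { ... }' branch can be pruned as dead
 * code.
 */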
4108
4109static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4110                                   struct bpf_reg_state *regs,
4111                                   int regno, int off, int size,
4112                                   enum bpf_access_type atype,
4113                                   int value_regno)
4114{
4115        struct bpf_reg_state *reg = regs + regno;
4116        const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4117        const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4118        u32 btf_id;
4119        int ret;
4120
4121        if (off < 0) {
4122                verbose(env,
4123                        "R%d is ptr_%s invalid negative access: off=%d\n",
4124                        regno, tname, off);
4125                return -EACCES;
4126        }
4127        if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4128                char tn_buf[48];
4129
4130                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4131                verbose(env,
4132                        "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4133                        regno, tname, off, tn_buf);
4134                return -EACCES;
4135        }
4136
4137        if (env->ops->btf_struct_access) {
4138                ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
4139                                                  off, size, atype, &btf_id);
4140        } else {
4141                if (atype != BPF_READ) {
4142                        verbose(env, "only read is supported\n");
4143                        return -EACCES;
4144                }
4145
4146                ret = btf_struct_access(&env->log, reg->btf, t, off, size,
4147                                        atype, &btf_id);
4148        }
4149
4150        if (ret < 0)
4151                return ret;
4152
4153        if (atype == BPF_READ && value_regno >= 0)
4154                mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
4155
4156        return 0;
4157}
4158
4159static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4160                                   struct bpf_reg_state *regs,
4161                                   int regno, int off, int size,
4162                                   enum bpf_access_type atype,
4163                                   int value_regno)
4164{
4165        struct bpf_reg_state *reg = regs + regno;
4166        struct bpf_map *map = reg->map_ptr;
4167        const struct btf_type *t;
4168        const char *tname;
4169        u32 btf_id;
4170        int ret;
4171
4172        if (!btf_vmlinux) {
4173                verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4174                return -ENOTSUPP;
4175        }
4176
4177        if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4178                verbose(env, "map_ptr access not supported for map type %d\n",
4179                        map->map_type);
4180                return -ENOTSUPP;
4181        }
4182
4183        t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4184        tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4185
4186        if (!env->allow_ptr_to_map_access) {
4187                verbose(env,
4188                        "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4189                        tname);
4190                return -EPERM;
4191        }
4192
4193        if (off < 0) {
4194                verbose(env, "R%d is %s invalid negative access: off=%d\n",
4195                        regno, tname, off);
4196                return -EACCES;
4197        }
4198
4199        if (atype != BPF_READ) {
4200                verbose(env, "only read from %s is supported\n", tname);
4201                return -EACCES;
4202        }
4203
4204        ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
4205        if (ret < 0)
4206                return ret;
4207
4208        if (value_regno >= 0)
4209                mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
4210
4211        return 0;
4212}
4213
4214/* Check that the stack access at the given offset is within bounds. The
4215 * maximum valid offset is -1.
4216 *
4217 * The minimum valid offset is -MAX_BPF_STACK for writes, and
4218 * -state->allocated_stack for reads.
4219 */
4220static int check_stack_slot_within_bounds(int off,
4221                                          struct bpf_func_state *state,
4222                                          enum bpf_access_type t)
4223{
4224        int min_valid_off;
4225
4226        if (t == BPF_WRITE)
4227                min_valid_off = -MAX_BPF_STACK;
4228        else
4229                min_valid_off = -state->allocated_stack;
4230
4231        if (off < min_valid_off || off > -1)
4232                return -EACCES;
4233        return 0;
4234}
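
/* Worked example (illustrative only): with MAX_BPF_STACK == 512, a write at
 * off == -8 or off == -512 passes, while off == 0 (at or above the frame
 * pointer) or off == -520 (below the maximum stack) is rejected.  Reads are
 * bounded by state->allocated_stack instead, so reading below what the
 * function has actually used so far fails here as well.
 */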
4235
4236/* Check that the stack access at 'regno + off' falls within the maximum stack
4237 * bounds.
4238 *
4239 * 'off' includes 'regno->off', but not its dynamic part (if any).
4240 */
4241static int check_stack_access_within_bounds(
4242                struct bpf_verifier_env *env,
4243                int regno, int off, int access_size,
4244                enum stack_access_src src, enum bpf_access_type type)
4245{
4246        struct bpf_reg_state *regs = cur_regs(env);
4247        struct bpf_reg_state *reg = regs + regno;
4248        struct bpf_func_state *state = func(env, reg);
4249        int min_off, max_off;
4250        int err;
4251        char *err_extra;
4252
4253        if (src == ACCESS_HELPER)
4254                /* We don't know if helpers are reading or writing (or both). */
4255                err_extra = " indirect access to";
4256        else if (type == BPF_READ)
4257                err_extra = " read from";
4258        else
4259                err_extra = " write to";
4260
4261        if (tnum_is_const(reg->var_off)) {
4262                min_off = reg->var_off.value + off;
4263                if (access_size > 0)
4264                        max_off = min_off + access_size - 1;
4265                else
4266                        max_off = min_off;
4267        } else {
4268                if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4269                    reg->smin_value <= -BPF_MAX_VAR_OFF) {
4270                        verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4271                                err_extra, regno);
4272                        return -EACCES;
4273                }
4274                min_off = reg->smin_value + off;
4275                if (access_size > 0)
4276                        max_off = reg->smax_value + off + access_size - 1;
4277                else
4278                        max_off = min_off;
4279        }
4280
4281        err = check_stack_slot_within_bounds(min_off, state, type);
4282        if (!err)
4283                err = check_stack_slot_within_bounds(max_off, state, type);
4284
4285        if (err) {
4286                if (tnum_is_const(reg->var_off)) {
4287                        verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4288                                err_extra, regno, off, access_size);
4289                } else {
4290                        char tn_buf[48];
4291
4292                        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4293                        verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4294                                err_extra, regno, tn_buf, access_size);
4295                }
4296        }
4297        return err;
4298}
4299
4300/* check whether memory at (regno + off) is accessible for t = (read | write)
4301 * if t==write, value_regno is a register which value is stored into memory
4302 * if t==read, value_regno is a register which will receive the value from memory
4303 * if t==write && value_regno==-1, some unknown value is stored into memory
4304 * if t==read && value_regno==-1, don't care what we read from memory
4305 */
4306static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4307                            int off, int bpf_size, enum bpf_access_type t,
4308                            int value_regno, bool strict_alignment_once)
4309{
4310        struct bpf_reg_state *regs = cur_regs(env);
4311        struct bpf_reg_state *reg = regs + regno;
4312        struct bpf_func_state *state;
4313        int size, err = 0;
4314
4315        size = bpf_size_to_bytes(bpf_size);
4316        if (size < 0)
4317                return size;
4318
4319        /* alignment checks will add in reg->off themselves */
4320        err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
4321        if (err)
4322                return err;
4323
4324        /* for access checks, reg->off is just part of off */
4325        off += reg->off;
4326
4327        if (reg->type == PTR_TO_MAP_KEY) {
4328                if (t == BPF_WRITE) {
4329                        verbose(env, "write to change key R%d not allowed\n", regno);
4330                        return -EACCES;
4331                }
4332
4333                err = check_mem_region_access(env, regno, off, size,
4334                                              reg->map_ptr->key_size, false);
4335                if (err)
4336                        return err;
4337                if (value_regno >= 0)
4338                        mark_reg_unknown(env, regs, value_regno);
4339        } else if (reg->type == PTR_TO_MAP_VALUE) {
4340                if (t == BPF_WRITE && value_regno >= 0 &&
4341                    is_pointer_value(env, value_regno)) {
4342                        verbose(env, "R%d leaks addr into map\n", value_regno);
4343                        return -EACCES;
4344                }
4345                err = check_map_access_type(env, regno, off, size, t);
4346                if (err)
4347                        return err;
4348                err = check_map_access(env, regno, off, size, false);
4349                if (!err && t == BPF_READ && value_regno >= 0) {
4350                        struct bpf_map *map = reg->map_ptr;
4351
4352                        /* if map is read-only, track its contents as scalars */
4353                        if (tnum_is_const(reg->var_off) &&
4354                            bpf_map_is_rdonly(map) &&
4355                            map->ops->map_direct_value_addr) {
4356                                int map_off = off + reg->var_off.value;
4357                                u64 val = 0;
4358
4359                                err = bpf_map_direct_read(map, map_off, size,
4360                                                          &val);
4361                                if (err)
4362                                        return err;
4363
4364                                regs[value_regno].type = SCALAR_VALUE;
4365                                __mark_reg_known(&regs[value_regno], val);
4366                        } else {
4367                                mark_reg_unknown(env, regs, value_regno);
4368                        }
4369                }
4370        } else if (reg->type == PTR_TO_MEM) {
4371                if (t == BPF_WRITE && value_regno >= 0 &&
4372                    is_pointer_value(env, value_regno)) {
4373                        verbose(env, "R%d leaks addr into mem\n", value_regno);
4374                        return -EACCES;
4375                }
4376                err = check_mem_region_access(env, regno, off, size,
4377                                              reg->mem_size, false);
4378                if (!err && t == BPF_READ && value_regno >= 0)
4379                        mark_reg_unknown(env, regs, value_regno);
4380        } else if (reg->type == PTR_TO_CTX) {
4381                enum bpf_reg_type reg_type = SCALAR_VALUE;
4382                struct btf *btf = NULL;
4383                u32 btf_id = 0;
4384
4385                if (t == BPF_WRITE && value_regno >= 0 &&
4386                    is_pointer_value(env, value_regno)) {
4387                        verbose(env, "R%d leaks addr into ctx\n", value_regno);
4388                        return -EACCES;
4389                }
4390
4391                err = check_ctx_reg(env, reg, regno);
4392                if (err < 0)
4393                        return err;
4394
4395                err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
4396                if (err)
4397                        verbose_linfo(env, insn_idx, "; ");
4398                if (!err && t == BPF_READ && value_regno >= 0) {
4399                        /* ctx access returns either a scalar, or a
4400                         * PTR_TO_PACKET[_META,_END]. In the latter
4401                         * case, we know the offset is zero.
4402                         */
4403                        if (reg_type == SCALAR_VALUE) {
4404                                mark_reg_unknown(env, regs, value_regno);
4405                        } else {
4406                                mark_reg_known_zero(env, regs,
4407                                                    value_regno);
4408                                if (reg_type_may_be_null(reg_type))
4409                                        regs[value_regno].id = ++env->id_gen;
4410                                /* A load of a ctx field could have an actual load
4411                                 * size different from the one encoded in the insn.
4412                                 * When the dst is a pointer, it is for sure not a
4413                                 * sub-register.
4414                                 */
4415                                regs[value_regno].subreg_def = DEF_NOT_SUBREG;
4416                                if (reg_type == PTR_TO_BTF_ID ||
4417                                    reg_type == PTR_TO_BTF_ID_OR_NULL) {
4418                                        regs[value_regno].btf = btf;
4419                                        regs[value_regno].btf_id = btf_id;
4420                                }
4421                        }
4422                        regs[value_regno].type = reg_type;
4423                }
4424
4425        } else if (reg->type == PTR_TO_STACK) {
4426                /* Basic bounds checks. */
4427                err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
4428                if (err)
4429                        return err;
4430
4431                state = func(env, reg);
4432                err = update_stack_depth(env, state, off);
4433                if (err)
4434                        return err;
4435
4436                if (t == BPF_READ)
4437                        err = check_stack_read(env, regno, off, size,
4438                                               value_regno);
4439                else
4440                        err = check_stack_write(env, regno, off, size,
4441                                                value_regno, insn_idx);
4442        } else if (reg_is_pkt_pointer(reg)) {
4443                if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
4444                        verbose(env, "cannot write into packet\n");
4445                        return -EACCES;
4446                }
4447                if (t == BPF_WRITE && value_regno >= 0 &&
4448                    is_pointer_value(env, value_regno)) {
4449                        verbose(env, "R%d leaks addr into packet\n",
4450                                value_regno);
4451                        return -EACCES;
4452                }
4453                err = check_packet_access(env, regno, off, size, false);
4454                if (!err && t == BPF_READ && value_regno >= 0)
4455                        mark_reg_unknown(env, regs, value_regno);
4456        } else if (reg->type == PTR_TO_FLOW_KEYS) {
4457                if (t == BPF_WRITE && value_regno >= 0 &&
4458                    is_pointer_value(env, value_regno)) {
4459                        verbose(env, "R%d leaks addr into flow keys\n",
4460                                value_regno);
4461                        return -EACCES;
4462                }
4463
4464                err = check_flow_keys_access(env, off, size);
4465                if (!err && t == BPF_READ && value_regno >= 0)
4466                        mark_reg_unknown(env, regs, value_regno);
4467        } else if (type_is_sk_pointer(reg->type)) {
4468                if (t == BPF_WRITE) {
4469                        verbose(env, "R%d cannot write into %s\n",
4470                                regno, reg_type_str[reg->type]);
4471                        return -EACCES;
4472                }
4473                err = check_sock_access(env, insn_idx, regno, off, size, t);
4474                if (!err && value_regno >= 0)
4475                        mark_reg_unknown(env, regs, value_regno);
4476        } else if (reg->type == PTR_TO_TP_BUFFER) {
4477                err = check_tp_buffer_access(env, reg, regno, off, size);
4478                if (!err && t == BPF_READ && value_regno >= 0)
4479                        mark_reg_unknown(env, regs, value_regno);
4480        } else if (reg->type == PTR_TO_BTF_ID) {
4481                err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4482                                              value_regno);
4483        } else if (reg->type == CONST_PTR_TO_MAP) {
4484                err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4485                                              value_regno);
4486        } else if (reg->type == PTR_TO_RDONLY_BUF) {
4487                if (t == BPF_WRITE) {
4488                        verbose(env, "R%d cannot write into %s\n",
4489                                regno, reg_type_str[reg->type]);
4490                        return -EACCES;
4491                }
4492                err = check_buffer_access(env, reg, regno, off, size, false,
4493                                          "rdonly",
4494                                          &env->prog->aux->max_rdonly_access);
4495                if (!err && value_regno >= 0)
4496                        mark_reg_unknown(env, regs, value_regno);
4497        } else if (reg->type == PTR_TO_RDWR_BUF) {
4498                err = check_buffer_access(env, reg, regno, off, size, false,
4499                                          "rdwr",
4500                                          &env->prog->aux->max_rdwr_access);
4501                if (!err && t == BPF_READ && value_regno >= 0)
4502                        mark_reg_unknown(env, regs, value_regno);
4503        } else {
4504                verbose(env, "R%d invalid mem access '%s'\n", regno,
4505                        reg_type_str[reg->type]);
4506                return -EACCES;
4507        }
4508
4509        if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
4510            regs[value_regno].type == SCALAR_VALUE) {
4511                /* b/h/w load zero-extends, mark upper bits as known 0 */
4512                coerce_reg_to_size(&regs[value_regno], size);
4513        }
4514        return err;
4515}
4516
4517static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
4518{
4519        int load_reg;
4520        int err;
4521
4522        switch (insn->imm) {
4523        case BPF_ADD:
4524        case BPF_ADD | BPF_FETCH:
4525        case BPF_AND:
4526        case BPF_AND | BPF_FETCH:
4527        case BPF_OR:
4528        case BPF_OR | BPF_FETCH:
4529        case BPF_XOR:
4530        case BPF_XOR | BPF_FETCH:
4531        case BPF_XCHG:
4532        case BPF_CMPXCHG:
4533                break;
4534        default:
4535                verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4536                return -EINVAL;
4537        }
4538
4539        if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4540                verbose(env, "invalid atomic operand size\n");
4541                return -EINVAL;
4542        }
4543
4544        /* check src1 operand */
4545        err = check_reg_arg(env, insn->src_reg, SRC_OP);
4546        if (err)
4547                return err;
4548
4549        /* check src2 operand */
4550        err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4551        if (err)
4552                return err;
4553
4554        if (insn->imm == BPF_CMPXCHG) {
4555                /* Check comparison of R0 with memory location */
4556                const u32 aux_reg = BPF_REG_0;
4557
4558                err = check_reg_arg(env, aux_reg, SRC_OP);
4559                if (err)
4560                        return err;
4561
4562                if (is_pointer_value(env, aux_reg)) {
4563                        verbose(env, "R%d leaks addr into mem\n", aux_reg);
4564                        return -EACCES;
4565                }
4566        }
4567
4568        if (is_pointer_value(env, insn->src_reg)) {
4569                verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
4570                return -EACCES;
4571        }
4572
4573        if (is_ctx_reg(env, insn->dst_reg) ||
4574            is_pkt_reg(env, insn->dst_reg) ||
4575            is_flow_key_reg(env, insn->dst_reg) ||
4576            is_sk_reg(env, insn->dst_reg)) {
4577                verbose(env, "BPF_ATOMIC stores into R%d %s are not allowed\n",
4578                        insn->dst_reg,
4579                        reg_type_str[reg_state(env, insn->dst_reg)->type]);
4580                return -EACCES;
4581        }
4582
4583        if (insn->imm & BPF_FETCH) {
4584                if (insn->imm == BPF_CMPXCHG)
4585                        load_reg = BPF_REG_0;
4586                else
4587                        load_reg = insn->src_reg;
4588
4589                /* check and record load of old value */
4590                err = check_reg_arg(env, load_reg, DST_OP);
4591                if (err)
4592                        return err;
4593        } else {
4594                /* This instruction accesses a memory location but doesn't
4595                 * actually load it into a register.
4596                 */
4597                load_reg = -1;
4598        }
4599
4600        /* Check whether we can read the memory, with second call for fetch
4601         * case to simulate the register fill.
4602         */
4603        err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4604                               BPF_SIZE(insn->code), BPF_READ, -1, true);
4605        if (!err && load_reg >= 0)
4606                err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4607                                       BPF_SIZE(insn->code), BPF_READ, load_reg,
4608                                       true);
4609        if (err)
4610                return err;
4611
4612        /* Check whether we can write into the same memory. */
4613        err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4614                               BPF_SIZE(insn->code), BPF_WRITE, -1, true);
4615        if (err)
4616                return err;
4617
4618        return 0;
4619}
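
/* Illustrative example (not part of the verifier code): an atomic
 * fetch-and-add on an initialized stack slot that check_atomic() accepts
 * could be emitted as
 *
 *     BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 *     BPF_MOV64_IMM(BPF_REG_1, 1),
 *     BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
 *
 * after which R1 holds the old value.  The same operation with a ctx,
 * packet, flow-keys or socket pointer as the destination register is
 * rejected above.
 */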
4620
4621/* When register 'regno' is used to read the stack (either directly or through
4622 * a helper function) make sure that it's within stack boundary and, depending
4623 * on the access type, that all elements of the stack are initialized.
4624 *
4625 * 'off' includes 'regno->off', but not its dynamic part (if any).
4626 *
4627 * All registers that have been spilled on the stack in the slots within the
4628 * read offsets are marked as read.
4629 */
4630static int check_stack_range_initialized(
4631                struct bpf_verifier_env *env, int regno, int off,
4632                int access_size, bool zero_size_allowed,
4633                enum stack_access_src type, struct bpf_call_arg_meta *meta)
4634{
4635        struct bpf_reg_state *reg = reg_state(env, regno);
4636        struct bpf_func_state *state = func(env, reg);
4637        int err, min_off, max_off, i, j, slot, spi;
4638        char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4639        enum bpf_access_type bounds_check_type;
4640        /* Some accesses can write anything into the stack, others are
4641         * read-only.
4642         */
4643        bool clobber = false;
4644
4645        if (access_size == 0 && !zero_size_allowed) {
4646                verbose(env, "invalid zero-sized read\n");
4647                return -EACCES;
4648        }
4649
4650        if (type == ACCESS_HELPER) {
4651                /* The bounds checks for writes are more permissive than for
4652                 * reads. However, if raw_mode is not set, we'll do extra
4653                 * checks below.
4654                 */
4655                bounds_check_type = BPF_WRITE;
4656                clobber = true;
4657        } else {
4658                bounds_check_type = BPF_READ;
4659        }
4660        err = check_stack_access_within_bounds(env, regno, off, access_size,
4661                                               type, bounds_check_type);
4662        if (err)
4663                return err;
4664
4665
4666        if (tnum_is_const(reg->var_off)) {
4667                min_off = max_off = reg->var_off.value + off;
4668        } else {
4669                /* Variable offset is prohibited for unprivileged mode for
4670                 * simplicity since it requires corresponding support in
4671                 * Spectre masking for stack ALU.
4672                 * See also retrieve_ptr_limit().
4673                 */
4674                if (!env->bypass_spec_v1) {
4675                        char tn_buf[48];
4676
4677                        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4678                        verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4679                                regno, err_extra, tn_buf);
4680                        return -EACCES;
4681                }
4682                /* Only an initialized buffer on the stack may be accessed with a
4683                 * variable offset. With an uninitialized buffer it's hard to
4684                 * guarantee that the whole memory is marked as initialized on
4685                 * helper return, since the specific bounds are unknown, which may
4686                 * cause uninitialized stack contents to leak.
4687                 */
4688                if (meta && meta->raw_mode)
4689                        meta = NULL;
4690
4691                min_off = reg->smin_value + off;
4692                max_off = reg->smax_value + off;
4693        }
4694
4695        if (meta && meta->raw_mode) {
4696                meta->access_size = access_size;
4697                meta->regno = regno;
4698                return 0;
4699        }
4700
4701        for (i = min_off; i < max_off + access_size; i++) {
4702                u8 *stype;
4703
4704                slot = -i - 1;
4705                spi = slot / BPF_REG_SIZE;
4706                if (state->allocated_stack <= slot)
4707                        goto err;
4708                stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4709                if (*stype == STACK_MISC)
4710                        goto mark;
4711                if (*stype == STACK_ZERO) {
4712                        if (clobber) {
4713                                /* helper can write anything into the stack */
4714                                *stype = STACK_MISC;
4715                        }
4716                        goto mark;
4717                }
4718
4719                if (is_spilled_reg(&state->stack[spi]) &&
4720                    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4721                        goto mark;
4722
4723                if (is_spilled_reg(&state->stack[spi]) &&
4724                    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4725                     env->allow_ptr_leaks)) {
4726                        if (clobber) {
4727                                __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4728                                for (j = 0; j < BPF_REG_SIZE; j++)
4729                                        scrub_spilled_slot(&state->stack[spi].slot_type[j]);
4730                        }
4731                        goto mark;
4732                }
4733
4734err:
4735                if (tnum_is_const(reg->var_off)) {
4736                        verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4737                                err_extra, regno, min_off, i - min_off, access_size);
4738                } else {
4739                        char tn_buf[48];
4740
4741                        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4742                        verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4743                                err_extra, regno, tn_buf, i - min_off, access_size);
4744                }
4745                return -EACCES;
4746mark:
4747                /* reading any byte out of 8-byte 'spill_slot' will cause
4748                 * the whole slot to be marked as 'read'
4749                 */
4750                mark_reg_read(env, &state->stack[spi].spilled_ptr,
4751                              state->stack[spi].spilled_ptr.parent,
4752                              REG_LIVE_READ64);
4753        }
4754        return update_stack_depth(env, state, min_off);
4755}
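
/* Illustrative example (not part of the verifier code): this is the check
 * that rejects passing an uninitialized stack buffer to a helper that reads
 * it.  In BPF C,
 *
 *     __u32 key;                                   // never written
 *     void *val = bpf_map_lookup_elem(&map, &key);
 *
 * fails with "invalid indirect read from stack", whereas the destination
 * buffer of a raw_mode argument (ARG_PTR_TO_UNINIT_MEM), such as the dst of
 * bpf_probe_read_kernel(), may be uninitialized because the helper only
 * writes to it.
 */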
4756
4757static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4758                                   int access_size, bool zero_size_allowed,
4759                                   struct bpf_call_arg_meta *meta)
4760{
4761        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4762
4763        switch (reg->type) {
4764        case PTR_TO_PACKET:
4765        case PTR_TO_PACKET_META:
4766                return check_packet_access(env, regno, reg->off, access_size,
4767                                           zero_size_allowed);
4768        case PTR_TO_MAP_KEY:
4769                return check_mem_region_access(env, regno, reg->off, access_size,
4770                                               reg->map_ptr->key_size, false);
4771        case PTR_TO_MAP_VALUE:
4772                if (check_map_access_type(env, regno, reg->off, access_size,
4773                                          meta && meta->raw_mode ? BPF_WRITE :
4774                                          BPF_READ))
4775                        return -EACCES;
4776                return check_map_access(env, regno, reg->off, access_size,
4777                                        zero_size_allowed);
4778        case PTR_TO_MEM:
4779                return check_mem_region_access(env, regno, reg->off,
4780                                               access_size, reg->mem_size,
4781                                               zero_size_allowed);
4782        case PTR_TO_RDONLY_BUF:
4783                if (meta && meta->raw_mode)
4784                        return -EACCES;
4785                return check_buffer_access(env, reg, regno, reg->off,
4786                                           access_size, zero_size_allowed,
4787                                           "rdonly",
4788                                           &env->prog->aux->max_rdonly_access);
4789        case PTR_TO_RDWR_BUF:
4790                return check_buffer_access(env, reg, regno, reg->off,
4791                                           access_size, zero_size_allowed,
4792                                           "rdwr",
4793                                           &env->prog->aux->max_rdwr_access);
4794        case PTR_TO_STACK:
4795                return check_stack_range_initialized(
4796                                env,
4797                                regno, reg->off, access_size,
4798                                zero_size_allowed, ACCESS_HELPER, meta);
4799        default: /* scalar_value or invalid ptr */
4800                /* Allow zero-byte read from NULL, regardless of pointer type */
4801                if (zero_size_allowed && access_size == 0 &&
4802                    register_is_null(reg))
4803                        return 0;
4804
4805                verbose(env, "R%d type=%s expected=%s\n", regno,
4806                        reg_type_str[reg->type],
4807                        reg_type_str[PTR_TO_STACK]);
4808                return -EACCES;
4809        }
4810}
4811
4812int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4813                   u32 regno, u32 mem_size)
4814{
4815        if (register_is_null(reg))
4816                return 0;
4817
4818        if (reg_type_may_be_null(reg->type)) {
4819                /* Assuming that the register contains a value, check if the memory
4820                 * access is safe. Temporarily save and restore the register's state as
4821                 * the conversion shouldn't be visible to a caller.
4822                 */
4823                const struct bpf_reg_state saved_reg = *reg;
4824                int rv;
4825
4826                mark_ptr_not_null_reg(reg);
4827                rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
4828                *reg = saved_reg;
4829                return rv;
4830        }
4831
4832        return check_helper_mem_access(env, regno, mem_size, true, NULL);
4833}
4834
4835/* Implementation details:
4836 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4837 * Two bpf_map_lookups (even with the same key) will have different reg->id.
4838 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4839 * value_or_null->value transition, since the verifier only cares about
4840 * the range of access to valid map value pointer and doesn't care about actual
4841 * address of the map element.
4842 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4843 * reg->id > 0 after value_or_null->value transition. By doing so
4844 * two bpf_map_lookups will be considered two different pointers that
4845 * point to different bpf_spin_locks.
4846 * The verifier allows taking only one bpf_spin_lock at a time to avoid
4847 * dead-locks.
4848 * Since only one bpf_spin_lock is allowed the checks are simpler than
4849 * reg_is_refcounted() logic. The verifier needs to remember only
4850 * one spin_lock instead of array of acquired_refs.
4851 * cur_state->active_spin_lock remembers which map value element got locked
4852 * and clears it after bpf_spin_unlock.
4853 */
4854static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4855                             bool is_lock)
4856{
4857        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4858        struct bpf_verifier_state *cur = env->cur_state;
4859        bool is_const = tnum_is_const(reg->var_off);
4860        struct bpf_map *map = reg->map_ptr;
4861        u64 val = reg->var_off.value;
4862
4863        if (!is_const) {
4864                verbose(env,
4865                        "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
4866                        regno);
4867                return -EINVAL;
4868        }
4869        if (!map->btf) {
4870                verbose(env,
4871                        "map '%s' has to have BTF in order to use bpf_spin_lock\n",
4872                        map->name);
4873                return -EINVAL;
4874        }
4875        if (!map_value_has_spin_lock(map)) {
4876                if (map->spin_lock_off == -E2BIG)
4877                        verbose(env,
4878                                "map '%s' has more than one 'struct bpf_spin_lock'\n",
4879                                map->name);
4880                else if (map->spin_lock_off == -ENOENT)
4881                        verbose(env,
4882                                "map '%s' doesn't have 'struct bpf_spin_lock'\n",
4883                                map->name);
4884                else
4885                        verbose(env,
4886                                "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
4887                                map->name);
4888                return -EINVAL;
4889        }
4890        if (map->spin_lock_off != val + reg->off) {
4891                verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
4892                        val + reg->off);
4893                return -EINVAL;
4894        }
4895        if (is_lock) {
4896                if (cur->active_spin_lock) {
4897                        verbose(env,
4898                                "Locking two bpf_spin_locks is not allowed\n");
4899                        return -EINVAL;
4900                }
4901                cur->active_spin_lock = reg->id;
4902        } else {
4903                if (!cur->active_spin_lock) {
4904                        verbose(env, "bpf_spin_unlock without taking a lock\n");
4905                        return -EINVAL;
4906                }
4907                if (cur->active_spin_lock != reg->id) {
4908                        verbose(env, "bpf_spin_unlock of different lock\n");
4909                        return -EINVAL;
4910                }
4911                cur->active_spin_lock = 0;
4912        }
4913        return 0;
4914}
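
/* Illustrative example (a rough sketch, not part of the verifier code): the
 * layout and usage pattern that process_spin_lock() validates looks roughly
 * like this in BPF C:
 *
 *     struct elem {
 *             struct bpf_spin_lock lock;
 *             int counter;
 *     };
 *     ...
 *     struct elem *e = bpf_map_lookup_elem(&map, &key);
 *     if (e) {
 *             bpf_spin_lock(&e->lock);
 *             e->counter++;
 *             bpf_spin_unlock(&e->lock);
 *     }
 *
 * The lock must live at the single bpf_spin_lock offset recorded in the
 * map's BTF, and lock/unlock must operate on the same looked-up element.
 */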
4915
4916static int process_timer_func(struct bpf_verifier_env *env, int regno,
4917                              struct bpf_call_arg_meta *meta)
4918{
4919        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4920        bool is_const = tnum_is_const(reg->var_off);
4921        struct bpf_map *map = reg->map_ptr;
4922        u64 val = reg->var_off.value;
4923
4924        if (!is_const) {
4925                verbose(env,
4926                        "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
4927                        regno);
4928                return -EINVAL;
4929        }
4930        if (!map->btf) {
4931                verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
4932                        map->name);
4933                return -EINVAL;
4934        }
4935        if (!map_value_has_timer(map)) {
4936                if (map->timer_off == -E2BIG)
4937                        verbose(env,
4938                                "map '%s' has more than one 'struct bpf_timer'\n",
4939                                map->name);
4940                else if (map->timer_off == -ENOENT)
4941                        verbose(env,
4942                                "map '%s' doesn't have 'struct bpf_timer'\n",
4943                                map->name);
4944                else
4945                        verbose(env,
4946                                "map '%s' is not a struct type or bpf_timer is mangled\n",
4947                                map->name);
4948                return -EINVAL;
4949        }
4950        if (map->timer_off != val + reg->off) {
4951                verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
4952                        val + reg->off, map->timer_off);
4953                return -EINVAL;
4954        }
4955        if (meta->map_ptr) {
4956                verbose(env, "verifier bug. Two map pointers in a timer helper\n");
4957                return -EFAULT;
4958        }
4959        meta->map_uid = reg->map_uid;
4960        meta->map_ptr = map;
4961        return 0;
4962}
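
/* Illustrative example (a rough sketch, not part of the verifier code): the
 * BPF C usage validated here embeds a struct bpf_timer in the map value and
 * passes the owning map to bpf_timer_init():
 *
 *     struct elem {
 *             struct bpf_timer t;
 *     };
 *     ...
 *     struct elem *e = bpf_map_lookup_elem(&timer_map, &key);
 *     if (e)
 *             bpf_timer_init(&e->t, &timer_map, CLOCK_MONOTONIC);
 *
 * Initializing the timer against a map other than the one the element was
 * looked up from is rejected by the map_uid check in check_func_arg().
 */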
4963
4964static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4965{
4966        return type == ARG_PTR_TO_MEM ||
4967               type == ARG_PTR_TO_MEM_OR_NULL ||
4968               type == ARG_PTR_TO_UNINIT_MEM;
4969}
4970
4971static bool arg_type_is_mem_size(enum bpf_arg_type type)
4972{
4973        return type == ARG_CONST_SIZE ||
4974               type == ARG_CONST_SIZE_OR_ZERO;
4975}
4976
4977static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4978{
4979        return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4980}
4981
4982static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4983{
4984        return type == ARG_PTR_TO_INT ||
4985               type == ARG_PTR_TO_LONG;
4986}
4987
4988static int int_ptr_type_to_size(enum bpf_arg_type type)
4989{
4990        if (type == ARG_PTR_TO_INT)
4991                return sizeof(u32);
4992        else if (type == ARG_PTR_TO_LONG)
4993                return sizeof(u64);
4994
4995        return -EINVAL;
4996}
4997
4998static int resolve_map_arg_type(struct bpf_verifier_env *env,
4999                                 const struct bpf_call_arg_meta *meta,
5000                                 enum bpf_arg_type *arg_type)
5001{
5002        if (!meta->map_ptr) {
5003                /* kernel subsystem misconfigured verifier */
5004                verbose(env, "invalid map_ptr to access map->type\n");
5005                return -EACCES;
5006        }
5007
5008        switch (meta->map_ptr->map_type) {
5009        case BPF_MAP_TYPE_SOCKMAP:
5010        case BPF_MAP_TYPE_SOCKHASH:
5011                if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
5012                        *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
5013                } else {
5014                        verbose(env, "invalid arg_type for sockmap/sockhash\n");
5015                        return -EINVAL;
5016                }
5017                break;
5018        case BPF_MAP_TYPE_BLOOM_FILTER:
5019                if (meta->func_id == BPF_FUNC_map_peek_elem)
5020                        *arg_type = ARG_PTR_TO_MAP_VALUE;
5021                break;
5022        default:
5023                break;
5024        }
5025        return 0;
5026}
5027
5028struct bpf_reg_types {
5029        const enum bpf_reg_type types[10];
5030        u32 *btf_id;
5031};
5032
5033static const struct bpf_reg_types map_key_value_types = {
5034        .types = {
5035                PTR_TO_STACK,
5036                PTR_TO_PACKET,
5037                PTR_TO_PACKET_META,
5038                PTR_TO_MAP_KEY,
5039                PTR_TO_MAP_VALUE,
5040        },
5041};
5042
5043static const struct bpf_reg_types sock_types = {
5044        .types = {
5045                PTR_TO_SOCK_COMMON,
5046                PTR_TO_SOCKET,
5047                PTR_TO_TCP_SOCK,
5048                PTR_TO_XDP_SOCK,
5049        },
5050};
5051
5052#ifdef CONFIG_NET
5053static const struct bpf_reg_types btf_id_sock_common_types = {
5054        .types = {
5055                PTR_TO_SOCK_COMMON,
5056                PTR_TO_SOCKET,
5057                PTR_TO_TCP_SOCK,
5058                PTR_TO_XDP_SOCK,
5059                PTR_TO_BTF_ID,
5060        },
5061        .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5062};
5063#endif
5064
5065static const struct bpf_reg_types mem_types = {
5066        .types = {
5067                PTR_TO_STACK,
5068                PTR_TO_PACKET,
5069                PTR_TO_PACKET_META,
5070                PTR_TO_MAP_KEY,
5071                PTR_TO_MAP_VALUE,
5072                PTR_TO_MEM,
5073                PTR_TO_RDONLY_BUF,
5074                PTR_TO_RDWR_BUF,
5075        },
5076};
5077
5078static const struct bpf_reg_types int_ptr_types = {
5079        .types = {
5080                PTR_TO_STACK,
5081                PTR_TO_PACKET,
5082                PTR_TO_PACKET_META,
5083                PTR_TO_MAP_KEY,
5084                PTR_TO_MAP_VALUE,
5085        },
5086};
5087
5088static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5089static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5090static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
5091static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
5092static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5093static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5094static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5095static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
5096static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5097static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5098static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5099static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5100
5101static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5102        [ARG_PTR_TO_MAP_KEY]            = &map_key_value_types,
5103        [ARG_PTR_TO_MAP_VALUE]          = &map_key_value_types,
5104        [ARG_PTR_TO_UNINIT_MAP_VALUE]   = &map_key_value_types,
5105        [ARG_PTR_TO_MAP_VALUE_OR_NULL]  = &map_key_value_types,
5106        [ARG_CONST_SIZE]                = &scalar_types,
5107        [ARG_CONST_SIZE_OR_ZERO]        = &scalar_types,
5108        [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
5109        [ARG_CONST_MAP_PTR]             = &const_map_ptr_types,
5110        [ARG_PTR_TO_CTX]                = &context_types,
5111        [ARG_PTR_TO_CTX_OR_NULL]        = &context_types,
5112        [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
5113#ifdef CONFIG_NET
5114        [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
5115#endif
5116        [ARG_PTR_TO_SOCKET]             = &fullsock_types,
5117        [ARG_PTR_TO_SOCKET_OR_NULL]     = &fullsock_types,
5118        [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
5119        [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
5120        [ARG_PTR_TO_MEM]                = &mem_types,
5121        [ARG_PTR_TO_MEM_OR_NULL]        = &mem_types,
5122        [ARG_PTR_TO_UNINIT_MEM]         = &mem_types,
5123        [ARG_PTR_TO_ALLOC_MEM]          = &alloc_mem_types,
5124        [ARG_PTR_TO_ALLOC_MEM_OR_NULL]  = &alloc_mem_types,
5125        [ARG_PTR_TO_INT]                = &int_ptr_types,
5126        [ARG_PTR_TO_LONG]               = &int_ptr_types,
5127        [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
5128        [ARG_PTR_TO_FUNC]               = &func_ptr_types,
5129        [ARG_PTR_TO_STACK_OR_NULL]      = &stack_ptr_types,
5130        [ARG_PTR_TO_CONST_STR]          = &const_str_ptr_types,
5131        [ARG_PTR_TO_TIMER]              = &timer_types,
5132};
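
/* Illustrative note: the table above is what drives the "expected="
 * diagnostics printed by check_reg_type() below.  Passing a stack pointer
 * where a helper declares ARG_PTR_TO_CTX, for example, only matches against
 * context_types and fails with a message along the lines of
 * "R1 type=fp expected=ctx".
 */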
5133
5134static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5135                          enum bpf_arg_type arg_type,
5136                          const u32 *arg_btf_id)
5137{
5138        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5139        enum bpf_reg_type expected, type = reg->type;
5140        const struct bpf_reg_types *compatible;
5141        int i, j;
5142
5143        compatible = compatible_reg_types[arg_type];
5144        if (!compatible) {
5145                verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5146                return -EFAULT;
5147        }
5148
5149        for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5150                expected = compatible->types[i];
5151                if (expected == NOT_INIT)
5152                        break;
5153
5154                if (type == expected)
5155                        goto found;
5156        }
5157
5158        verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
5159        for (j = 0; j + 1 < i; j++)
5160                verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
5161        verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
5162        return -EACCES;
5163
5164found:
5165        if (type == PTR_TO_BTF_ID) {
5166                if (!arg_btf_id) {
5167                        if (!compatible->btf_id) {
5168                                verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5169                                return -EFAULT;
5170                        }
5171                        arg_btf_id = compatible->btf_id;
5172                }
5173
5174                if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5175                                          btf_vmlinux, *arg_btf_id)) {
5176                        verbose(env, "R%d is of type %s but %s is expected\n",
5177                                regno, kernel_type_name(reg->btf, reg->btf_id),
5178                                kernel_type_name(btf_vmlinux, *arg_btf_id));
5179                        return -EACCES;
5180                }
5181
5182                if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5183                        verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
5184                                regno);
5185                        return -EACCES;
5186                }
5187        }
5188
5189        return 0;
5190}
5191
5192static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
5193                          struct bpf_call_arg_meta *meta,
5194                          const struct bpf_func_proto *fn)
5195{
5196        u32 regno = BPF_REG_1 + arg;
5197        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5198        enum bpf_arg_type arg_type = fn->arg_type[arg];
5199        enum bpf_reg_type type = reg->type;
5200        int err = 0;
5201
5202        if (arg_type == ARG_DONTCARE)
5203                return 0;
5204
5205        err = check_reg_arg(env, regno, SRC_OP);
5206        if (err)
5207                return err;
5208
5209        if (arg_type == ARG_ANYTHING) {
5210                if (is_pointer_value(env, regno)) {
5211                        verbose(env, "R%d leaks addr into helper function\n",
5212                                regno);
5213                        return -EACCES;
5214                }
5215                return 0;
5216        }
5217
5218        if (type_is_pkt_pointer(type) &&
5219            !may_access_direct_pkt_data(env, meta, BPF_READ)) {
5220                verbose(env, "helper access to the packet is not allowed\n");
5221                return -EACCES;
5222        }
5223
5224        if (arg_type == ARG_PTR_TO_MAP_VALUE ||
5225            arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
5226            arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
5227                err = resolve_map_arg_type(env, meta, &arg_type);
5228                if (err)
5229                        return err;
5230        }
5231
5232        if (register_is_null(reg) && arg_type_may_be_null(arg_type))
5233                /* A NULL register has a SCALAR_VALUE type, so skip
5234                 * type checking.
5235                 */
5236                goto skip_type_check;
5237
5238        err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
5239        if (err)
5240                return err;
5241
5242        if (type == PTR_TO_CTX) {
5243                err = check_ctx_reg(env, reg, regno);
5244                if (err < 0)
5245                        return err;
5246        }
5247
5248skip_type_check:
5249        if (reg->ref_obj_id) {
5250                if (meta->ref_obj_id) {
5251                        verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
5252                                regno, reg->ref_obj_id,
5253                                meta->ref_obj_id);
5254                        return -EFAULT;
5255                }
5256                meta->ref_obj_id = reg->ref_obj_id;
5257        }
5258
5259        if (arg_type == ARG_CONST_MAP_PTR) {
5260                /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
5261                if (meta->map_ptr) {
5262                        /* Use map_uid (which is unique id of inner map) to reject:
5263                         * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
5264                         * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
5265                         * if (inner_map1 && inner_map2) {
5266                         *     timer = bpf_map_lookup_elem(inner_map1);
5267                         *     if (timer)
5268                         *         // mismatch would have been allowed
5269                         *         bpf_timer_init(timer, inner_map2);
5270                         * }
5271                         *
5272                         * Comparing map_ptr alone is not enough; map_uid tells such inner maps apart.
5273                         */
5274                        if (meta->map_ptr != reg->map_ptr ||
5275                            meta->map_uid != reg->map_uid) {
5276                                verbose(env,
5277                                        "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
5278                                        meta->map_uid, reg->map_uid);
5279                                return -EINVAL;
5280                        }
5281                }
5282                meta->map_ptr = reg->map_ptr;
5283                meta->map_uid = reg->map_uid;
5284        } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
5285                /* bpf_map_xxx(..., map_ptr, ..., key) call:
5286                 * check that [key, key + map->key_size) are within
5287                 * stack limits and initialized
5288                 */
5289                if (!meta->map_ptr) {
5290                        /* in function declaration map_ptr must come before
5291                         * map_key, so that it's verified and known before
5292                         * we have to check map_key here. Otherwise it means that
5293                         * the kernel subsystem misconfigured the verifier.
5294                         */
5295                        verbose(env, "invalid map_ptr to access map->key\n");
5296                        return -EACCES;
5297                }
5298                err = check_helper_mem_access(env, regno,
5299                                              meta->map_ptr->key_size, false,
5300                                              NULL);
5301        } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
5302                   (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
5303                    !register_is_null(reg)) ||
5304                   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5305                /* bpf_map_xxx(..., map_ptr, ..., value) call:
5306                 * check [value, value + map->value_size) validity
5307                 */
5308                if (!meta->map_ptr) {
5309                        /* kernel subsystem misconfigured verifier */
5310                        verbose(env, "invalid map_ptr to access map->value\n");
5311                        return -EACCES;
5312                }
5313                meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
5314                err = check_helper_mem_access(env, regno,
5315                                              meta->map_ptr->value_size, false,
5316                                              meta);
5317        } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
5318                if (!reg->btf_id) {
5319                        verbose(env, "Helper has invalid btf_id in R%d\n", regno);
5320                        return -EACCES;
5321                }
5322                meta->ret_btf = reg->btf;
5323                meta->ret_btf_id = reg->btf_id;
5324        } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
5325                if (meta->func_id == BPF_FUNC_spin_lock) {
5326                        if (process_spin_lock(env, regno, true))
5327                                return -EACCES;
5328                } else if (meta->func_id == BPF_FUNC_spin_unlock) {
5329                        if (process_spin_lock(env, regno, false))
5330                                return -EACCES;
5331                } else {
5332                        verbose(env, "verifier internal error\n");
5333                        return -EFAULT;
5334                }
5335        } else if (arg_type == ARG_PTR_TO_TIMER) {
5336                if (process_timer_func(env, regno, meta))
5337                        return -EACCES;
5338        } else if (arg_type == ARG_PTR_TO_FUNC) {
5339                meta->subprogno = reg->subprogno;
5340        } else if (arg_type_is_mem_ptr(arg_type)) {
5341                /* The access to this pointer is only checked when we hit the
5342                 * next is_mem_size argument below.
5343                 */
5344                meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
5345        } else if (arg_type_is_mem_size(arg_type)) {
5346                bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
5347
5348        /* This is used to refine the r0 return value bounds for helpers
5349         * that enforce this value as an upper bound on their return value.
5350         * See do_refine_retval_range() for the helpers that can refine
5351         * the return value. The size argument's C type is u32, so we pull
5352         * the bound from umax_value; if the signed minimum is negative,
5353         * the verifier errors out below. Only an upper bound can be learned
5354         * because the retval is an int type and negative retvals are allowed.
5355         */
5356                meta->msize_max_value = reg->umax_value;
5357
5358                /* The register is SCALAR_VALUE; the access check
5359                 * happens using its boundaries.
5360                 */
5361                if (!tnum_is_const(reg->var_off))
5362                        /* For unprivileged variable accesses, disable raw
5363                         * mode so that the program is required to
5364                         * initialize all the memory that the helper could
5365                         * just partially fill up.
5366                         */
5367                        meta = NULL;
5368
5369                if (reg->smin_value < 0) {
5370                        verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
5371                                regno);
5372                        return -EACCES;
5373                }
5374
5375                if (reg->umin_value == 0) {
5376                        err = check_helper_mem_access(env, regno - 1, 0,
5377                                                      zero_size_allowed,
5378                                                      meta);
5379                        if (err)
5380                                return err;
5381                }
5382
5383                if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
5384                        verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
5385                                regno);
5386                        return -EACCES;
5387                }
5388                err = check_helper_mem_access(env, regno - 1,
5389                                              reg->umax_value,
5390                                              zero_size_allowed, meta);
5391                if (!err)
5392                        err = mark_chain_precision(env, regno);
5393        } else if (arg_type_is_alloc_size(arg_type)) {
5394                if (!tnum_is_const(reg->var_off)) {
5395                verbose(env, "R%d is not a known constant\n",
5396                                regno);
5397                        return -EACCES;
5398                }
5399                meta->mem_size = reg->var_off.value;
5400        } else if (arg_type_is_int_ptr(arg_type)) {
5401                int size = int_ptr_type_to_size(arg_type);
5402
5403                err = check_helper_mem_access(env, regno, size, false, meta);
5404                if (err)
5405                        return err;
5406                err = check_ptr_alignment(env, reg, 0, size, true);
5407        } else if (arg_type == ARG_PTR_TO_CONST_STR) {
5408                struct bpf_map *map = reg->map_ptr;
5409                int map_off;
5410                u64 map_addr;
5411                char *str_ptr;
5412
5413                if (!bpf_map_is_rdonly(map)) {
5414                        verbose(env, "R%d does not point to a readonly map\n", regno);
5415                        return -EACCES;
5416                }
5417
5418                if (!tnum_is_const(reg->var_off)) {
5419                        verbose(env, "R%d is not a constant address\n", regno);
5420                        return -EACCES;
5421                }
5422
5423                if (!map->ops->map_direct_value_addr) {
5424                        verbose(env, "no direct value access support for this map type\n");
5425                        return -EACCES;
5426                }
5427
5428                err = check_map_access(env, regno, reg->off,
5429                                       map->value_size - reg->off, false);
5430                if (err)
5431                        return err;
5432
5433                map_off = reg->off + reg->var_off.value;
5434                err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
5435                if (err) {
5436                        verbose(env, "direct value access on string failed\n");
5437                        return err;
5438                }
5439
5440                str_ptr = (char *)(long)(map_addr);
5441                if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
5442                        verbose(env, "string is not zero-terminated\n");
5443                        return -EINVAL;
5444                }
5445        }
5446
5447        return err;
5448}
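
/* Illustrative sketch (not verifier code; 'buf', 'len' and 'src' are made-up
 * names): the size-argument handling above is what forces BPF programs to
 * bound a variable length before passing it to a helper, e.g.:
 *
 *     char buf[64];
 *     u32 len = ...;                     // unknown SCALAR_VALUE
 *     if (len > sizeof(buf))             // without this test umax_value is
 *             return 0;                  // unbounded and the call is rejected
 *     bpf_probe_read_kernel(buf, len, src);
 *
 * After the test umax_value <= 64, so check_helper_mem_access() can prove
 * that [buf, buf + len) stays inside the stack object.
 */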
5449
5450static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
5451{
5452        enum bpf_attach_type eatype = env->prog->expected_attach_type;
5453        enum bpf_prog_type type = resolve_prog_type(env->prog);
5454
5455        if (func_id != BPF_FUNC_map_update_elem)
5456                return false;
5457
5458        /* It's not possible to get access to a locked struct sock in these
5459         * contexts, so updating is safe.
5460         */
5461        switch (type) {
5462        case BPF_PROG_TYPE_TRACING:
5463                if (eatype == BPF_TRACE_ITER)
5464                        return true;
5465                break;
5466        case BPF_PROG_TYPE_SOCKET_FILTER:
5467        case BPF_PROG_TYPE_SCHED_CLS:
5468        case BPF_PROG_TYPE_SCHED_ACT:
5469        case BPF_PROG_TYPE_XDP:
5470        case BPF_PROG_TYPE_SK_REUSEPORT:
5471        case BPF_PROG_TYPE_FLOW_DISSECTOR:
5472        case BPF_PROG_TYPE_SK_LOOKUP:
5473                return true;
5474        default:
5475                break;
5476        }
5477
5478        verbose(env, "cannot update sockmap in this context\n");
5479        return false;
5480}
5481
5482static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
5483{
5484        return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
5485}
5486
5487static int check_map_func_compatibility(struct bpf_verifier_env *env,
5488                                        struct bpf_map *map, int func_id)
5489{
5490        if (!map)
5491                return 0;
5492
5493        /* We need a two way check, first is from map perspective ... */
5494        switch (map->map_type) {
5495        case BPF_MAP_TYPE_PROG_ARRAY:
5496                if (func_id != BPF_FUNC_tail_call)
5497                        goto error;
5498                break;
5499        case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5500                if (func_id != BPF_FUNC_perf_event_read &&
5501                    func_id != BPF_FUNC_perf_event_output &&
5502                    func_id != BPF_FUNC_skb_output &&
5503                    func_id != BPF_FUNC_perf_event_read_value &&
5504                    func_id != BPF_FUNC_xdp_output)
5505                        goto error;
5506                break;
5507        case BPF_MAP_TYPE_RINGBUF:
5508                if (func_id != BPF_FUNC_ringbuf_output &&
5509                    func_id != BPF_FUNC_ringbuf_reserve &&
5510                    func_id != BPF_FUNC_ringbuf_query)
5511                        goto error;
5512                break;
5513        case BPF_MAP_TYPE_STACK_TRACE:
5514                if (func_id != BPF_FUNC_get_stackid)
5515                        goto error;
5516                break;
5517        case BPF_MAP_TYPE_CGROUP_ARRAY:
5518                if (func_id != BPF_FUNC_skb_under_cgroup &&
5519                    func_id != BPF_FUNC_current_task_under_cgroup)
5520                        goto error;
5521                break;
5522        case BPF_MAP_TYPE_CGROUP_STORAGE:
5523        case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
5524                if (func_id != BPF_FUNC_get_local_storage)
5525                        goto error;
5526                break;
5527        case BPF_MAP_TYPE_DEVMAP:
5528        case BPF_MAP_TYPE_DEVMAP_HASH:
5529                if (func_id != BPF_FUNC_redirect_map &&
5530                    func_id != BPF_FUNC_map_lookup_elem)
5531                        goto error;
5532                break;
5533        /* Restrict bpf side of cpumap and xskmap, open when use-cases
5534         * appear.
5535         */
5536        case BPF_MAP_TYPE_CPUMAP:
5537                if (func_id != BPF_FUNC_redirect_map)
5538                        goto error;
5539                break;
5540        case BPF_MAP_TYPE_XSKMAP:
5541                if (func_id != BPF_FUNC_redirect_map &&
5542                    func_id != BPF_FUNC_map_lookup_elem)
5543                        goto error;
5544                break;
5545        case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5546        case BPF_MAP_TYPE_HASH_OF_MAPS:
5547                if (func_id != BPF_FUNC_map_lookup_elem)
5548                        goto error;
5549                break;
5550        case BPF_MAP_TYPE_SOCKMAP:
5551                if (func_id != BPF_FUNC_sk_redirect_map &&
5552                    func_id != BPF_FUNC_sock_map_update &&
5553                    func_id != BPF_FUNC_map_delete_elem &&
5554                    func_id != BPF_FUNC_msg_redirect_map &&
5555                    func_id != BPF_FUNC_sk_select_reuseport &&
5556                    func_id != BPF_FUNC_map_lookup_elem &&
5557                    !may_update_sockmap(env, func_id))
5558                        goto error;
5559                break;
5560        case BPF_MAP_TYPE_SOCKHASH:
5561                if (func_id != BPF_FUNC_sk_redirect_hash &&
5562                    func_id != BPF_FUNC_sock_hash_update &&
5563                    func_id != BPF_FUNC_map_delete_elem &&
5564                    func_id != BPF_FUNC_msg_redirect_hash &&
5565                    func_id != BPF_FUNC_sk_select_reuseport &&
5566                    func_id != BPF_FUNC_map_lookup_elem &&
5567                    !may_update_sockmap(env, func_id))
5568                        goto error;
5569                break;
5570        case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
5571                if (func_id != BPF_FUNC_sk_select_reuseport)
5572                        goto error;
5573                break;
5574        case BPF_MAP_TYPE_QUEUE:
5575        case BPF_MAP_TYPE_STACK:
5576                if (func_id != BPF_FUNC_map_peek_elem &&
5577                    func_id != BPF_FUNC_map_pop_elem &&
5578                    func_id != BPF_FUNC_map_push_elem)
5579                        goto error;
5580                break;
5581        case BPF_MAP_TYPE_SK_STORAGE:
5582                if (func_id != BPF_FUNC_sk_storage_get &&
5583                    func_id != BPF_FUNC_sk_storage_delete)
5584                        goto error;
5585                break;
5586        case BPF_MAP_TYPE_INODE_STORAGE:
5587                if (func_id != BPF_FUNC_inode_storage_get &&
5588                    func_id != BPF_FUNC_inode_storage_delete)
5589                        goto error;
5590                break;
5591        case BPF_MAP_TYPE_TASK_STORAGE:
5592                if (func_id != BPF_FUNC_task_storage_get &&
5593                    func_id != BPF_FUNC_task_storage_delete)
5594                        goto error;
5595                break;
5596        case BPF_MAP_TYPE_BLOOM_FILTER:
5597                if (func_id != BPF_FUNC_map_peek_elem &&
5598                    func_id != BPF_FUNC_map_push_elem)
5599                        goto error;
5600                break;
5601        default:
5602                break;
5603        }
5604
5605        /* ... and second from the function itself. */
5606        switch (func_id) {
5607        case BPF_FUNC_tail_call:
5608                if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
5609                        goto error;
5610                if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
5611                        verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
5612                        return -EINVAL;
5613                }
5614                break;
5615        case BPF_FUNC_perf_event_read:
5616        case BPF_FUNC_perf_event_output:
5617        case BPF_FUNC_perf_event_read_value:
5618        case BPF_FUNC_skb_output:
5619        case BPF_FUNC_xdp_output:
5620                if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
5621                        goto error;
5622                break;
5623        case BPF_FUNC_ringbuf_output:
5624        case BPF_FUNC_ringbuf_reserve:
5625        case BPF_FUNC_ringbuf_query:
5626                if (map->map_type != BPF_MAP_TYPE_RINGBUF)
5627                        goto error;
5628                break;
5629        case BPF_FUNC_get_stackid:
5630                if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
5631                        goto error;
5632                break;
5633        case BPF_FUNC_current_task_under_cgroup:
5634        case BPF_FUNC_skb_under_cgroup:
5635                if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
5636                        goto error;
5637                break;
5638        case BPF_FUNC_redirect_map:
5639                if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
5640                    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
5641                    map->map_type != BPF_MAP_TYPE_CPUMAP &&
5642                    map->map_type != BPF_MAP_TYPE_XSKMAP)
5643                        goto error;
5644                break;
5645        case BPF_FUNC_sk_redirect_map:
5646        case BPF_FUNC_msg_redirect_map:
5647        case BPF_FUNC_sock_map_update:
5648                if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
5649                        goto error;
5650                break;
5651        case BPF_FUNC_sk_redirect_hash:
5652        case BPF_FUNC_msg_redirect_hash:
5653        case BPF_FUNC_sock_hash_update:
5654                if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
5655                        goto error;
5656                break;
5657        case BPF_FUNC_get_local_storage:
5658                if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
5659                    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
5660                        goto error;
5661                break;
5662        case BPF_FUNC_sk_select_reuseport:
5663                if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
5664                    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
5665                    map->map_type != BPF_MAP_TYPE_SOCKHASH)
5666                        goto error;
5667                break;
5668        case BPF_FUNC_map_pop_elem:
5669                if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5670                    map->map_type != BPF_MAP_TYPE_STACK)
5671                        goto error;
5672                break;
5673        case BPF_FUNC_map_peek_elem:
5674        case BPF_FUNC_map_push_elem:
5675                if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5676                    map->map_type != BPF_MAP_TYPE_STACK &&
5677                    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
5678                        goto error;
5679                break;
5680        case BPF_FUNC_sk_storage_get:
5681        case BPF_FUNC_sk_storage_delete:
5682                if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
5683                        goto error;
5684                break;
5685        case BPF_FUNC_inode_storage_get:
5686        case BPF_FUNC_inode_storage_delete:
5687                if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
5688                        goto error;
5689                break;
5690        case BPF_FUNC_task_storage_get:
5691        case BPF_FUNC_task_storage_delete:
5692                if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
5693                        goto error;
5694                break;
5695        default:
5696                break;
5697        }
5698
5699        return 0;
5700error:
5701        verbose(env, "cannot pass map_type %d into func %s#%d\n",
5702                map->map_type, func_id_name(func_id), func_id);
5703        return -EINVAL;
5704}
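
/* Sketch of what the second switch above catches ('my_hash_map' is an
 * illustrative name): a program doing
 *
 *     bpf_tail_call(ctx, &my_hash_map, 0);
 *
 * with 'my_hash_map' of type BPF_MAP_TYPE_HASH is rejected by the
 * "cannot pass map_type %d into func %s#%d" error below.
 */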
5705
5706static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
5707{
5708        int count = 0;
5709
5710        if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
5711                count++;
5712        if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
5713                count++;
5714        if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
5715                count++;
5716        if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
5717                count++;
5718        if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
5719                count++;
5720
5721        /* We only support one arg being in raw mode at the moment,
5722         * which is sufficient for the helper functions we have
5723         * right now.
5724         */
5725        return count <= 1;
5726}
5727
5728static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
5729                                    enum bpf_arg_type arg_next)
5730{
5731        return (arg_type_is_mem_ptr(arg_curr) &&
5732                !arg_type_is_mem_size(arg_next)) ||
5733               (!arg_type_is_mem_ptr(arg_curr) &&
5734                arg_type_is_mem_size(arg_next));
5735}
5736
5737static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
5738{
5739        /* bpf_xxx(..., buf, len) call will access 'len'
5740         * bytes from memory 'buf'. Both arg types need
5741         * to be paired, so make sure there's no buggy
5742         * helper function specification.
5743         */
5744        if (arg_type_is_mem_size(fn->arg1_type) ||
5745            arg_type_is_mem_ptr(fn->arg5_type)  ||
5746            check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
5747            check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
5748            check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
5749            check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
5750                return false;
5751
5752        return true;
5753}
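
/* For example, a well-formed proto pairs each mem pointer with the size
 * argument that immediately follows it (sketch modeled on the
 * bpf_probe_read_kernel() proto):
 *
 *     .arg1_type = ARG_PTR_TO_UNINIT_MEM,
 *     .arg2_type = ARG_CONST_SIZE_OR_ZERO,
 *     .arg3_type = ARG_ANYTHING,
 *
 * A size type in arg1, or a mem pointer in arg5 with nothing after it to
 * describe its size, can never form such a pair and is rejected above.
 */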
5754
5755static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
5756{
5757        int count = 0;
5758
5759        if (arg_type_may_be_refcounted(fn->arg1_type))
5760                count++;
5761        if (arg_type_may_be_refcounted(fn->arg2_type))
5762                count++;
5763        if (arg_type_may_be_refcounted(fn->arg3_type))
5764                count++;
5765        if (arg_type_may_be_refcounted(fn->arg4_type))
5766                count++;
5767        if (arg_type_may_be_refcounted(fn->arg5_type))
5768                count++;
5769
5770        /* A reference acquiring function cannot acquire
5771         * another refcounted ptr.
5772         */
5773        if (may_be_acquire_function(func_id) && count)
5774                return false;
5775
5776        /* We only support one arg being unreferenced at the moment,
5777         * which is sufficient for the helper functions we have right now.
5778         */
5779        return count <= 1;
5780}
5781
5782static bool check_btf_id_ok(const struct bpf_func_proto *fn)
5783{
5784        int i;
5785
5786        for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
5787                if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
5788                        return false;
5789
5790                if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
5791                        return false;
5792        }
5793
5794        return true;
5795}
5796
5797static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
5798{
5799        return check_raw_mode_ok(fn) &&
5800               check_arg_pair_ok(fn) &&
5801               check_btf_id_ok(fn) &&
5802               check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
5803}
5804
5805/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
5806 * are now invalid, so turn them into unknown SCALAR_VALUE.
5807 */
5808static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
5809                                     struct bpf_func_state *state)
5810{
5811        struct bpf_reg_state *regs = state->regs, *reg;
5812        int i;
5813
5814        for (i = 0; i < MAX_BPF_REG; i++)
5815                if (reg_is_pkt_pointer_any(&regs[i]))
5816                        mark_reg_unknown(env, regs, i);
5817
5818        bpf_for_each_spilled_reg(i, state, reg) {
5819                if (!reg)
5820                        continue;
5821                if (reg_is_pkt_pointer_any(reg))
5822                        __mark_reg_unknown(env, reg);
5823        }
5824}
5825
5826static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
5827{
5828        struct bpf_verifier_state *vstate = env->cur_state;
5829        int i;
5830
5831        for (i = 0; i <= vstate->curframe; i++)
5832                __clear_all_pkt_pointers(env, vstate->frame[i]);
5833}
5834
5835enum {
5836        AT_PKT_END = -1,
5837        BEYOND_PKT_END = -2,
5838};
5839
5840static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
5841{
5842        struct bpf_func_state *state = vstate->frame[vstate->curframe];
5843        struct bpf_reg_state *reg = &state->regs[regn];
5844
5845        if (reg->type != PTR_TO_PACKET)
5846                /* PTR_TO_PACKET_META is not supported yet */
5847                return;
5848
5849        /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
5850         * How far beyond pkt_end it goes is unknown.
5851         * if (!range_open) it's the case of pkt >= pkt_end
5852         * if (range_open) it's the case of pkt > pkt_end
5853         * hence this pointer is at least 1 byte bigger than pkt_end
5854         */
5855        if (range_open)
5856                reg->range = BEYOND_PKT_END;
5857        else
5858                reg->range = AT_PKT_END;
5859}
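
/* Illustrative pattern (sketch) of what mark_pkt_end() models:
 *
 *     if (data + 1 > data_end)    // taken branch: the compared pkt pointer
 *             return XDP_DROP;    // ('data + 1') is marked BEYOND_PKT_END
 *     // fall-through: one byte at 'data' is known to be inside the packet
 */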
5860
5861static void release_reg_references(struct bpf_verifier_env *env,
5862                                   struct bpf_func_state *state,
5863                                   int ref_obj_id)
5864{
5865        struct bpf_reg_state *regs = state->regs, *reg;
5866        int i;
5867
5868        for (i = 0; i < MAX_BPF_REG; i++)
5869                if (regs[i].ref_obj_id == ref_obj_id)
5870                        mark_reg_unknown(env, regs, i);
5871
5872        bpf_for_each_spilled_reg(i, state, reg) {
5873                if (!reg)
5874                        continue;
5875                if (reg->ref_obj_id == ref_obj_id)
5876                        __mark_reg_unknown(env, reg);
5877        }
5878}
5879
5880/* The pointer with the specified id has released its reference to kernel
5881 * resources. Identify all copies of the same pointer and clear the reference.
5882 */
5883static int release_reference(struct bpf_verifier_env *env,
5884                             int ref_obj_id)
5885{
5886        struct bpf_verifier_state *vstate = env->cur_state;
5887        int err;
5888        int i;
5889
5890        err = release_reference_state(cur_func(env), ref_obj_id);
5891        if (err)
5892                return err;
5893
5894        for (i = 0; i <= vstate->curframe; i++)
5895                release_reg_references(env, vstate->frame[i], ref_obj_id);
5896
5897        return 0;
5898}
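
/* E.g. (sketch; 'tuple' is an illustrative name): after
 *
 *     sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 *     copy = sk;
 *     if (sk)
 *             bpf_sk_release(sk);
 *
 * 'sk' and 'copy' carried the same ref_obj_id, so both are turned into
 * unknown scalars here and can no longer be dereferenced.
 */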
5899
5900static void clear_caller_saved_regs(struct bpf_verifier_env *env,
5901                                    struct bpf_reg_state *regs)
5902{
5903        int i;
5904
5905        /* after the call registers r0 - r5 were scratched */
5906        for (i = 0; i < CALLER_SAVED_REGS; i++) {
5907                mark_reg_not_init(env, regs, caller_saved[i]);
5908                check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5909        }
5910}
5911
5912typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
5913                                   struct bpf_func_state *caller,
5914                                   struct bpf_func_state *callee,
5915                                   int insn_idx);
5916
5917static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
5918                             int *insn_idx, int subprog,
5919                             set_callee_state_fn set_callee_state_cb)
5920{
5921        struct bpf_verifier_state *state = env->cur_state;
5922        struct bpf_func_info_aux *func_info_aux;
5923        struct bpf_func_state *caller, *callee;
5924        int err;
5925        bool is_global = false;
5926
5927        if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5928                verbose(env, "the call stack of %d frames is too deep\n",
5929                        state->curframe + 2);
5930                return -E2BIG;
5931        }
5932
5933        caller = state->frame[state->curframe];
5934        if (state->frame[state->curframe + 1]) {
5935                verbose(env, "verifier bug. Frame %d already allocated\n",
5936                        state->curframe + 1);
5937                return -EFAULT;
5938        }
5939
5940        func_info_aux = env->prog->aux->func_info_aux;
5941        if (func_info_aux)
5942                is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5943        err = btf_check_subprog_arg_match(env, subprog, caller->regs);
5944        if (err == -EFAULT)
5945                return err;
5946        if (is_global) {
5947                if (err) {
5948                        verbose(env, "Caller passes invalid args into func#%d\n",
5949                                subprog);
5950                        return err;
5951                } else {
5952                        if (env->log.level & BPF_LOG_LEVEL)
5953                                verbose(env,
5954                                        "Func#%d is global and valid. Skipping.\n",
5955                                        subprog);
5956                        clear_caller_saved_regs(env, caller->regs);
5957
5958                        /* All global functions return a 64-bit SCALAR_VALUE */
5959                        mark_reg_unknown(env, caller->regs, BPF_REG_0);
5960                        caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5961
5962                        /* continue with next insn after call */
5963                        return 0;
5964                }
5965        }
5966
5967        if (insn->code == (BPF_JMP | BPF_CALL) &&
5968            insn->imm == BPF_FUNC_timer_set_callback) {
5969                struct bpf_verifier_state *async_cb;
5970
5971                /* there is no real recursion here. timer callbacks are async */
5972                env->subprog_info[subprog].is_async_cb = true;
5973                async_cb = push_async_cb(env, env->subprog_info[subprog].start,
5974                                         *insn_idx, subprog);
5975                if (!async_cb)
5976                        return -EFAULT;
5977                callee = async_cb->frame[0];
5978                callee->async_entry_cnt = caller->async_entry_cnt + 1;
5979
5980                /* Convert bpf_timer_set_callback() args into timer callback args */
5981                err = set_callee_state_cb(env, caller, callee, *insn_idx);
5982                if (err)
5983                        return err;
5984
5985                clear_caller_saved_regs(env, caller->regs);
5986                mark_reg_unknown(env, caller->regs, BPF_REG_0);
5987                caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5988                /* continue with next insn after call */
5989                return 0;
5990        }
5991
5992        callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5993        if (!callee)
5994                return -ENOMEM;
5995        state->frame[state->curframe + 1] = callee;
5996
5997        /* callee cannot access r0, r6 - r9 for reading and has to write
5998         * into its own stack before reading from it.
5999         * callee can read/write into caller's stack
6000         */
6001        init_func_state(env, callee,
6002                        /* remember the callsite, it will be used by bpf_exit */
6003                        *insn_idx /* callsite */,
6004                        state->curframe + 1 /* frameno within this callchain */,
6005                        subprog /* subprog number within this prog */);
6006
6007        /* Transfer references to the callee */
6008        err = copy_reference_state(callee, caller);
6009        if (err)
6010                return err;
6011
6012        err = set_callee_state_cb(env, caller, callee, *insn_idx);
6013        if (err)
6014                return err;
6015
6016        clear_caller_saved_regs(env, caller->regs);
6017
6018        /* only increment it after check_reg_arg() finished */
6019        state->curframe++;
6020
6021        /* and go analyze first insn of the callee */
6022        *insn_idx = env->subprog_info[subprog].start - 1;
6023
6024        if (env->log.level & BPF_LOG_LEVEL) {
6025                verbose(env, "caller:\n");
6026                print_verifier_state(env, caller);
6027                verbose(env, "callee:\n");
6028                print_verifier_state(env, callee);
6029        }
6030        return 0;
6031}
6032
6033int map_set_for_each_callback_args(struct bpf_verifier_env *env,
6034                                   struct bpf_func_state *caller,
6035                                   struct bpf_func_state *callee)
6036{
6037        /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
6038         *      void *callback_ctx, u64 flags);
6039         * callback_fn(struct bpf_map *map, void *key, void *value,
6040         *      void *callback_ctx);
6041         */
6042        callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6043
6044        callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6045        __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6046        callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6047
6048        callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6049        __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6050        callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6051
6052        /* pointer to stack or null */
6053        callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
6054
6055        /* unused */
6056        __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6057        return 0;
6058}
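
/* A minimal usage sketch ('my_array', 'my_ctx' and 'cb' are made-up names):
 *
 *     static long cb(struct bpf_map *map, u32 *key, u64 *val, void *ctx)
 *     {
 *             return 0;       // 0 = continue iterating, 1 = stop
 *     }
 *     ...
 *     bpf_for_each_map_elem(&my_array, cb, &my_ctx, 0);
 *
 * Inside 'cb', R2 and R3 then carry PTR_TO_MAP_KEY / PTR_TO_MAP_VALUE
 * sized by 'my_array', matching the register setup above.
 */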
6059
6060static int set_callee_state(struct bpf_verifier_env *env,
6061                            struct bpf_func_state *caller,
6062                            struct bpf_func_state *callee, int insn_idx)
6063{
6064        int i;
6065
6066        /* copy r1 - r5 args that callee can access.  The copy includes parent
6067         * pointers, which connects us up to the liveness chain
6068         */
6069        for (i = BPF_REG_1; i <= BPF_REG_5; i++)
6070                callee->regs[i] = caller->regs[i];
6071        return 0;
6072}
6073
6074static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6075                           int *insn_idx)
6076{
6077        int subprog, target_insn;
6078
6079        target_insn = *insn_idx + insn->imm + 1;
6080        subprog = find_subprog(env, target_insn);
6081        if (subprog < 0) {
6082                verbose(env, "verifier bug. No program starts at insn %d\n",
6083                        target_insn);
6084                return -EFAULT;
6085        }
6086
6087        return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
6088}
6089
6090static int set_map_elem_callback_state(struct bpf_verifier_env *env,
6091                                       struct bpf_func_state *caller,
6092                                       struct bpf_func_state *callee,
6093                                       int insn_idx)
6094{
6095        struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
6096        struct bpf_map *map;
6097        int err;
6098
6099        if (bpf_map_ptr_poisoned(insn_aux)) {
6100                verbose(env, "tail_call abusing map_ptr\n");
6101                return -EINVAL;
6102        }
6103
6104        map = BPF_MAP_PTR(insn_aux->map_ptr_state);
6105        if (!map->ops->map_set_for_each_callback_args ||
6106            !map->ops->map_for_each_callback) {
6107                verbose(env, "callback function not allowed for map\n");
6108                return -ENOTSUPP;
6109        }
6110
6111        err = map->ops->map_set_for_each_callback_args(env, caller, callee);
6112        if (err)
6113                return err;
6114
6115        callee->in_callback_fn = true;
6116        return 0;
6117}
6118
6119static int set_timer_callback_state(struct bpf_verifier_env *env,
6120                                    struct bpf_func_state *caller,
6121                                    struct bpf_func_state *callee,
6122                                    int insn_idx)
6123{
6124        struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
6125
6126        /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
6127         * callback_fn(struct bpf_map *map, void *key, void *value);
6128         */
6129        callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
6130        __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
6131        callee->regs[BPF_REG_1].map_ptr = map_ptr;
6132
6133        callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6134        __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6135        callee->regs[BPF_REG_2].map_ptr = map_ptr;
6136
6137        callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6138        __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6139        callee->regs[BPF_REG_3].map_ptr = map_ptr;
6140
6141        /* unused */
6142        __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6143        __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6144        callee->in_async_callback_fn = true;
6145        return 0;
6146}
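
/* Typical usage this models (sketch; 'my_map', 'elem' and 'timer_cb' are
 * illustrative names):
 *
 *     static int timer_cb(void *map, int *key, struct elem *val) { ... }
 *     ...
 *     bpf_timer_init(&val->timer, &my_map, CLOCK_MONOTONIC);
 *     bpf_timer_set_callback(&val->timer, timer_cb);
 *     bpf_timer_start(&val->timer, nsecs, 0);
 *
 * The callback runs asynchronously, hence __check_func_call() pushes it as
 * an async callback state instead of a normal call frame.
 */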
6147
6148static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
6149{
6150        struct bpf_verifier_state *state = env->cur_state;
6151        struct bpf_func_state *caller, *callee;
6152        struct bpf_reg_state *r0;
6153        int err;
6154
6155        callee = state->frame[state->curframe];
6156        r0 = &callee->regs[BPF_REG_0];
6157        if (r0->type == PTR_TO_STACK) {
6158                /* technically it's ok to return caller's stack pointer
6159                 * (or caller's caller's pointer) back to the caller,
6160                 * since these pointers are valid. Only current stack
6161                 * pointer will be invalid as soon as function exits,
6162                 * but let's be conservative
6163                 */
6164                verbose(env, "cannot return stack pointer to the caller\n");
6165                return -EINVAL;
6166        }
6167
6168        state->curframe--;
6169        caller = state->frame[state->curframe];
6170        if (callee->in_callback_fn) {
6171                /* enforce R0 return value range [0, 1]. */
6172                struct tnum range = tnum_range(0, 1);
6173
6174                if (r0->type != SCALAR_VALUE) {
6175                        verbose(env, "R0 not a scalar value\n");
6176                        return -EACCES;
6177                }
6178                if (!tnum_in(range, r0->var_off)) {
6179                        verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
6180                        return -EINVAL;
6181                }
6182        } else {
6183                /* return to the caller whatever r0 had in the callee */
6184                caller->regs[BPF_REG_0] = *r0;
6185        }
6186
6187        /* Transfer references to the caller */
6188        err = copy_reference_state(caller, callee);
6189        if (err)
6190                return err;
6191
6192        *insn_idx = callee->callsite + 1;
6193        if (env->log.level & BPF_LOG_LEVEL) {
6194                verbose(env, "returning from callee:\n");
6195                print_verifier_state(env, callee);
6196                verbose(env, "to caller at %d:\n", *insn_idx);
6197                print_verifier_state(env, caller);
6198        }
6199        /* clear everything in the callee */
6200        free_func_state(callee);
6201        state->frame[state->curframe + 1] = NULL;
6202        return 0;
6203}
6204
6205static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
6206                                   int func_id,
6207                                   struct bpf_call_arg_meta *meta)
6208{
6209        struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
6210
6211        if (ret_type != RET_INTEGER ||
6212            (func_id != BPF_FUNC_get_stack &&
6213             func_id != BPF_FUNC_get_task_stack &&
6214             func_id != BPF_FUNC_probe_read_str &&
6215             func_id != BPF_FUNC_probe_read_kernel_str &&
6216             func_id != BPF_FUNC_probe_read_user_str))
6217                return;
6218
6219        ret_reg->smax_value = meta->msize_max_value;
6220        ret_reg->s32_max_value = meta->msize_max_value;
6221        ret_reg->smin_value = -MAX_ERRNO;
6222        ret_reg->s32_min_value = -MAX_ERRNO;
6223        __reg_deduce_bounds(ret_reg);
6224        __reg_bound_offset(ret_reg);
6225        __update_reg_bounds(ret_reg);
6226}
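
/* E.g. (sketch; 'buf' and 'events' are illustrative names): after
 *
 *     long n = bpf_get_stack(ctx, buf, sizeof(buf), 0);
 *
 * r0 is known to lie in [-MAX_ERRNO, sizeof(buf)], so a following
 *
 *     if (n > 0)
 *             bpf_perf_event_output(ctx, &events, flags, buf, n);
 *
 * passes the size check without an extra explicit upper-bound test on 'n'.
 */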
6227
6228static int
6229record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6230                int func_id, int insn_idx)
6231{
6232        struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6233        struct bpf_map *map = meta->map_ptr;
6234
6235        if (func_id != BPF_FUNC_tail_call &&
6236            func_id != BPF_FUNC_map_lookup_elem &&
6237            func_id != BPF_FUNC_map_update_elem &&
6238            func_id != BPF_FUNC_map_delete_elem &&
6239            func_id != BPF_FUNC_map_push_elem &&
6240            func_id != BPF_FUNC_map_pop_elem &&
6241            func_id != BPF_FUNC_map_peek_elem &&
6242            func_id != BPF_FUNC_for_each_map_elem &&
6243            func_id != BPF_FUNC_redirect_map)
6244                return 0;
6245
6246        if (map == NULL) {
6247                verbose(env, "kernel subsystem misconfigured verifier\n");
6248                return -EINVAL;
6249        }
6250
6251        /* For read-only maps, some additional restrictions
6252         * need to be applied in order to prevent the program
6253         * from altering the state of the map.
6254         */
6255        if ((map->map_flags & BPF_F_RDONLY_PROG) &&
6256            (func_id == BPF_FUNC_map_delete_elem ||
6257             func_id == BPF_FUNC_map_update_elem ||
6258             func_id == BPF_FUNC_map_push_elem ||
6259             func_id == BPF_FUNC_map_pop_elem)) {
6260                verbose(env, "write into map forbidden\n");
6261                return -EACCES;
6262        }
6263
6264        if (!BPF_MAP_PTR(aux->map_ptr_state))
6265                bpf_map_ptr_store(aux, meta->map_ptr,
6266                                  !meta->map_ptr->bypass_spec_v1);
6267        else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
6268                bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
6269                                  !meta->map_ptr->bypass_spec_v1);
6270        return 0;
6271}
6272
6273static int
6274record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6275                int func_id, int insn_idx)
6276{
6277        struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6278        struct bpf_reg_state *regs = cur_regs(env), *reg;
6279        struct bpf_map *map = meta->map_ptr;
6280        struct tnum range;
6281        u64 val;
6282        int err;
6283
6284        if (func_id != BPF_FUNC_tail_call)
6285                return 0;
6286        if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
6287                verbose(env, "kernel subsystem misconfigured verifier\n");
6288                return -EINVAL;
6289        }
6290
6291        range = tnum_range(0, map->max_entries - 1);
6292        reg = &regs[BPF_REG_3];
6293
6294        if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
6295                bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6296                return 0;
6297        }
6298
6299        err = mark_chain_precision(env, BPF_REG_3);
6300        if (err)
6301                return err;
6302
6303        val = reg->var_off.value;
6304        if (bpf_map_key_unseen(aux))
6305                bpf_map_key_store(aux, val);
6306        else if (!bpf_map_key_poisoned(aux) &&
6307                  bpf_map_key_immediate(aux) != val)
6308                bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6309        return 0;
6310}
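
/* E.g. (sketch; 'prog_array' is an illustrative name): with
 *
 *     bpf_tail_call(ctx, &prog_array, 2);
 *
 * the index is the constant 2 on every path, so it is recorded here and
 * the tail call can later be patched into a direct jump; a variable or
 * out-of-range index poisons the key and keeps the indirect tail call.
 */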
6311
6312static int check_reference_leak(struct bpf_verifier_env *env)
6313{
6314        struct bpf_func_state *state = cur_func(env);
6315        int i;
6316
6317        for (i = 0; i < state->acquired_refs; i++) {
6318                verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
6319                        state->refs[i].id, state->refs[i].insn_idx);
6320        }
6321        return state->acquired_refs ? -EINVAL : 0;
6322}
6323
6324static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
6325                                   struct bpf_reg_state *regs)
6326{
6327        struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
6328        struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
6329        struct bpf_map *fmt_map = fmt_reg->map_ptr;
6330        int err, fmt_map_off, num_args;
6331        u64 fmt_addr;
6332        char *fmt;
6333
6334        /* data must be an array of u64 */
6335        if (data_len_reg->var_off.value % 8)
6336                return -EINVAL;
6337        num_args = data_len_reg->var_off.value / 8;
6338
6339        /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
6340         * and map_direct_value_addr is set.
6341         */
6342        fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
6343        err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
6344                                                  fmt_map_off);
6345        if (err) {
6346                verbose(env, "verifier bug\n");
6347                return -EFAULT;
6348        }
6349        fmt = (char *)(long)fmt_addr + fmt_map_off;
6350
6351        /* We are also guaranteed that fmt+fmt_map_off is NUL-terminated, so we
6352         * can focus on validating the format specifiers.
6353         */
6354        err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
6355        if (err < 0)
6356                verbose(env, "Invalid format string\n");
6357
6358        return err;
6359}
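
/* The call shape being validated (sketch; 'out', 'fmt' and 'args' are
 * illustrative names):
 *
 *     static const char fmt[] = "pid %d";     // ends up in a read-only map
 *     u64 args[] = { pid };
 *     char out[32];
 *
 *     bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 *
 * R3 must point at a NUL-terminated string in a read-only map
 * (ARG_PTR_TO_CONST_STR) and R5 (data_len) must be a multiple of 8.
 */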
6360
6361static int check_get_func_ip(struct bpf_verifier_env *env)
6362{
6363        enum bpf_attach_type eatype = env->prog->expected_attach_type;
6364        enum bpf_prog_type type = resolve_prog_type(env->prog);
6365        int func_id = BPF_FUNC_get_func_ip;
6366
6367        if (type == BPF_PROG_TYPE_TRACING) {
6368                if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT &&
6369                    eatype != BPF_MODIFY_RETURN) {
6370                        verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
6371                                func_id_name(func_id), func_id);
6372                        return -ENOTSUPP;
6373                }
6374                return 0;
6375        } else if (type == BPF_PROG_TYPE_KPROBE) {
6376                return 0;
6377        }
6378
6379        verbose(env, "func %s#%d not supported for program type %d\n",
6380                func_id_name(func_id), func_id, type);
6381        return -ENOTSUPP;
6382}
6383
6384static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6385                             int *insn_idx_p)
6386{
6387        const struct bpf_func_proto *fn = NULL;
6388        struct bpf_reg_state *regs;
6389        struct bpf_call_arg_meta meta;
6390        int insn_idx = *insn_idx_p;
6391        bool changes_data;
6392        int i, err, func_id;
6393
6394        /* find function prototype */
6395        func_id = insn->imm;
6396        if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
6397                verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
6398                        func_id);
6399                return -EINVAL;
6400        }
6401
6402        if (env->ops->get_func_proto)
6403                fn = env->ops->get_func_proto(func_id, env->prog);
6404        if (!fn) {
6405                verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
6406                        func_id);
6407                return -EINVAL;
6408        }
6409
6410        /* eBPF programs must be GPL compatible to use GPL-ed functions */
6411        if (!env->prog->gpl_compatible && fn->gpl_only) {
6412                verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
6413                return -EINVAL;
6414        }
6415
6416        if (fn->allowed && !fn->allowed(env->prog)) {
6417                verbose(env, "helper call is not allowed in probe\n");
6418                return -EINVAL;
6419        }
6420
6421        /* With LD_ABS/IND some JITs save/restore skb from r1. */
6422        changes_data = bpf_helper_changes_pkt_data(fn->func);
6423        if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
6424                verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
6425                        func_id_name(func_id), func_id);
6426                return -EINVAL;
6427        }
6428
6429        memset(&meta, 0, sizeof(meta));
6430        meta.pkt_access = fn->pkt_access;
6431
6432        err = check_func_proto(fn, func_id);
6433        if (err) {
6434                verbose(env, "kernel subsystem misconfigured func %s#%d\n",
6435                        func_id_name(func_id), func_id);
6436                return err;
6437        }
6438
6439        meta.func_id = func_id;
6440        /* check args */
6441        for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
6442                err = check_func_arg(env, i, &meta, fn);
6443                if (err)
6444                        return err;
6445        }
6446
6447        err = record_func_map(env, &meta, func_id, insn_idx);
6448        if (err)
6449                return err;
6450
6451        err = record_func_key(env, &meta, func_id, insn_idx);
6452        if (err)
6453                return err;
6454
6455        /* In case of raw mode, mark slots with STACK_MISC; the stack offset
6456         * is inferred from register state.
6457         */
6458        for (i = 0; i < meta.access_size; i++) {
6459                err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
6460                                       BPF_WRITE, -1, false);
6461                if (err)
6462                        return err;
6463        }
6464
6465        if (func_id == BPF_FUNC_tail_call) {
6466                err = check_reference_leak(env);
6467                if (err) {
6468                        verbose(env, "tail_call would lead to reference leak\n");
6469                        return err;
6470                }
6471        } else if (is_release_function(func_id)) {
6472                err = release_reference(env, meta.ref_obj_id);
6473                if (err) {
6474                        verbose(env, "func %s#%d reference has not been acquired before\n",
6475                                func_id_name(func_id), func_id);
6476                        return err;
6477                }
6478        }
6479
6480        regs = cur_regs(env);
6481
6482        /* check that the flags argument in get_local_storage(map, flags) is 0;
6483         * this is required because get_local_storage() can't return an error.
6484         */
6485        if (func_id == BPF_FUNC_get_local_storage &&
6486            !register_is_null(&regs[BPF_REG_2])) {
6487                verbose(env, "get_local_storage() doesn't support non-zero flags\n");
6488                return -EINVAL;
6489        }
6490
6491        if (func_id == BPF_FUNC_for_each_map_elem) {
6492                err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6493                                        set_map_elem_callback_state);
6494                if (err < 0)
6495                        return -EINVAL;
6496        }
6497
6498        if (func_id == BPF_FUNC_timer_set_callback) {
6499                err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6500                                        set_timer_callback_state);
6501                if (err < 0)
6502                        return -EINVAL;
6503        }
6504
6505        if (func_id == BPF_FUNC_snprintf) {
6506                err = check_bpf_snprintf_call(env, regs);
6507                if (err < 0)
6508                        return err;
6509        }
6510
6511        /* reset caller saved regs */
6512        for (i = 0; i < CALLER_SAVED_REGS; i++) {
6513                mark_reg_not_init(env, regs, caller_saved[i]);
6514                check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6515        }
6516
6517        /* helper call returns 64-bit value. */
6518        regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6519
6520        /* update return register (already marked as written above) */
6521        if (fn->ret_type == RET_INTEGER) {
6522                /* sets type to SCALAR_VALUE */
6523                mark_reg_unknown(env, regs, BPF_REG_0);
6524        } else if (fn->ret_type == RET_VOID) {
6525                regs[BPF_REG_0].type = NOT_INIT;
6526        } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
6527                   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
6528                /* There is no offset yet applied, variable or fixed */
6529                mark_reg_known_zero(env, regs, BPF_REG_0);
6530                /* remember map_ptr, so that check_map_access()
6531                 * can check 'value_size' boundary of memory access
6532                 * to map element returned from bpf_map_lookup_elem()
6533                 */
6534                if (meta.map_ptr == NULL) {
6535                        verbose(env,
6536                                "kernel subsystem misconfigured verifier\n");
6537                        return -EINVAL;
6538                }
6539                regs[BPF_REG_0].map_ptr = meta.map_ptr;
6540                regs[BPF_REG_0].map_uid = meta.map_uid;
6541                if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
6542                        regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
6543                        if (map_value_has_spin_lock(meta.map_ptr))
6544                                regs[BPF_REG_0].id = ++env->id_gen;
6545                } else {
6546                        regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
6547                }
6548        } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
6549                mark_reg_known_zero(env, regs, BPF_REG_0);
6550                regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
6551        } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
6552                mark_reg_known_zero(env, regs, BPF_REG_0);
6553                regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
6554        } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
6555                mark_reg_known_zero(env, regs, BPF_REG_0);
6556                regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
6557        } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
6558                mark_reg_known_zero(env, regs, BPF_REG_0);
6559                regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
6560                regs[BPF_REG_0].mem_size = meta.mem_size;
6561        } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
6562                   fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
6563                const struct btf_type *t;
6564
6565                mark_reg_known_zero(env, regs, BPF_REG_0);
6566                t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
6567                if (!btf_type_is_struct(t)) {
6568                        u32 tsize;
6569                        const struct btf_type *ret;
6570                        const char *tname;
6571
6572                        /* resolve the type size of ksym. */
6573                        ret = btf_resolve_size(meta.ret_btf, t, &tsize);
6574                        if (IS_ERR(ret)) {
6575                                tname = btf_name_by_offset(meta.ret_btf, t->name_off);
6576                                verbose(env, "unable to resolve the size of type '%s': %ld\n",
6577                                        tname, PTR_ERR(ret));
6578                                return -EINVAL;
6579                        }
6580                        regs[BPF_REG_0].type =
6581                                fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
6582                                PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
6583                        regs[BPF_REG_0].mem_size = tsize;
6584                } else {
6585                        regs[BPF_REG_0].type =
6586                                fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
6587                                PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
6588                        regs[BPF_REG_0].btf = meta.ret_btf;
6589                        regs[BPF_REG_0].btf_id = meta.ret_btf_id;
6590                }
6591        } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
6592                   fn->ret_type == RET_PTR_TO_BTF_ID) {
6593                int ret_btf_id;
6594
6595                mark_reg_known_zero(env, regs, BPF_REG_0);
6596                regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
6597                                                     PTR_TO_BTF_ID :
6598                                                     PTR_TO_BTF_ID_OR_NULL;
6599                ret_btf_id = *fn->ret_btf_id;
6600                if (ret_btf_id == 0) {
6601                        verbose(env, "invalid return type %d of func %s#%d\n",
6602                                fn->ret_type, func_id_name(func_id), func_id);
6603                        return -EINVAL;
6604                }
6605                /* current BPF helper definitions only come from
6606                 * built-in code with type IDs from vmlinux BTF
6607                 */
6608                regs[BPF_REG_0].btf = btf_vmlinux;
6609                regs[BPF_REG_0].btf_id = ret_btf_id;
6610        } else {
6611                verbose(env, "unknown return type %d of func %s#%d\n",
6612                        fn->ret_type, func_id_name(func_id), func_id);
6613                return -EINVAL;
6614        }
6615
6616        if (reg_type_may_be_null(regs[BPF_REG_0].type))
6617                regs[BPF_REG_0].id = ++env->id_gen;
6618
6619        if (is_ptr_cast_function(func_id)) {
6620                /* For release_reference() */
6621                regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
6622        } else if (is_acquire_function(func_id, meta.map_ptr)) {
6623                int id = acquire_reference_state(env, insn_idx);
6624
6625                if (id < 0)
6626                        return id;
6627                /* For mark_ptr_or_null_reg() */
6628                regs[BPF_REG_0].id = id;
6629                /* For release_reference() */
6630                regs[BPF_REG_0].ref_obj_id = id;
6631        }
6632
6633        do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
6634
6635        err = check_map_func_compatibility(env, meta.map_ptr, func_id);
6636        if (err)
6637                return err;
6638
6639        if ((func_id == BPF_FUNC_get_stack ||
6640             func_id == BPF_FUNC_get_task_stack) &&
6641            !env->prog->has_callchain_buf) {
6642                const char *err_str;
6643
6644#ifdef CONFIG_PERF_EVENTS
6645                err = get_callchain_buffers(sysctl_perf_event_max_stack);
6646                err_str = "cannot get callchain buffer for func %s#%d\n";
6647#else
6648                err = -ENOTSUPP;
6649                err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
6650#endif
6651                if (err) {
6652                        verbose(env, err_str, func_id_name(func_id), func_id);
6653                        return err;
6654                }
6655
6656                env->prog->has_callchain_buf = true;
6657        }
6658
6659        if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
6660                env->prog->call_get_stack = true;
6661
6662        if (func_id == BPF_FUNC_get_func_ip) {
6663                if (check_get_func_ip(env))
6664                        return -ENOTSUPP;
6665                env->prog->call_get_func_ip = true;
6666        }
6667
6668        if (changes_data)
6669                clear_all_pkt_pointers(env);
6670        return 0;
6671}
6672
6673/* mark_btf_func_reg_size() is used when the reg size is determined by
6674 * the BTF func_proto's return value and argument sizes.
6675 */
6676static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
6677                                   size_t reg_size)
6678{
6679        struct bpf_reg_state *reg = &cur_regs(env)[regno];
6680
6681        if (regno == BPF_REG_0) {
6682                /* Function return value */
6683                reg->live |= REG_LIVE_WRITTEN;
6684                reg->subreg_def = reg_size == sizeof(u64) ?
6685                        DEF_NOT_SUBREG : env->insn_idx + 1;
6686        } else {
6687                /* Function argument */
6688                if (reg_size == sizeof(u64)) {
6689                        mark_insn_zext(env, reg);
6690                        mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
6691                } else {
6692                        mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
6693                }
6694        }
6695}
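
/* As a rough illustration of the helper above: for a (hypothetical) kernel
 * function taking (struct task_struct *p, u32 flags) and returning u64,
 * BPF_REG_0 is marked as an 8-byte write, R1 as an 8-byte (pointer) read
 * and R2 as a 4-byte read, so that later zero-extension and liveness
 * tracking operate on the correct widths.
 */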
6696
6697static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
6698{
6699        const struct btf_type *t, *func, *func_proto, *ptr_type;
6700        struct bpf_reg_state *regs = cur_regs(env);
6701        const char *func_name, *ptr_type_name;
6702        u32 i, nargs, func_id, ptr_type_id;
6703        struct module *btf_mod = NULL;
6704        const struct btf_param *args;
6705        struct btf *desc_btf;
6706        int err;
6707
6708        /* skip for now, but return error when we find this in fixup_kfunc_call */
6709        if (!insn->imm)
6710                return 0;
6711
6712        desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
6713        if (IS_ERR(desc_btf))
6714                return PTR_ERR(desc_btf);
6715
6716        func_id = insn->imm;
6717        func = btf_type_by_id(desc_btf, func_id);
6718        func_name = btf_name_by_offset(desc_btf, func->name_off);
6719        func_proto = btf_type_by_id(desc_btf, func->type);
6720
6721        if (!env->ops->check_kfunc_call ||
6722            !env->ops->check_kfunc_call(func_id, btf_mod)) {
6723                verbose(env, "calling kernel function %s is not allowed\n",
6724                        func_name);
6725                return -EACCES;
6726        }
6727
6728        /* Check the arguments */
6729        err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
6730        if (err)
6731                return err;
6732
6733        for (i = 0; i < CALLER_SAVED_REGS; i++)
6734                mark_reg_not_init(env, regs, caller_saved[i]);
6735
6736        /* Check return type */
6737        t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
6738        if (btf_type_is_scalar(t)) {
6739                mark_reg_unknown(env, regs, BPF_REG_0);
6740                mark_btf_func_reg_size(env, BPF_REG_0, t->size);
6741        } else if (btf_type_is_ptr(t)) {
6742                ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
6743                                                   &ptr_type_id);
6744                if (!btf_type_is_struct(ptr_type)) {
6745                        ptr_type_name = btf_name_by_offset(desc_btf,
6746                                                           ptr_type->name_off);
6747                        verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
6748                                func_name, btf_type_str(ptr_type),
6749                                ptr_type_name);
6750                        return -EINVAL;
6751                }
6752                mark_reg_known_zero(env, regs, BPF_REG_0);
6753                regs[BPF_REG_0].btf = desc_btf;
6754                regs[BPF_REG_0].type = PTR_TO_BTF_ID;
6755                regs[BPF_REG_0].btf_id = ptr_type_id;
6756                mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
6757        } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
6758
6759        nargs = btf_type_vlen(func_proto);
6760        args = (const struct btf_param *)(func_proto + 1);
6761        for (i = 0; i < nargs; i++) {
6762                u32 regno = i + 1;
6763
6764                t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
6765                if (btf_type_is_ptr(t))
6766                        mark_btf_func_reg_size(env, regno, sizeof(void *));
6767                else
6768                        /* scalar. ensured by btf_check_kfunc_arg_match() */
6769                        mark_btf_func_reg_size(env, regno, t->size);
6770        }
6771
6772        return 0;
6773}
6774
6775static bool signed_add_overflows(s64 a, s64 b)
6776{
6777        /* Do the add in u64, where overflow is well-defined */
6778        s64 res = (s64)((u64)a + (u64)b);
6779
6780        if (b < 0)
6781                return res > a;
6782        return res < a;
6783}
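
/* Illustration for signed_add_overflows() above: with a = S64_MAX and b = 1
 * the u64 addition wraps around to S64_MIN, so res < a while b >= 0 and the
 * overflow is reported; with a = S64_MIN and b = -1 it wraps to S64_MAX, so
 * res > a while b < 0.  Doing the arithmetic in u64 keeps the wrap-around
 * well defined, since signed overflow is undefined behaviour in C.  The
 * 32-bit and subtraction variants below follow the same pattern.
 */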
6784
6785static bool signed_add32_overflows(s32 a, s32 b)
6786{
6787        /* Do the add in u32, where overflow is well-defined */
6788        s32 res = (s32)((u32)a + (u32)b);
6789
6790        if (b < 0)
6791                return res > a;
6792        return res < a;
6793}
6794
6795static bool signed_sub_overflows(s64 a, s64 b)
6796{
6797        /* Do the sub in u64, where overflow is well-defined */
6798        s64 res = (s64)((u64)a - (u64)b);
6799
6800        if (b < 0)
6801                return res < a;
6802        return res > a;
6803}
6804
6805static bool signed_sub32_overflows(s32 a, s32 b)
6806{
6807        /* Do the sub in u32, where overflow is well-defined */
6808        s32 res = (s32)((u32)a - (u32)b);
6809
6810        if (b < 0)
6811                return res < a;
6812        return res > a;
6813}
6814
6815static bool check_reg_sane_offset(struct bpf_verifier_env *env,
6816                                  const struct bpf_reg_state *reg,
6817                                  enum bpf_reg_type type)
6818{
6819        bool known = tnum_is_const(reg->var_off);
6820        s64 val = reg->var_off.value;
6821        s64 smin = reg->smin_value;
6822
6823        if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
6824                verbose(env, "math between %s pointer and %lld is not allowed\n",
6825                        reg_type_str[type], val);
6826                return false;
6827        }
6828
6829        if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
6830                verbose(env, "%s pointer offset %d is not allowed\n",
6831                        reg_type_str[type], reg->off);
6832                return false;
6833        }
6834
6835        if (smin == S64_MIN) {
6836                verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
6837                        reg_type_str[type]);
6838                return false;
6839        }
6840
6841        if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
6842                verbose(env, "value %lld makes %s pointer be out of bounds\n",
6843                        smin, reg_type_str[type]);
6844                return false;
6845        }
6846
6847        return true;
6848}
6849
6850static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
6851{
6852        return &env->insn_aux_data[env->insn_idx];
6853}
6854
6855enum {
6856        REASON_BOUNDS   = -1,
6857        REASON_TYPE     = -2,
6858        REASON_PATHS    = -3,
6859        REASON_LIMIT    = -4,
6860        REASON_STACK    = -5,
6861};
6862
6863static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
6864                              u32 *alu_limit, bool mask_to_left)
6865{
6866        u32 max = 0, ptr_limit = 0;
6867
6868        switch (ptr_reg->type) {
6869        case PTR_TO_STACK:
6870                /* Offset 0 is out-of-bounds, but acceptable start for the
6871                 * left direction, see BPF_REG_FP. Also, unknown scalar
6872                 * offset where we would need to deal with min/max bounds is
6873                 * currently prohibited for unprivileged.
6874                 */
6875                max = MAX_BPF_STACK + mask_to_left;
6876                ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
6877                break;
6878        case PTR_TO_MAP_VALUE:
6879                max = ptr_reg->map_ptr->value_size;
6880                ptr_limit = (mask_to_left ?
6881                             ptr_reg->smin_value :
6882                             ptr_reg->umax_value) + ptr_reg->off;
6883                break;
6884        default:
6885                return REASON_TYPE;
6886        }
6887
6888        if (ptr_limit >= max)
6889                return REASON_LIMIT;
6890        *alu_limit = ptr_limit;
6891        return 0;
6892}
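
/* Rough example for retrieve_ptr_limit() above: a PTR_TO_STACK register with
 * a constant var_off of 0 and off == -16 (fp-16) yields ptr_limit ==
 * -((0) + (-16)) == 16.  The limit must stay below MAX_BPF_STACK (plus one
 * when masking to the left); the BPF_ALU_SANITIZE_* rewrite emitted later in
 * do_misc_fixups() then uses alu_limit to force an out-of-range offset to 0
 * at run time.
 */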
6893
6894static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
6895                                    const struct bpf_insn *insn)
6896{
6897        return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
6898}
6899
6900static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
6901                                       u32 alu_state, u32 alu_limit)
6902{
6903        /* If we arrived here from different branches with different
6904         * state or limits to sanitize, then this won't work.
6905         */
6906        if (aux->alu_state &&
6907            (aux->alu_state != alu_state ||
6908             aux->alu_limit != alu_limit))
6909                return REASON_PATHS;
6910
6911        /* Corresponding fixup done in do_misc_fixups(). */
6912        aux->alu_state = alu_state;
6913        aux->alu_limit = alu_limit;
6914        return 0;
6915}
6916
6917static int sanitize_val_alu(struct bpf_verifier_env *env,
6918                            struct bpf_insn *insn)
6919{
6920        struct bpf_insn_aux_data *aux = cur_aux(env);
6921
6922        if (can_skip_alu_sanitation(env, insn))
6923                return 0;
6924
6925        return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
6926}
6927
6928static bool sanitize_needed(u8 opcode)
6929{
6930        return opcode == BPF_ADD || opcode == BPF_SUB;
6931}
6932
6933struct bpf_sanitize_info {
6934        struct bpf_insn_aux_data aux;
6935        bool mask_to_left;
6936};
6937
6938static struct bpf_verifier_state *
6939sanitize_speculative_path(struct bpf_verifier_env *env,
6940                          const struct bpf_insn *insn,
6941                          u32 next_idx, u32 curr_idx)
6942{
6943        struct bpf_verifier_state *branch;
6944        struct bpf_reg_state *regs;
6945
6946        branch = push_stack(env, next_idx, curr_idx, true);
6947        if (branch && insn) {
6948                regs = branch->frame[branch->curframe]->regs;
6949                if (BPF_SRC(insn->code) == BPF_K) {
6950                        mark_reg_unknown(env, regs, insn->dst_reg);
6951                } else if (BPF_SRC(insn->code) == BPF_X) {
6952                        mark_reg_unknown(env, regs, insn->dst_reg);
6953                        mark_reg_unknown(env, regs, insn->src_reg);
6954                }
6955        }
6956        return branch;
6957}
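
/* Besides the use in sanitize_ptr_alu() below, this is also used from the
 * conditional jump handling: when one direction of a branch is provably dead
 * architecturally, it is still pushed here as a speculative path (with the
 * compared registers marked unknown), so that accesses reachable only under
 * branch misspeculation get verified as well.
 */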
6958
6959static int sanitize_ptr_alu(struct bpf_verifier_env *env,
6960                            struct bpf_insn *insn,
6961                            const struct bpf_reg_state *ptr_reg,
6962                            const struct bpf_reg_state *off_reg,
6963                            struct bpf_reg_state *dst_reg,
6964                            struct bpf_sanitize_info *info,
6965                            const bool commit_window)
6966{
6967        struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
6968        struct bpf_verifier_state *vstate = env->cur_state;
6969        bool off_is_imm = tnum_is_const(off_reg->var_off);
6970        bool off_is_neg = off_reg->smin_value < 0;
6971        bool ptr_is_dst_reg = ptr_reg == dst_reg;
6972        u8 opcode = BPF_OP(insn->code);
6973        u32 alu_state, alu_limit;
6974        struct bpf_reg_state tmp;
6975        bool ret;
6976        int err;
6977
6978        if (can_skip_alu_sanitation(env, insn))
6979                return 0;
6980
6981        /* We already marked aux for masking from non-speculative
6982         * paths, thus we got here in the first place. We only care
6983         * to explore bad access from here.
6984         */
6985        if (vstate->speculative)
6986                goto do_sim;
6987
6988        if (!commit_window) {
6989                if (!tnum_is_const(off_reg->var_off) &&
6990                    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
6991                        return REASON_BOUNDS;
6992
6993                info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
6994                                     (opcode == BPF_SUB && !off_is_neg);
6995        }
6996
6997        err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
6998        if (err < 0)
6999                return err;
7000
7001        if (commit_window) {
7002                /* In commit phase we narrow the masking window based on
7003                 * the observed pointer move after the simulated operation.
7004                 */
7005                alu_state = info->aux.alu_state;
7006                alu_limit = abs(info->aux.alu_limit - alu_limit);
7007        } else {
7008                alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
7009                alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
7010                alu_state |= ptr_is_dst_reg ?
7011                             BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
7012
7013                /* Limit pruning on unknown scalars to enable deep search for
7014                 * potential masking differences from other program paths.
7015                 */
7016                if (!off_is_imm)
7017                        env->explore_alu_limits = true;
7018        }
7019
7020        err = update_alu_sanitation_state(aux, alu_state, alu_limit);
7021        if (err < 0)
7022                return err;
7023do_sim:
7024        /* If we're in commit phase, we're done here given we already
7025         * pushed the truncated dst_reg into the speculative verification
7026         * stack.
7027         *
7028         * Also, when register is a known constant, we rewrite register-based
7029         * operation to immediate-based, and thus do not need masking (and as
7030         * a consequence, do not need to simulate the zero-truncation either).
7031         */
7032        if (commit_window || off_is_imm)
7033                return 0;
7034
7035        /* Simulate and find potential out-of-bounds access under
7036         * speculative execution from truncation as a result of
7037         * masking when off was not within expected range. If off
7038         * sits in dst, then we temporarily need to move ptr there
7039         * to simulate dst (== 0) +/-= ptr. Needed, for example,
7040         * for cases where we use K-based arithmetic in one direction
7041         * and truncated reg-based in the other in order to explore
7042         * bad access.
7043         */
7044        if (!ptr_is_dst_reg) {
7045                tmp = *dst_reg;
7046                *dst_reg = *ptr_reg;
7047        }
7048        ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
7049                                        env->insn_idx);
7050        if (!ptr_is_dst_reg && ret)
7051                *dst_reg = tmp;
7052        return !ret ? REASON_STACK : 0;
7053}
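
/* In short: the non-commit call (made before the pointer ALU op is evaluated)
 * records alu_state/alu_limit and simulates the truncated operation on a
 * speculative path, while the commit call (made after the new bounds are
 * known) narrows alu_limit based on the observed pointer move, as described
 * above.  Both feed the masking rewrite in do_misc_fixups().
 */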
7054
7055static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
7056{
7057        struct bpf_verifier_state *vstate = env->cur_state;
7058
7059        /* If we simulate paths under speculation, we don't update the
7060         * insn as 'seen' such that when we verify unreachable paths in
7061         * the non-speculative domain, sanitize_dead_code() can still
7062         * rewrite/sanitize them.
7063         */
7064        if (!vstate->speculative)
7065                env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
7066}
7067
7068static int sanitize_err(struct bpf_verifier_env *env,
7069                        const struct bpf_insn *insn, int reason,
7070                        const struct bpf_reg_state *off_reg,
7071                        const struct bpf_reg_state *dst_reg)
7072{
7073        static const char *err = "pointer arithmetic with it prohibited for !root";
7074        const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
7075        u32 dst = insn->dst_reg, src = insn->src_reg;
7076
7077        switch (reason) {
7078        case REASON_BOUNDS:
7079                verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
7080                        off_reg == dst_reg ? dst : src, err);
7081                break;
7082        case REASON_TYPE:
7083                verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
7084                        off_reg == dst_reg ? src : dst, err);
7085                break;
7086        case REASON_PATHS:
7087                verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
7088                        dst, op, err);
7089                break;
7090        case REASON_LIMIT:
7091                verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
7092                        dst, op, err);
7093                break;
7094        case REASON_STACK:
7095                verbose(env, "R%d could not be pushed for speculative verification, %s\n",
7096                        dst, err);
7097                break;
7098        default:
7099                verbose(env, "verifier internal error: unknown reason (%d)\n",
7100                        reason);
7101                break;
7102        }
7103
7104        return -EACCES;
7105}
7106
7107/* check that stack access falls within stack limits and that 'reg' doesn't
7108 * have a variable offset.
7109 *
7110 * Variable offset is prohibited for unprivileged mode for simplicity since it
7111 * requires corresponding support in Spectre masking for stack ALU.  See also
7112 * retrieve_ptr_limit().
7113 *
7115 * 'off' includes 'reg->off'.
7116 */
7117static int check_stack_access_for_ptr_arithmetic(
7118                                struct bpf_verifier_env *env,
7119                                int regno,
7120                                const struct bpf_reg_state *reg,
7121                                int off)
7122{
7123        if (!tnum_is_const(reg->var_off)) {
7124                char tn_buf[48];
7125
7126                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7127                verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
7128                        regno, tn_buf, off);
7129                return -EACCES;
7130        }
7131
7132        if (off >= 0 || off < -MAX_BPF_STACK) {
7133                verbose(env, "R%d stack pointer arithmetic goes out of range, "
7134                        "prohibited for !root; off=%d\n", regno, off);
7135                return -EACCES;
7136        }
7137
7138        return 0;
7139}
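
/* E.g. with MAX_BPF_STACK == 512, a resulting offset of fp-8 passes the check
 * above for unprivileged programs, while fp+8 (off >= 0) or fp-640 (below the
 * stack) is rejected.
 */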
7140
7141static int sanitize_check_bounds(struct bpf_verifier_env *env,
7142                                 const struct bpf_insn *insn,
7143                                 const struct bpf_reg_state *dst_reg)
7144{
7145        u32 dst = insn->dst_reg;
7146
7147        /* For unprivileged we require that resulting offset must be in bounds
7148         * in order to be able to sanitize access later on.
7149         */
7150        if (env->bypass_spec_v1)
7151                return 0;
7152
7153        switch (dst_reg->type) {
7154        case PTR_TO_STACK:
7155                if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
7156                                        dst_reg->off + dst_reg->var_off.value))
7157                        return -EACCES;
7158                break;
7159        case PTR_TO_MAP_VALUE:
7160                if (check_map_access(env, dst, dst_reg->off, 1, false)) {
7161                        verbose(env, "R%d pointer arithmetic of map value goes out of range, "
7162                                "prohibited for !root\n", dst);
7163                        return -EACCES;
7164                }
7165                break;
7166        default:
7167                break;
7168        }
7169
7170        return 0;
7171}
7172
7173/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
7174 * Caller should also handle BPF_MOV case separately.
7175 * If we return -EACCES, caller may want to try again treating pointer as a
7176 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
7177 */
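
/* Example: for 'r0 = bpf_map_lookup_elem(...); r0 += r2' where r2 is a scalar
 * known to lie in [0, 16] and r0 has already been NULL-checked, the function
 * below keeps r0's PTR_TO_MAP_VALUE type and folds r2's bounds into r0's
 * smin/smax/umin/umax and var_off; a later access through r0 is then checked
 * against the map's value_size using those bounds.
 */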
7178static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
7179                                   struct bpf_insn *insn,
7180                                   const struct bpf_reg_state *ptr_reg,
7181                                   const struct bpf_reg_state *off_reg)
7182{
7183        struct bpf_verifier_state *vstate = env->cur_state;
7184        struct bpf_func_state *state = vstate->frame[vstate->curframe];
7185        struct bpf_reg_state *regs = state->regs, *dst_reg;
7186        bool known = tnum_is_const(off_reg->var_off);
7187        s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
7188            smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
7189        u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
7190            umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
7191        struct bpf_sanitize_info info = {};
7192        u8 opcode = BPF_OP(insn->code);
7193        u32 dst = insn->dst_reg;
7194        int ret;
7195
7196        dst_reg = &regs[dst];
7197
7198        if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
7199            smin_val > smax_val || umin_val > umax_val) {
7200                /* Taint dst register if offset had invalid bounds derived from
7201                 * e.g. dead branches.
7202                 */
7203                __mark_reg_unknown(env, dst_reg);
7204                return 0;
7205        }
7206
7207        if (BPF_CLASS(insn->code) != BPF_ALU64) {
7208                /* 32-bit ALU ops on pointers produce (meaningless) scalars */
7209                if (opcode == BPF_SUB && env->allow_ptr_leaks) {
7210                        __mark_reg_unknown(env, dst_reg);
7211                        return 0;
7212                }
7213
7214                verbose(env,
7215                        "R%d 32-bit pointer arithmetic prohibited\n",
7216                        dst);
7217                return -EACCES;
7218        }
7219
7220        switch (ptr_reg->type) {
7221        case PTR_TO_MAP_VALUE_OR_NULL:
7222                verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
7223                        dst, reg_type_str[ptr_reg->type]);
7224                return -EACCES;
7225        case CONST_PTR_TO_MAP:
7226                /* smin_val represents the known value */
7227                if (known && smin_val == 0 && opcode == BPF_ADD)
7228                        break;
7229                fallthrough;
7230        case PTR_TO_PACKET_END:
7231        case PTR_TO_SOCKET:
7232        case PTR_TO_SOCKET_OR_NULL:
7233        case PTR_TO_SOCK_COMMON:
7234        case PTR_TO_SOCK_COMMON_OR_NULL:
7235        case PTR_TO_TCP_SOCK:
7236        case PTR_TO_TCP_SOCK_OR_NULL:
7237        case PTR_TO_XDP_SOCK:
7238                verbose(env, "R%d pointer arithmetic on %s prohibited\n",
7239                        dst, reg_type_str[ptr_reg->type]);
7240                return -EACCES;
7241        default:
7242                break;
7243        }
7244
7245        /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
7246         * The id may be overwritten later if we create a new variable offset.
7247         */
7248        dst_reg->type = ptr_reg->type;
7249        dst_reg->id = ptr_reg->id;
7250
7251        if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
7252            !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
7253                return -EINVAL;
7254
7255        /* pointer types do not carry 32-bit bounds at the moment. */
7256        __mark_reg32_unbounded(dst_reg);
7257
7258        if (sanitize_needed(opcode)) {
7259                ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
7260                                       &info, false);
7261                if (ret < 0)
7262                        return sanitize_err(env, insn, ret, off_reg, dst_reg);
7263        }
7264
7265        switch (opcode) {
7266        case BPF_ADD:
7267                /* We can take a fixed offset as long as it doesn't overflow
7268                 * the s32 'off' field
7269                 */
7270                if (known && (ptr_reg->off + smin_val ==
7271                              (s64)(s32)(ptr_reg->off + smin_val))) {
7272                        /* pointer += K.  Accumulate it into fixed offset */
7273                        dst_reg->smin_value = smin_ptr;
7274                        dst_reg->smax_value = smax_ptr;
7275                        dst_reg->umin_value = umin_ptr;
7276                        dst_reg->umax_value = umax_ptr;
7277                        dst_reg->var_off = ptr_reg->var_off;
7278                        dst_reg->off = ptr_reg->off + smin_val;
7279                        dst_reg->raw = ptr_reg->raw;
7280                        break;
7281                }
7282                /* A new variable offset is created.  Note that off_reg->off
7283                 * == 0, since it's a scalar.
7284                 * dst_reg gets the pointer type and, since an unknown value
7285                 * was added to the pointer, gets a new 'id' if it's a
7286                 * PTR_TO_PACKET.
7287                 * This creates a new 'base' pointer: off_reg (the variable
7288                 * part) gets added into the variable offset, and the fixed
7289                 * offset is copied from ptr_reg.
7290                 */
7291                if (signed_add_overflows(smin_ptr, smin_val) ||
7292                    signed_add_overflows(smax_ptr, smax_val)) {
7293                        dst_reg->smin_value = S64_MIN;
7294                        dst_reg->smax_value = S64_MAX;
7295                } else {
7296                        dst_reg->smin_value = smin_ptr + smin_val;
7297                        dst_reg->smax_value = smax_ptr + smax_val;
7298                }
7299                if (umin_ptr + umin_val < umin_ptr ||
7300                    umax_ptr + umax_val < umax_ptr) {
7301                        dst_reg->umin_value = 0;
7302                        dst_reg->umax_value = U64_MAX;
7303                } else {
7304                        dst_reg->umin_value = umin_ptr + umin_val;
7305                        dst_reg->umax_value = umax_ptr + umax_val;
7306                }
7307                dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
7308                dst_reg->off = ptr_reg->off;
7309                dst_reg->raw = ptr_reg->raw;
7310                if (reg_is_pkt_pointer(ptr_reg)) {
7311                        dst_reg->id = ++env->id_gen;
7312                        /* something was added to pkt_ptr, set range to zero */
7313                        memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7314                }
7315                break;
7316        case BPF_SUB:
7317                if (dst_reg == off_reg) {
7318                        /* scalar -= pointer.  Creates an unknown scalar */
7319                        verbose(env, "R%d tried to subtract pointer from scalar\n",
7320                                dst);
7321                        return -EACCES;
7322                }
7323                /* We don't allow subtraction from FP, because (according to
7324                 * test_verifier.c test "invalid fp arithmetic", JITs might not
7325                 * be able to deal with it.
7326                 */
7327                if (ptr_reg->type == PTR_TO_STACK) {
7328                        verbose(env, "R%d subtraction from stack pointer prohibited\n",
7329                                dst);
7330                        return -EACCES;
7331                }
7332                if (known && (ptr_reg->off - smin_val ==
7333                              (s64)(s32)(ptr_reg->off - smin_val))) {
7334                        /* pointer -= K.  Subtract it from fixed offset */
7335                        dst_reg->smin_value = smin_ptr;
7336                        dst_reg->smax_value = smax_ptr;
7337                        dst_reg->umin_value = umin_ptr;
7338                        dst_reg->umax_value = umax_ptr;
7339                        dst_reg->var_off = ptr_reg->var_off;
7340                        dst_reg->id = ptr_reg->id;
7341                        dst_reg->off = ptr_reg->off - smin_val;
7342                        dst_reg->raw = ptr_reg->raw;
7343                        break;
7344                }
7345                /* A new variable offset is created.  If the subtrahend is known
7346                 * nonnegative, then any reg->range we had before is still good.
7347                 */
7348                if (signed_sub_overflows(smin_ptr, smax_val) ||
7349                    signed_sub_overflows(smax_ptr, smin_val)) {
7350                        /* Overflow possible, we know nothing */
7351                        dst_reg->smin_value = S64_MIN;
7352                        dst_reg->smax_value = S64_MAX;
7353                } else {
7354                        dst_reg->smin_value = smin_ptr - smax_val;
7355                        dst_reg->smax_value = smax_ptr - smin_val;
7356                }
7357                if (umin_ptr < umax_val) {
7358                        /* Overflow possible, we know nothing */
7359                        dst_reg->umin_value = 0;
7360                        dst_reg->umax_value = U64_MAX;
7361                } else {
7362                        /* Cannot overflow (as long as bounds are consistent) */
7363                        dst_reg->umin_value = umin_ptr - umax_val;
7364                        dst_reg->umax_value = umax_ptr - umin_val;
7365                }
7366                dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
7367                dst_reg->off = ptr_reg->off;
7368                dst_reg->raw = ptr_reg->raw;
7369                if (reg_is_pkt_pointer(ptr_reg)) {
7370                        dst_reg->id = ++env->id_gen;
7371                        /* subtracting a negative may advance pkt_ptr, so reset the range */
7372                        if (smin_val < 0)
7373                                memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7374                }
7375                break;
7376        case BPF_AND:
7377        case BPF_OR:
7378        case BPF_XOR:
7379                /* bitwise ops on pointers are troublesome, prohibit. */
7380                verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
7381                        dst, bpf_alu_string[opcode >> 4]);
7382                return -EACCES;
7383        default:
7384                /* other operators (e.g. MUL,LSH) produce non-pointer results */
7385                verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
7386                        dst, bpf_alu_string[opcode >> 4]);
7387                return -EACCES;
7388        }
7389
7390        if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
7391                return -EINVAL;
7392
7393        __update_reg_bounds(dst_reg);
7394        __reg_deduce_bounds(dst_reg);
7395        __reg_bound_offset(dst_reg);
7396
7397        if (sanitize_check_bounds(env, insn, dst_reg) < 0)
7398                return -EACCES;
7399        if (sanitize_needed(opcode)) {
7400                ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
7401                                       &info, true);
7402                if (ret < 0)
7403                        return sanitize_err(env, insn, ret, off_reg, dst_reg);
7404        }
7405
7406        return 0;
7407}
7408
7409static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
7410                                 struct bpf_reg_state *src_reg)
7411{
7412        s32 smin_val = src_reg->s32_min_value;
7413        s32 smax_val = src_reg->s32_max_value;
7414        u32 umin_val = src_reg->u32_min_value;
7415        u32 umax_val = src_reg->u32_max_value;
7416
7417        if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
7418            signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
7419                dst_reg->s32_min_value = S32_MIN;
7420                dst_reg->s32_max_value = S32_MAX;
7421        } else {
7422                dst_reg->s32_min_value += smin_val;
7423                dst_reg->s32_max_value += smax_val;
7424        }
7425        if (dst_reg->u32_min_value + umin_val < umin_val ||
7426            dst_reg->u32_max_value + umax_val < umax_val) {
7427                dst_reg->u32_min_value = 0;
7428                dst_reg->u32_max_value = U32_MAX;
7429        } else {
7430                dst_reg->u32_min_value += umin_val;
7431                dst_reg->u32_max_value += umax_val;
7432        }
7433}
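
/* Worked example for the unsigned check above: dst in [0xffff0000, 0xffffffff]
 * plus src in [0x10000, 0x20000] wraps in u32 (0xffff0000 + 0x10000 is 0
 * modulo 2^32, which is < 0x10000), so the u32 bounds are widened to
 * [0, U32_MAX] instead of being kept as a bogus narrow range.
 */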
7434
7435static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
7436                               struct bpf_reg_state *src_reg)
7437{
7438        s64 smin_val = src_reg->smin_value;
7439        s64 smax_val = src_reg->smax_value;
7440        u64 umin_val = src_reg->umin_value;
7441        u64 umax_val = src_reg->umax_value;
7442
7443        if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
7444            signed_add_overflows(dst_reg->smax_value, smax_val)) {
7445                dst_reg->smin_value = S64_MIN;
7446                dst_reg->smax_value = S64_MAX;
7447        } else {
7448                dst_reg->smin_value += smin_val;
7449                dst_reg->smax_value += smax_val;
7450        }
7451        if (dst_reg->umin_value + umin_val < umin_val ||
7452            dst_reg->umax_value + umax_val < umax_val) {
7453                dst_reg->umin_value = 0;
7454                dst_reg->umax_value = U64_MAX;
7455        } else {
7456                dst_reg->umin_value += umin_val;
7457                dst_reg->umax_value += umax_val;
7458        }
7459}
7460
7461static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
7462                                 struct bpf_reg_state *src_reg)
7463{
7464        s32 smin_val = src_reg->s32_min_value;
7465        s32 smax_val = src_reg->s32_max_value;
7466        u32 umin_val = src_reg->u32_min_value;
7467        u32 umax_val = src_reg->u32_max_value;
7468
7469        if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
7470            signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
7471                /* Overflow possible, we know nothing */
7472                dst_reg->s32_min_value = S32_MIN;
7473                dst_reg->s32_max_value = S32_MAX;
7474        } else {
7475                dst_reg->s32_min_value -= smax_val;
7476                dst_reg->s32_max_value -= smin_val;
7477        }
7478        if (dst_reg->u32_min_value < umax_val) {
7479                /* Overflow possible, we know nothing */
7480                dst_reg->u32_min_value = 0;
7481                dst_reg->u32_max_value = U32_MAX;
7482        } else {
7483                /* Cannot overflow (as long as bounds are consistent) */
7484                dst_reg->u32_min_value -= umax_val;
7485                dst_reg->u32_max_value -= umin_val;
7486        }
7487}
7488
7489static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
7490                               struct bpf_reg_state *src_reg)
7491{
7492        s64 smin_val = src_reg->smin_value;
7493        s64 smax_val = src_reg->smax_value;
7494        u64 umin_val = src_reg->umin_value;
7495        u64 umax_val = src_reg->umax_value;
7496
7497        if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
7498            signed_sub_overflows(dst_reg->smax_value, smin_val)) {
7499                /* Overflow possible, we know nothing */
7500                dst_reg->smin_value = S64_MIN;
7501                dst_reg->smax_value = S64_MAX;
7502        } else {
7503                dst_reg->smin_value -= smax_val;
7504                dst_reg->smax_value -= smin_val;
7505        }
7506        if (dst_reg->umin_value < umax_val) {
7507                /* Overflow possible, we know nothing */
7508                dst_reg->umin_value = 0;
7509                dst_reg->umax_value = U64_MAX;
7510        } else {
7511                /* Cannot overflow (as long as bounds are consistent) */
7512                dst_reg->umin_value -= umax_val;
7513                dst_reg->umax_value -= umin_val;
7514        }
7515}
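
/* Worked example: dst in [5, 10] minus src in [1, 3] gives smin = 5 - 3 = 2
 * and smax = 10 - 1 = 9; the new minimum subtracts the largest possible src
 * and the new maximum the smallest, which is why the overflow checks above
 * pair smin_value with smax_val and smax_value with smin_val.
 */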
7516
7517static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
7518                                 struct bpf_reg_state *src_reg)
7519{
7520        s32 smin_val = src_reg->s32_min_value;
7521        u32 umin_val = src_reg->u32_min_value;
7522        u32 umax_val = src_reg->u32_max_value;
7523
7524        if (smin_val < 0 || dst_reg->s32_min_value < 0) {
7525                /* Ain't nobody got time to multiply that sign */
7526                __mark_reg32_unbounded(dst_reg);
7527                return;
7528        }
7529        /* Both values are positive, so we can work with unsigned and
7530         * copy the result to signed (unless it exceeds S32_MAX).
7531         */
7532        if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
7533                /* Potential overflow, we know nothing */
7534                __mark_reg32_unbounded(dst_reg);
7535                return;
7536        }
7537        dst_reg->u32_min_value *= umin_val;
7538        dst_reg->u32_max_value *= umax_val;
7539        if (dst_reg->u32_max_value > S32_MAX) {
7540                /* Overflow possible, we know nothing */
7541                dst_reg->s32_min_value = S32_MIN;
7542                dst_reg->s32_max_value = S32_MAX;
7543        } else {
7544                dst_reg->s32_min_value = dst_reg->u32_min_value;
7545                dst_reg->s32_max_value = dst_reg->u32_max_value;
7546        }
7547}
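
/* The U16_MAX guard above is what keeps the bound multiplication itself from
 * wrapping: with both operands capped at 65535, the largest product is
 * 65535 * 65535 == 0xfffe0001, which still fits in u32.  Only then can the
 * signed bounds be copied from the unsigned ones (or dropped if the result
 * may exceed S32_MAX).
 */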
7548
7549static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
7550                               struct bpf_reg_state *src_reg)
7551{
7552        s64 smin_val = src_reg->smin_value;
7553        u64 umin_val = src_reg->umin_value;
7554        u64 umax_val = src_reg->umax_value;
7555
7556        if (smin_val < 0 || dst_reg->smin_value < 0) {
7557                /* Ain't nobody got time to multiply that sign */
7558                __mark_reg64_unbounded(dst_reg);
7559                return;
7560        }
7561        /* Both values are positive, so we can work with unsigned and
7562         * copy the result to signed (unless it exceeds S64_MAX).
7563         */
7564        if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
7565                /* Potential overflow, we know nothing */
7566                __mark_reg64_unbounded(dst_reg);
7567                return;
7568        }
7569        dst_reg->umin_value *= umin_val;
7570        dst_reg->umax_value *= umax_val;
7571        if (dst_reg->umax_value > S64_MAX) {
7572                /* Overflow possible, we know nothing */
7573                dst_reg->smin_value = S64_MIN;
7574                dst_reg->smax_value = S64_MAX;
7575        } else {
7576                dst_reg->smin_value = dst_reg->umin_value;
7577                dst_reg->smax_value = dst_reg->umax_value;
7578        }
7579}
7580
7581static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
7582                                 struct bpf_reg_state *src_reg)
7583{
7584        bool src_known = tnum_subreg_is_const(src_reg->var_off);
7585        bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7586        struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7587        s32 smin_val = src_reg->s32_min_value;
7588        u32 umax_val = src_reg->u32_max_value;
7589
7590        if (src_known && dst_known) {
7591                __mark_reg32_known(dst_reg, var32_off.value);
7592                return;
7593        }
7594
7595        /* We get our minimum from the var_off, since that's inherently
7596         * bitwise.  Our maximum is the minimum of the operands' maxima.
7597         */
7598        dst_reg->u32_min_value = var32_off.value;
7599        dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
7600        if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7601                /* Lose signed bounds when ANDing negative numbers,
7602                 * ain't nobody got time for that.
7603                 */
7604                dst_reg->s32_min_value = S32_MIN;
7605                dst_reg->s32_max_value = S32_MAX;
7606        } else {
7607                /* ANDing two positives gives a positive, so safe to
7608                 * cast result into s32.
7609                 */
7610                dst_reg->s32_min_value = dst_reg->u32_min_value;
7611                dst_reg->s32_max_value = dst_reg->u32_max_value;
7612        }
7613}
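
/* Intuition for the AND bounds above: x & y can only clear bits, so the
 * result never exceeds either operand (hence max = min of the maxima), while
 * every bit known to be set in the result tnum contributes to the minimum,
 * e.g. a result var_off of (value = 0x80, mask = 0x7f) gives a minimum of
 * 0x80.  The caller is expected to have already ANDed the tnums, so var_off
 * here describes the result.
 */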
7614
7615static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
7616                               struct bpf_reg_state *src_reg)
7617{
7618        bool src_known = tnum_is_const(src_reg->var_off);
7619        bool dst_known = tnum_is_const(dst_reg->var_off);
7620        s64 smin_val = src_reg->smin_value;
7621        u64 umax_val = src_reg->umax_value;
7622
7623        if (src_known && dst_known) {
7624                __mark_reg_known(dst_reg, dst_reg->var_off.value);
7625                return;
7626        }
7627
7628        /* We get our minimum from the var_off, since that's inherently
7629         * bitwise.  Our maximum is the minimum of the operands' maxima.
7630         */
7631        dst_reg->umin_value = dst_reg->var_off.value;
7632        dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
7633        if (dst_reg->smin_value < 0 || smin_val < 0) {
7634                /* Lose signed bounds when ANDing negative numbers,
7635                 * ain't nobody got time for that.
7636                 */
7637                dst_reg->smin_value = S64_MIN;
7638                dst_reg->smax_value = S64_MAX;
7639        } else {
7640                /* ANDing two positives gives a positive, so safe to
7641                 * cast result into s64.
7642                 */
7643                dst_reg->smin_value = dst_reg->umin_value;
7644                dst_reg->smax_value = dst_reg->umax_value;
7645        }
7646        /* We may learn something more from the var_off */
7647        __update_reg_bounds(dst_reg);
7648}
7649
7650static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
7651                                struct bpf_reg_state *src_reg)
7652{
7653        bool src_known = tnum_subreg_is_const(src_reg->var_off);
7654        bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7655        struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7656        s32 smin_val = src_reg->s32_min_value;
7657        u32 umin_val = src_reg->u32_min_value;
7658
7659        if (src_known && dst_known) {
7660                __mark_reg32_known(dst_reg, var32_off.value);
7661                return;
7662        }
7663
7664        /* We get our maximum from the var_off, and our minimum is the
7665         * maximum of the operands' minima
7666         */
7667        dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
7668        dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7669        if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7670                /* Lose signed bounds when ORing negative numbers,
7671                 * ain't nobody got time for that.
7672                 */
7673                dst_reg->s32_min_value = S32_MIN;
7674                dst_reg->s32_max_value = S32_MAX;
7675        } else {
7676                /* ORing two positives gives a positive, so safe to
7677                 * cast result into s32.
7678                 */
7679                dst_reg->s32_min_value = dst_reg->u32_min_value;
7680                dst_reg->s32_max_value = dst_reg->u32_max_value;
7681        }
7682}
7683
7684static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
7685                              struct bpf_reg_state *src_reg)
7686{
7687        bool src_known = tnum_is_const(src_reg->var_off);
7688        bool dst_known = tnum_is_const(dst_reg->var_off);
7689        s64 smin_val = src_reg->smin_value;
7690        u64 umin_val = src_reg->umin_value;
7691
7692        if (src_known && dst_known) {
7693                __mark_reg_known(dst_reg, dst_reg->var_off.value);
7694                return;
7695        }
7696
7697        /* We get our maximum from the var_off, and our minimum is the
7698         * maximum of the operands' minima
7699         */
7700        dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
7701        dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7702        if (dst_reg->smin_value < 0 || smin_val < 0) {
7703                /* Lose signed bounds when ORing negative numbers,
7704                 * ain't nobody got time for that.
7705                 */
7706                dst_reg->smin_value = S64_MIN;
7707                dst_reg->smax_value = S64_MAX;
7708        } else {
7709                /* ORing two positives gives a positive, so safe to
7710                 * cast result into s64.
7711                 */
7712                dst_reg->smin_value = dst_reg->umin_value;
7713                dst_reg->smax_value = dst_reg->umax_value;
7714        }
7715        /* We may learn something more from the var_off */
7716        __update_reg_bounds(dst_reg);
7717}
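
/* Mirror image of the AND case: x | y can only set bits, so the minimum is
 * the larger of the two minima, and the maximum is read off the result
 * var_off by assuming every unknown bit ends up set (value | mask).
 */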
7718
7719static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
7720                                 struct bpf_reg_state *src_reg)
7721{
7722        bool src_known = tnum_subreg_is_const(src_reg->var_off);
7723        bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7724        struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7725        s32 smin_val = src_reg->s32_min_value;
7726
7727        if (src_known && dst_known) {
7728                __mark_reg32_known(dst_reg, var32_off.value);
7729                return;
7730        }
7731
7732        /* We get both minimum and maximum from the var32_off. */
7733        dst_reg->u32_min_value = var32_off.value;
7734        dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7735
7736        if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
7737                /* XORing two positive sign numbers gives a positive,
7738                 * so safe to cast u32 result into s32.
7739                 */
7740                dst_reg->s32_min_value = dst_reg->u32_min_value;
7741                dst_reg->s32_max_value = dst_reg->u32_max_value;
7742        } else {
7743                dst_reg->s32_min_value = S32_MIN;
7744                dst_reg->s32_max_value = S32_MAX;
7745        }
7746}
7747
7748static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
7749                               struct bpf_reg_state *src_reg)
7750{
7751        bool src_known = tnum_is_const(src_reg->var_off);
7752        bool dst_known = tnum_is_const(dst_reg->var_off);
7753        s64 smin_val = src_reg->smin_value;
7754
7755        if (src_known && dst_known) {
7756                /* dst_reg->var_off.value has been updated earlier */
7757                __mark_reg_known(dst_reg, dst_reg->var_off.value);
7758                return;
7759        }
7760
7761        /* We get both minimum and maximum from the var_off. */
7762        dst_reg->umin_value = dst_reg->var_off.value;
7763        dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7764
7765        if (dst_reg->smin_value >= 0 && smin_val >= 0) {
7766                /* XORing two positive sign numbers gives a positive,
7767                 * so safe to cast u64 result into s64.
7768                 */
7769                dst_reg->smin_value = dst_reg->umin_value;
7770                dst_reg->smax_value = dst_reg->umax_value;
7771        } else {
7772                dst_reg->smin_value = S64_MIN;
7773                dst_reg->smax_value = S64_MAX;
7774        }
7775
7776        __update_reg_bounds(dst_reg);
7777}
7778
7779static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
7780                                   u64 umin_val, u64 umax_val)
7781{
7782        /* We lose all sign bit information (except what we can pick
7783         * up from var_off)
7784         */
7785        dst_reg->s32_min_value = S32_MIN;
7786        dst_reg->s32_max_value = S32_MAX;
7787        /* If we might shift our top bit out, then we know nothing */
7788        if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
7789                dst_reg->u32_min_value = 0;
7790                dst_reg->u32_max_value = U32_MAX;
7791        } else {
7792                dst_reg->u32_min_value <<= umin_val;
7793                dst_reg->u32_max_value <<= umax_val;
7794        }
7795}
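
/* E.g. a value bounded by [0, 0x7fff] shifted left by at most 8 cannot push a
 * set bit past bit 31 (0x7fff <= 1 << (31 - 8)), so the u32 bounds simply
 * become [0 << umin_val, 0x7fff << 8]; if the shift amount could reach 24,
 * the top bit might be shifted out and the bounds are reset to [0, U32_MAX].
 */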
7796
7797static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
7798                                 struct bpf_reg_state *src_reg)
7799{
7800        u32 umax_val = src_reg->u32_max_value;
7801        u32 umin_val = src_reg->u32_min_value;
7802        /* u32 alu operation will zext upper bits */
7803        struct tnum subreg = tnum_subreg(dst_reg->var_off);
7804
7805        __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
7806        dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
7807        /* Not strictly required, but to be careful mark the reg64 bounds as
7808         * unknown, so that we are forced to pick them up from the tnum and
7809         * zext later; if some path skips this step we are still safe.
7810         */
7811        __mark_reg64_unbounded(dst_reg);
7812        __update_reg32_bounds(dst_reg);
7813}
7814
7815static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
7816                                   u64 umin_val, u64 umax_val)
7817{
7818        /* Special case <<32 because it is a common compiler pattern to sign
7819         * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
7820         * positive we know this shift will also be positive so we can track
7821         * bounds correctly. Otherwise we lose all sign bit information except
7822         * what we can pick up from var_off. Perhaps we can generalize this
7823         * later to shifts of any length.
7824         */
7825        if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
7826                dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
7827        else
7828                dst_reg->smax_value = S64_MAX;
7829
7830        if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
7831                dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
7832        else
7833                dst_reg->smin_value = S64_MIN;
7834
7835        /* If we might shift our top bit out, then we know nothing */
7836        if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
7837                dst_reg->umin_value = 0;
7838                dst_reg->umax_value = U64_MAX;
7839        } else {
7840                dst_reg->umin_value <<= umin_val;
7841                dst_reg->umax_value <<= umax_val;
7842        }
7843}
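
/* The <<32 special case above matches the common sign-extension idiom
 * 'r1 <<= 32; r1 s>>= 32': if the 32-bit bounds before the shift are
 * non-negative, say s32 in [0, 100], the 64-bit signed bounds stay exact at
 * [0, 100ULL << 32] instead of collapsing to [S64_MIN, S64_MAX].
 */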
7844
7845static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
7846                               struct bpf_reg_state *src_reg)
7847{
7848        u64 umax_val = src_reg->umax_value;
7849        u64 umin_val = src_reg->umin_value;
7850
7851        /* scalar64 calc uses 32bit unshifted bounds so must be called first */
7852        __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
7853        __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
7854
7855        dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
7856        /* We may learn something more from the var_off */
7857        __update_reg_bounds(dst_reg);
7858}
7859
7860static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
7861                                 struct bpf_reg_state *src_reg)
7862{
7863        struct tnum subreg = tnum_subreg(dst_reg->var_off);
7864        u32 umax_val = src_reg->u32_max_value;
7865        u32 umin_val = src_reg->u32_min_value;
7866
7867        /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
7868         * be negative, then either:
7869         * 1) src_reg might be zero, so the sign bit of the result is
7870         *    unknown, so we lose our signed bounds
7871         * 2) it's known negative, thus the unsigned bounds capture the
7872         *    signed bounds
7873         * 3) the signed bounds cross zero, so they tell us nothing
7874         *    about the result
7875         * If the value in dst_reg is known nonnegative, then again the
7876         * unsigned bounds capture the signed bounds.
7877         * Thus, in all cases it suffices to blow away our signed bounds
7878         * and rely on inferring new ones from the unsigned bounds and
7879         * var_off of the result.
7880         */
7881        dst_reg->s32_min_value = S32_MIN;
7882        dst_reg->s32_max_value = S32_MAX;
7883
7884        dst_reg->var_off = tnum_rshift(subreg, umin_val);
7885        dst_reg->u32_min_value >>= umax_val;
7886        dst_reg->u32_max_value >>= umin_val;
7887
7888        __mark_reg64_unbounded(dst_reg);
7889        __update_reg32_bounds(dst_reg);
7890}
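
/* E.g. a u32 range of [0x100, 0xff00] shifted right by [4, 8] becomes
 * [0x100 >> 8, 0xff00 >> 4] == [0x1, 0xff0]: the smallest result pairs the
 * smallest value with the largest shift and vice versa, and the signed bounds
 * are then re-derived from the unsigned bounds and var_off.
 */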
7891
7892static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
7893                               struct bpf_reg_state *src_reg)
7894{
7895        u64 umax_val = src_reg->umax_value;
7896        u64 umin_val = src_reg->umin_value;
7897
7898        /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
7899         * be negative, then either:
7900         * 1) src_reg might be zero, so the sign bit of the result is
7901         *    unknown, so we lose our signed bounds
7902         * 2) it's known negative, thus the unsigned bounds capture the
7903         *    signed bounds
7904         * 3) the signed bounds cross zero, so they tell us nothing
7905         *    about the result
7906         * If the value in dst_reg is known nonnegative, then again the
7907         * unsigned bounds capture the signed bounds.
7908         * Thus, in all cases it suffices to blow away our signed bounds
7909         * and rely on inferring new ones from the unsigned bounds and
7910         * var_off of the result.
7911         */
7912        dst_reg->smin_value = S64_MIN;
7913        dst_reg->smax_value = S64_MAX;
7914        dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
7915        dst_reg->umin_value >>= umax_val;
7916        dst_reg->umax_value >>= umin_val;
7917
7918        /* It's not easy to operate on alu32 bounds here because it depends
7919         * on bits being shifted in. Take the easy way out and mark them
7920         * unbounded so we can recalculate later from the tnum.
7921         */
7922        __mark_reg32_unbounded(dst_reg);
7923        __update_reg_bounds(dst_reg);
7924}
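
/* For illustration (hypothetical bounds): with dst_reg in the signed range
 * [-2, 2], an unsigned right shift tells us nothing about the sign of the
 * result, since e.g. (u64)-2 >> 1 is 0x7fffffffffffffff while 2 >> 1 is 1;
 * hence the signed bounds are discarded above. The unsigned bounds do
 * survive: for dst_reg in [8, 32] and a shift amount in [1, 3], the result
 * lies in [8 >> 3, 32 >> 1] = [1, 16].
 */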
7925
7926static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
7927                                  struct bpf_reg_state *src_reg)
7928{
7929        u64 umin_val = src_reg->u32_min_value;
7930
7931        /* Upon reaching here, src_known is true and
7932         * umax_val is equal to umin_val.
7933         */
7934        dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
7935        dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
7936
7937        dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
7938
7939        /* blow away the dst_reg umin_value/umax_value and rely on
7940         * dst_reg var_off to refine the result.
7941         */
7942        dst_reg->u32_min_value = 0;
7943        dst_reg->u32_max_value = U32_MAX;
7944
7945        __mark_reg64_unbounded(dst_reg);
7946        __update_reg32_bounds(dst_reg);
7947}
7948
7949static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
7950                                struct bpf_reg_state *src_reg)
7951{
7952        u64 umin_val = src_reg->umin_value;
7953
7954        /* Upon reaching here, src_known is true and umax_val is equal
7955         * to umin_val.
7956         */
7957        dst_reg->smin_value >>= umin_val;
7958        dst_reg->smax_value >>= umin_val;
7959
7960        dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
7961
7962        /* blow away the dst_reg umin_value/umax_value and rely on
7963         * dst_reg var_off to refine the result.
7964         */
7965        dst_reg->umin_value = 0;
7966        dst_reg->umax_value = U64_MAX;
7967
7968        /* It's not easy to operate on alu32 bounds here because it depends
7969         * on bits being shifted in from the upper 32 bits. Take the easy way
7970         * out and mark them unbounded so we can recalculate later from the tnum.
7971         */
7972        __mark_reg32_unbounded(dst_reg);
7973        __update_reg_bounds(dst_reg);
7974}
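
/* For illustration (hypothetical bounds): BPF_ARSH only reaches this helper
 * with a known shift amount, so with dst_reg in the signed range [-16, 8] and
 * a constant shift of 2 the signed bounds simply become
 * [-16 >> 2, 8 >> 2] = [-4, 2], while the unsigned bounds are blown away and
 * later recomputed from the arithmetically shifted var_off.
 */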
7975
7976/* WARNING: This function does calculations on 64-bit values, but the actual
7977 * execution may occur on 32-bit values. Therefore, things like bitshifts
7978 * need extra checks in the 32-bit case.
7979 */
7980static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
7981                                      struct bpf_insn *insn,
7982                                      struct bpf_reg_state *dst_reg,
7983                                      struct bpf_reg_state src_reg)
7984{
7985        struct bpf_reg_state *regs = cur_regs(env);
7986        u8 opcode = BPF_OP(insn->code);
7987        bool src_known;
7988        s64 smin_val, smax_val;
7989        u64 umin_val, umax_val;
7990        s32 s32_min_val, s32_max_val;
7991        u32 u32_min_val, u32_max_val;
7992        u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
7993        bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
7994        int ret;
7995
7996        smin_val = src_reg.smin_value;
7997        smax_val = src_reg.smax_value;
7998        umin_val = src_reg.umin_value;
7999        umax_val = src_reg.umax_value;
8000
8001        s32_min_val = src_reg.s32_min_value;
8002        s32_max_val = src_reg.s32_max_value;
8003        u32_min_val = src_reg.u32_min_value;
8004        u32_max_val = src_reg.u32_max_value;
8005
8006        if (alu32) {
8007                src_known = tnum_subreg_is_const(src_reg.var_off);
8008                if ((src_known &&
8009                     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
8010                    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
8011                        /* Taint dst register if offset had invalid bounds
8012                         * derived from e.g. dead branches.
8013                         */
8014                        __mark_reg_unknown(env, dst_reg);
8015                        return 0;
8016                }
8017        } else {
8018                src_known = tnum_is_const(src_reg.var_off);
8019                if ((src_known &&
8020                     (smin_val != smax_val || umin_val != umax_val)) ||
8021                    smin_val > smax_val || umin_val > umax_val) {
8022                        /* Taint dst register if offset had invalid bounds
8023                         * derived from e.g. dead branches.
8024                         */
8025                        __mark_reg_unknown(env, dst_reg);
8026                        return 0;
8027                }
8028        }
8029
8030        if (!src_known &&
8031            opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8032                __mark_reg_unknown(env, dst_reg);
8033                return 0;
8034        }
8035
8036        if (sanitize_needed(opcode)) {
8037                ret = sanitize_val_alu(env, insn);
8038                if (ret < 0)
8039                        return sanitize_err(env, insn, ret, NULL, NULL);
8040        }
8041
8042        /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
8043         * There are two classes of instructions. For the first class we track
8044         * both alu32 and alu64 sign/unsigned bounds independently; this provides
8045         * the greatest amount of precision when alu operations are mixed with
8046         * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL,
8047         * BPF_AND, BPF_OR and BPF_XOR. This is possible because these ops have
8048         * fairly easy to understand and calculate behavior in both 32-bit and
8049         * 64-bit alu ops. See the alu32 verifier tests for examples. The second
8050         * class of operations, BPF_LSH, BPF_RSH and BPF_ARSH, however, is not so
8051         * easy with regards to tracking sign/unsigned bounds because the bits
8052         * may cross subreg boundaries in the alu64 case. When this happens we
8053         * mark the reg unbounded in the subreg bound space and use the resulting
8054         * tnum to calculate an approximation of the sign/unsigned bounds.
8055         */
8056        switch (opcode) {
8057        case BPF_ADD:
8058                scalar32_min_max_add(dst_reg, &src_reg);
8059                scalar_min_max_add(dst_reg, &src_reg);
8060                dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
8061                break;
8062        case BPF_SUB:
8063                scalar32_min_max_sub(dst_reg, &src_reg);
8064                scalar_min_max_sub(dst_reg, &src_reg);
8065                dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
8066                break;
8067        case BPF_MUL:
8068                dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
8069                scalar32_min_max_mul(dst_reg, &src_reg);
8070                scalar_min_max_mul(dst_reg, &src_reg);
8071                break;
8072        case BPF_AND:
8073                dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
8074                scalar32_min_max_and(dst_reg, &src_reg);
8075                scalar_min_max_and(dst_reg, &src_reg);
8076                break;
8077        case BPF_OR:
8078                dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
8079                scalar32_min_max_or(dst_reg, &src_reg);
8080                scalar_min_max_or(dst_reg, &src_reg);
8081                break;
8082        case BPF_XOR:
8083                dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
8084                scalar32_min_max_xor(dst_reg, &src_reg);
8085                scalar_min_max_xor(dst_reg, &src_reg);
8086                break;
8087        case BPF_LSH:
8088                if (umax_val >= insn_bitness) {
8089                        /* Shifts greater than 31 or 63 are undefined.
8090                         * This includes shifts by a negative number.
8091                         */
8092                        mark_reg_unknown(env, regs, insn->dst_reg);
8093                        break;
8094                }
8095                if (alu32)
8096                        scalar32_min_max_lsh(dst_reg, &src_reg);
8097                else
8098                        scalar_min_max_lsh(dst_reg, &src_reg);
8099                break;
8100        case BPF_RSH:
8101                if (umax_val >= insn_bitness) {
8102                        /* Shifts greater than 31 or 63 are undefined.
8103                         * This includes shifts by a negative number.
8104                         */
8105                        mark_reg_unknown(env, regs, insn->dst_reg);
8106                        break;
8107                }
8108                if (alu32)
8109                        scalar32_min_max_rsh(dst_reg, &src_reg);
8110                else
8111                        scalar_min_max_rsh(dst_reg, &src_reg);
8112                break;
8113        case BPF_ARSH:
8114                if (umax_val >= insn_bitness) {
8115                        /* Shifts greater than 31 or 63 are undefined.
8116                         * This includes shifts by a negative number.
8117                         */
8118                        mark_reg_unknown(env, regs, insn->dst_reg);
8119                        break;
8120                }
8121                if (alu32)
8122                        scalar32_min_max_arsh(dst_reg, &src_reg);
8123                else
8124                        scalar_min_max_arsh(dst_reg, &src_reg);
8125                break;
8126        default:
8127                mark_reg_unknown(env, regs, insn->dst_reg);
8128                break;
8129        }
8130
8131        /* ALU32 ops are zero extended into 64bit register */
8132        if (alu32)
8133                zext_32_to_64(dst_reg);
8134
8135        __update_reg_bounds(dst_reg);
8136        __reg_deduce_bounds(dst_reg);
8137        __reg_bound_offset(dst_reg);
8138        return 0;
8139}
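
/* For illustration (hypothetical program fragment): for an alu64 BPF_ADD such
 * as "r1 += r2", both scalar32_min_max_add() and scalar_min_max_add() run and
 * the 32-bit and 64-bit bounds remain independently tracked, whereas for an
 * alu64 shift such as "r1 >>= 3" the subreg bounds are marked unbounded by
 * scalar_min_max_rsh() and are only re-derived from the resulting tnum by the
 * final __update_reg_bounds()/__reg_deduce_bounds() calls.
 */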
8140
8141/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
8142 * and var_off.
8143 */
8144static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
8145                                   struct bpf_insn *insn)
8146{
8147        struct bpf_verifier_state *vstate = env->cur_state;
8148        struct bpf_func_state *state = vstate->frame[vstate->curframe];
8149        struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
8150        struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
8151        u8 opcode = BPF_OP(insn->code);
8152        int err;
8153
8154        dst_reg = &regs[insn->dst_reg];
8155        src_reg = NULL;
8156        if (dst_reg->type != SCALAR_VALUE)
8157                ptr_reg = dst_reg;
8158        else
8159                /* Make sure ID is cleared otherwise dst_reg min/max could be
8160                 * incorrectly propagated into other registers by find_equal_scalars()
8161                 */
8162                dst_reg->id = 0;
8163        if (BPF_SRC(insn->code) == BPF_X) {
8164                src_reg = &regs[insn->src_reg];
8165                if (src_reg->type != SCALAR_VALUE) {
8166                        if (dst_reg->type != SCALAR_VALUE) {
8167                                /* Combining two pointers by any ALU op yields
8168                                 * an arbitrary scalar. Disallow all math except
8169                                 * pointer subtraction
8170                                 */
8171                                if (opcode == BPF_SUB && env->allow_ptr_leaks) {
8172                                        mark_reg_unknown(env, regs, insn->dst_reg);
8173                                        return 0;
8174                                }
8175                                verbose(env, "R%d pointer %s pointer prohibited\n",
8176                                        insn->dst_reg,
8177                                        bpf_alu_string[opcode >> 4]);
8178                                return -EACCES;
8179                        } else {
8180                                /* scalar += pointer
8181                                 * This is legal, but we have to reverse our
8182                                 * src/dest handling in computing the range
8183                                 */
8184                                err = mark_chain_precision(env, insn->dst_reg);
8185                                if (err)
8186                                        return err;
8187                                return adjust_ptr_min_max_vals(env, insn,
8188                                                               src_reg, dst_reg);
8189                        }
8190                } else if (ptr_reg) {
8191                        /* pointer += scalar */
8192                        err = mark_chain_precision(env, insn->src_reg);
8193                        if (err)
8194                                return err;
8195                        return adjust_ptr_min_max_vals(env, insn,
8196                                                       dst_reg, src_reg);
8197                }
8198        } else {
8199                /* Pretend the src is a reg with a known value, since we only
8200                 * need to be able to read from this state.
8201                 */
8202                off_reg.type = SCALAR_VALUE;
8203                __mark_reg_known(&off_reg, insn->imm);
8204                src_reg = &off_reg;
8205                if (ptr_reg) /* pointer += K */
8206                        return adjust_ptr_min_max_vals(env, insn,
8207                                                       ptr_reg, src_reg);
8208        }
8209
8210        /* Got here implies adding two SCALAR_VALUEs */
8211        if (WARN_ON_ONCE(ptr_reg)) {
8212                print_verifier_state(env, state);
8213                verbose(env, "verifier internal error: unexpected ptr_reg\n");
8214                return -EINVAL;
8215        }
8216        if (WARN_ON(!src_reg)) {
8217                print_verifier_state(env, state);
8218                verbose(env, "verifier internal error: no src_reg\n");
8219                return -EINVAL;
8220        }
8221        return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
8222}
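
/* For illustration (hypothetical program fragment): for "r1 += r2" with r1 a
 * PTR_TO_STACK and r2 a scalar, the "pointer += scalar" path above calls
 * adjust_ptr_min_max_vals() with r1 as the pointer operand; for "r2 += r1"
 * with the same registers, the "scalar += pointer" path reverses the operands
 * so the pointer is still treated as the base. Only when both operands are
 * scalars does adjust_scalar_min_max_vals() run.
 */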
8223
8224/* check validity of 32-bit and 64-bit arithmetic operations */
8225static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
8226{
8227        struct bpf_reg_state *regs = cur_regs(env);
8228        u8 opcode = BPF_OP(insn->code);
8229        int err;
8230
8231        if (opcode == BPF_END || opcode == BPF_NEG) {
8232                if (opcode == BPF_NEG) {
8233                        if (BPF_SRC(insn->code) != 0 ||
8234                            insn->src_reg != BPF_REG_0 ||
8235                            insn->off != 0 || insn->imm != 0) {
8236                                verbose(env, "BPF_NEG uses reserved fields\n");
8237                                return -EINVAL;
8238                        }
8239                } else {
8240                        if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
8241                            (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
8242                            BPF_CLASS(insn->code) == BPF_ALU64) {
8243                                verbose(env, "BPF_END uses reserved fields\n");
8244                                return -EINVAL;
8245                        }
8246                }
8247
8248                /* check src operand */
8249                err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8250                if (err)
8251                        return err;
8252
8253                if (is_pointer_value(env, insn->dst_reg)) {
8254                        verbose(env, "R%d pointer arithmetic prohibited\n",
8255                                insn->dst_reg);
8256                        return -EACCES;
8257                }
8258
8259                /* check dest operand */
8260                err = check_reg_arg(env, insn->dst_reg, DST_OP);
8261                if (err)
8262                        return err;
8263
8264        } else if (opcode == BPF_MOV) {
8265
8266                if (BPF_SRC(insn->code) == BPF_X) {
8267                        if (insn->imm != 0 || insn->off != 0) {
8268                                verbose(env, "BPF_MOV uses reserved fields\n");
8269                                return -EINVAL;
8270                        }
8271
8272                        /* check src operand */
8273                        err = check_reg_arg(env, insn->src_reg, SRC_OP);
8274                        if (err)
8275                                return err;
8276                } else {
8277                        if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8278                                verbose(env, "BPF_MOV uses reserved fields\n");
8279                                return -EINVAL;
8280                        }
8281                }
8282
8283                /* check dest operand, mark as required later */
8284                err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8285                if (err)
8286                        return err;
8287
8288                if (BPF_SRC(insn->code) == BPF_X) {
8289                        struct bpf_reg_state *src_reg = regs + insn->src_reg;
8290                        struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
8291
8292                        if (BPF_CLASS(insn->code) == BPF_ALU64) {
8293                                /* case: R1 = R2
8294                                 * copy register state to dest reg
8295                                 */
8296                                if (src_reg->type == SCALAR_VALUE && !src_reg->id)
8297                                        /* Assign src and dst registers the same ID
8298                                         * that will be used by find_equal_scalars()
8299                                         * to propagate min/max range.
8300                                         */
8301                                        src_reg->id = ++env->id_gen;
8302                                *dst_reg = *src_reg;
8303                                dst_reg->live |= REG_LIVE_WRITTEN;
8304                                dst_reg->subreg_def = DEF_NOT_SUBREG;
8305                        } else {
8306                                /* R1 = (u32) R2 */
8307                                if (is_pointer_value(env, insn->src_reg)) {
8308                                        verbose(env,
8309                                                "R%d partial copy of pointer\n",
8310                                                insn->src_reg);
8311                                        return -EACCES;
8312                                } else if (src_reg->type == SCALAR_VALUE) {
8313                                        *dst_reg = *src_reg;
8314                                        /* Make sure ID is cleared otherwise
8315                                         * dst_reg min/max could be incorrectly
8316                                         * propagated into src_reg by find_equal_scalars()
8317                                         */
8318                                        dst_reg->id = 0;
8319                                        dst_reg->live |= REG_LIVE_WRITTEN;
8320                                        dst_reg->subreg_def = env->insn_idx + 1;
8321                                } else {
8322                                        mark_reg_unknown(env, regs,
8323                                                         insn->dst_reg);
8324                                }
8325                                zext_32_to_64(dst_reg);
8326
8327                                __update_reg_bounds(dst_reg);
8328                                __reg_deduce_bounds(dst_reg);
8329                                __reg_bound_offset(dst_reg);
8330                        }
8331                } else {
8332                        /* case: R = imm
8333                         * remember the value we stored into this reg
8334                         */
8335                        /* clear any state __mark_reg_known doesn't set */
8336                        mark_reg_unknown(env, regs, insn->dst_reg);
8337                        regs[insn->dst_reg].type = SCALAR_VALUE;
8338                        if (BPF_CLASS(insn->code) == BPF_ALU64) {
8339                                __mark_reg_known(regs + insn->dst_reg,
8340                                                 insn->imm);
8341                        } else {
8342                                __mark_reg_known(regs + insn->dst_reg,
8343                                                 (u32)insn->imm);
8344                        }
8345                }
8346
8347        } else if (opcode > BPF_END) {
8348                verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
8349                return -EINVAL;
8350
8351        } else {        /* all other ALU ops: and, sub, xor, add, ... */
8352
8353                if (BPF_SRC(insn->code) == BPF_X) {
8354                        if (insn->imm != 0 || insn->off != 0) {
8355                                verbose(env, "BPF_ALU uses reserved fields\n");
8356                                return -EINVAL;
8357                        }
8358                        /* check src1 operand */
8359                        err = check_reg_arg(env, insn->src_reg, SRC_OP);
8360                        if (err)
8361                                return err;
8362                } else {
8363                        if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8364                                verbose(env, "BPF_ALU uses reserved fields\n");
8365                                return -EINVAL;
8366                        }
8367                }
8368
8369                /* check src2 operand */
8370                err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8371                if (err)
8372                        return err;
8373
8374                if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
8375                    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
8376                        verbose(env, "div by zero\n");
8377                        return -EINVAL;
8378                }
8379
8380                if ((opcode == BPF_LSH || opcode == BPF_RSH ||
8381                     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
8382                        int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
8383
8384                        if (insn->imm < 0 || insn->imm >= size) {
8385                                verbose(env, "invalid shift %d\n", insn->imm);
8386                                return -EINVAL;
8387                        }
8388                }
8389
8390                /* check dest operand */
8391                err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8392                if (err)
8393                        return err;
8394
8395                return adjust_reg_min_max_vals(env, insn);
8396        }
8397
8398        return 0;
8399}
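
/* A few encodings rejected above, for illustration: BPF_NEG with a non-zero
 * imm trips the "BPF_NEG uses reserved fields" check, BPF_END in the
 * BPF_ALU64 class trips "BPF_END uses reserved fields", a BPF_K divide by
 * zero is refused with "div by zero", and an immediate shift of 32 on a
 * 32-bit (BPF_ALU) op is refused with "invalid shift 32".
 */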
8400
8401static void __find_good_pkt_pointers(struct bpf_func_state *state,
8402                                     struct bpf_reg_state *dst_reg,
8403                                     enum bpf_reg_type type, int new_range)
8404{
8405        struct bpf_reg_state *reg;
8406        int i;
8407
8408        for (i = 0; i < MAX_BPF_REG; i++) {
8409                reg = &state->regs[i];
8410                if (reg->type == type && reg->id == dst_reg->id)
8411                        /* keep the maximum range already checked */
8412                        reg->range = max(reg->range, new_range);
8413        }
8414
8415        bpf_for_each_spilled_reg(i, state, reg) {
8416                if (!reg)
8417                        continue;
8418                if (reg->type == type && reg->id == dst_reg->id)
8419                        reg->range = max(reg->range, new_range);
8420        }
8421}
8422
8423static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
8424                                   struct bpf_reg_state *dst_reg,
8425                                   enum bpf_reg_type type,
8426                                   bool range_right_open)
8427{
8428        int new_range, i;
8429
8430        if (dst_reg->off < 0 ||
8431            (dst_reg->off == 0 && range_right_open))
8432                /* This doesn't give us any range */
8433                return;
8434
8435        if (dst_reg->umax_value > MAX_PACKET_OFF ||
8436            dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
8437                /* Risk of overflow.  For instance, ptr + (1<<63) may be less
8438                 * than pkt_end, but that's because it's also less than pkt.
8439                 */
8440                return;
8441
8442        new_range = dst_reg->off;
8443        if (range_right_open)
8444                new_range++;
8445
8446        /* Examples for register markings:
8447         *
8448         * pkt_data in dst register:
8449         *
8450         *   r2 = r3;
8451         *   r2 += 8;
8452         *   if (r2 > pkt_end) goto <handle exception>
8453         *   <access okay>
8454         *
8455         *   r2 = r3;
8456         *   r2 += 8;
8457         *   if (r2 < pkt_end) goto <access okay>
8458         *   <handle exception>
8459         *
8460         *   Where:
8461         *     r2 == dst_reg, pkt_end == src_reg
8462         *     r2=pkt(id=n,off=8,r=0)
8463         *     r3=pkt(id=n,off=0,r=0)
8464         *
8465         * pkt_data in src register:
8466         *
8467         *   r2 = r3;
8468         *   r2 += 8;
8469         *   if (pkt_end >= r2) goto <access okay>
8470         *   <handle exception>
8471         *
8472         *   r2 = r3;
8473         *   r2 += 8;
8474         *   if (pkt_end <= r2) goto <handle exception>
8475         *   <access okay>
8476         *
8477         *   Where:
8478         *     pkt_end == dst_reg, r2 == src_reg
8479         *     r2=pkt(id=n,off=8,r=0)
8480         *     r3=pkt(id=n,off=0,r=0)
8481         *
8482         * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
8483         * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
8484         * and [r3, r3 + 8-1) respectively is safe to access depending on
8485         * the check.
8486         */
8487
8488        /* If our ids match, then we must have the same max_value.  And we
8489         * don't care about the other reg's fixed offset, since if it's too big
8490         * the range won't allow anything.
8491         * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
8492         */
8493        for (i = 0; i <= vstate->curframe; i++)
8494                __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
8495                                         new_range);
8496}
8497
8498static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
8499{
8500        struct tnum subreg = tnum_subreg(reg->var_off);
8501        s32 sval = (s32)val;
8502
8503        switch (opcode) {
8504        case BPF_JEQ:
8505                if (tnum_is_const(subreg))
8506                        return !!tnum_equals_const(subreg, val);
8507                break;
8508        case BPF_JNE:
8509                if (tnum_is_const(subreg))
8510                        return !tnum_equals_const(subreg, val);
8511                break;
8512        case BPF_JSET:
8513                if ((~subreg.mask & subreg.value) & val)
8514                        return 1;
8515                if (!((subreg.mask | subreg.value) & val))
8516                        return 0;
8517                break;
8518        case BPF_JGT:
8519                if (reg->u32_min_value > val)
8520                        return 1;
8521                else if (reg->u32_max_value <= val)
8522                        return 0;
8523                break;
8524        case BPF_JSGT:
8525                if (reg->s32_min_value > sval)
8526                        return 1;
8527                else if (reg->s32_max_value <= sval)
8528                        return 0;
8529                break;
8530        case BPF_JLT:
8531                if (reg->u32_max_value < val)
8532                        return 1;
8533                else if (reg->u32_min_value >= val)
8534                        return 0;
8535                break;
8536        case BPF_JSLT:
8537                if (reg->s32_max_value < sval)
8538                        return 1;
8539                else if (reg->s32_min_value >= sval)
8540                        return 0;
8541                break;
8542        case BPF_JGE:
8543                if (reg->u32_min_value >= val)
8544                        return 1;
8545                else if (reg->u32_max_value < val)
8546                        return 0;
8547                break;
8548        case BPF_JSGE:
8549                if (reg->s32_min_value >= sval)
8550                        return 1;
8551                else if (reg->s32_max_value < sval)
8552                        return 0;
8553                break;
8554        case BPF_JLE:
8555                if (reg->u32_max_value <= val)
8556                        return 1;
8557                else if (reg->u32_min_value > val)
8558                        return 0;
8559                break;
8560        case BPF_JSLE:
8561                if (reg->s32_max_value <= sval)
8562                        return 1;
8563                else if (reg->s32_min_value > sval)
8564                        return 0;
8565                break;
8566        }
8567
8568        return -1;
8569}
8570
8571
8572static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
8573{
8574        s64 sval = (s64)val;
8575
8576        switch (opcode) {
8577        case BPF_JEQ:
8578                if (tnum_is_const(reg->var_off))
8579                        return !!tnum_equals_const(reg->var_off, val);
8580                break;
8581        case BPF_JNE:
8582                if (tnum_is_const(reg->var_off))
8583                        return !tnum_equals_const(reg->var_off, val);
8584                break;
8585        case BPF_JSET:
8586                if ((~reg->var_off.mask & reg->var_off.value) & val)
8587                        return 1;
8588                if (!((reg->var_off.mask | reg->var_off.value) & val))
8589                        return 0;
8590                break;
8591        case BPF_JGT:
8592                if (reg->umin_value > val)
8593                        return 1;
8594                else if (reg->umax_value <= val)
8595                        return 0;
8596                break;
8597        case BPF_JSGT:
8598                if (reg->smin_value > sval)
8599                        return 1;
8600                else if (reg->smax_value <= sval)
8601                        return 0;
8602                break;
8603        case BPF_JLT:
8604                if (reg->umax_value < val)
8605                        return 1;
8606                else if (reg->umin_value >= val)
8607                        return 0;
8608                break;
8609        case BPF_JSLT:
8610                if (reg->smax_value < sval)
8611                        return 1;
8612                else if (reg->smin_value >= sval)
8613                        return 0;
8614                break;
8615        case BPF_JGE:
8616                if (reg->umin_value >= val)
8617                        return 1;
8618                else if (reg->umax_value < val)
8619                        return 0;
8620                break;
8621        case BPF_JSGE:
8622                if (reg->smin_value >= sval)
8623                        return 1;
8624                else if (reg->smax_value < sval)
8625                        return 0;
8626                break;
8627        case BPF_JLE:
8628                if (reg->umax_value <= val)
8629                        return 1;
8630                else if (reg->umin_value > val)
8631                        return 0;
8632                break;
8633        case BPF_JSLE:
8634                if (reg->smax_value <= sval)
8635                        return 1;
8636                else if (reg->smin_value > sval)
8637                        return 0;
8638                break;
8639        }
8640
8641        return -1;
8642}
8643
8644/* compute branch direction of the expression "if (reg opcode val) goto target;"
8645 * and return:
8646 *  1 - branch will be taken and "goto target" will be executed
8647 *  0 - branch will not be taken and fall-through to next insn
8648 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
8649 *      range [0,10]
8650 */
8651static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
8652                           bool is_jmp32)
8653{
8654        if (__is_pointer_value(false, reg)) {
8655                if (!reg_type_not_null(reg->type))
8656                        return -1;
8657
8658                /* If pointer is valid tests against zero will fail so we can
8659                 * use this to direct branch taken.
8660                 */
8661                if (val != 0)
8662                        return -1;
8663
8664                switch (opcode) {
8665                case BPF_JEQ:
8666                        return 0;
8667                case BPF_JNE:
8668                        return 1;
8669                default:
8670                        return -1;
8671                }
8672        }
8673
8674        if (is_jmp32)
8675                return is_branch32_taken(reg, val, opcode);
8676        return is_branch64_taken(reg, val, opcode);
8677}
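
/* For illustration (hypothetical bounds): for "if (r1 < 5)" with r1 known to
 * lie in the unsigned range [6, 10], is_branch64_taken() sees
 * umin_value >= val and returns 0 (never taken); with r1 in [0, 3] it sees
 * umax_value < val and returns 1 (always taken); with the [0, 10] range from
 * the comment above neither test fires and the result is -1.
 */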
8678
8679static int flip_opcode(u32 opcode)
8680{
8681        /* How can we transform "a <op> b" into "b <op> a"? */
8682        static const u8 opcode_flip[16] = {
8683                /* these stay the same */
8684                [BPF_JEQ  >> 4] = BPF_JEQ,
8685                [BPF_JNE  >> 4] = BPF_JNE,
8686                [BPF_JSET >> 4] = BPF_JSET,
8687                /* these swap "lesser" and "greater" (L and G in the opcodes) */
8688                [BPF_JGE  >> 4] = BPF_JLE,
8689                [BPF_JGT  >> 4] = BPF_JLT,
8690                [BPF_JLE  >> 4] = BPF_JGE,
8691                [BPF_JLT  >> 4] = BPF_JGT,
8692                [BPF_JSGE >> 4] = BPF_JSLE,
8693                [BPF_JSGT >> 4] = BPF_JSLT,
8694                [BPF_JSLE >> 4] = BPF_JSGE,
8695                [BPF_JSLT >> 4] = BPF_JSGT
8696        };
8697        return opcode_flip[opcode >> 4];
8698}
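
/* For illustration: flip_opcode(BPF_JGT) is BPF_JLT, so a comparison such as
 * "pkt_end > r2" can be handled by the same logic as "r2 < pkt_end"; the
 * symmetric opcodes BPF_JEQ, BPF_JNE and BPF_JSET map to themselves.
 */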
8699
8700static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
8701                                   struct bpf_reg_state *src_reg,
8702                                   u8 opcode)
8703{
8704        struct bpf_reg_state *pkt;
8705
8706        if (src_reg->type == PTR_TO_PACKET_END) {
8707                pkt = dst_reg;
8708        } else if (dst_reg->type == PTR_TO_PACKET_END) {
8709                pkt = src_reg;
8710                opcode = flip_opcode(opcode);
8711        } else {
8712                return -1;
8713        }
8714
8715        if (pkt->range >= 0)
8716                return -1;
8717
8718        switch (opcode) {
8719        case BPF_JLE:
8720                /* pkt <= pkt_end */
8721                fallthrough;
8722        case BPF_JGT:
8723                /* pkt > pkt_end */
8724                if (pkt->range == BEYOND_PKT_END)
8725                        /* pkt has at least one extra byte beyond pkt_end */
8726                        return opcode == BPF_JGT;
8727                break;
8728        case BPF_JLT:
8729                /* pkt < pkt_end */
8730                fallthrough;
8731        case BPF_JGE:
8732                /* pkt >= pkt_end */
8733                if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
8734                        return opcode == BPF_JGE;
8735                break;
8736        }
8737        return -1;
8738}
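
/* For illustration: once mark_pkt_end() has recorded that a packet pointer
 * lies strictly beyond pkt_end (range == BEYOND_PKT_END), a later
 * "pkt > pkt_end" test on it is known taken (BPF_JGT returns 1 above) and
 * "pkt <= pkt_end" is known not taken, so the dead branch need not be
 * explored.
 */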
8739
8740/* Adjusts the register min/max values in the case that the dst_reg is the
8741 * variable register that we are working on, and src_reg is a constant or we're
8742 * simply doing a BPF_K check.
8743 * In JEQ/JNE cases we also adjust the var_off values.
8744 */
8745static void reg_set_min_max(struct bpf_reg_state *true_reg,
8746                            struct bpf_reg_state *false_reg,
8747                            u64 val, u32 val32,
8748                            u8 opcode, bool is_jmp32)
8749{
8750        struct tnum false_32off = tnum_subreg(false_reg->var_off);
8751        struct tnum false_64off = false_reg->var_off;
8752        struct tnum true_32off = tnum_subreg(true_reg->var_off);
8753        struct tnum true_64off = true_reg->var_off;
8754        s64 sval = (s64)val;
8755        s32 sval32 = (s32)val32;
8756
8757        /* If the dst_reg is a pointer, we can't learn anything about its
8758         * variable offset from the compare (unless src_reg were a pointer into
8759         * the same object, but we don't bother with that).
8760         * Since false_reg and true_reg have the same type by construction, we
8761         * only need to check one of them for pointerness.
8762         */
8763        if (__is_pointer_value(false, false_reg))
8764                return;
8765
8766        switch (opcode) {
8767        case BPF_JEQ:
8768        case BPF_JNE:
8769        {
8770                struct bpf_reg_state *reg =
8771                        opcode == BPF_JEQ ? true_reg : false_reg;
8772
8773                /* JEQ/JNE comparison doesn't change the register equivalence.
8774                 * r1 = r2;
8775                 * if (r1 == 42) goto label;
8776                 * ...
8777                 * label: // here both r1 and r2 are known to be 42.
8778                 *
8779                 * Hence when marking register as known preserve it's ID.
8780         * Hence when marking a register as known, preserve its ID.
8781                if (is_jmp32)
8782                        __mark_reg32_known(reg, val32);
8783                else
8784                        ___mark_reg_known(reg, val);
8785                break;
8786        }
8787        case BPF_JSET:
8788                if (is_jmp32) {
8789                        false_32off = tnum_and(false_32off, tnum_const(~val32));
8790                        if (is_power_of_2(val32))
8791                                true_32off = tnum_or(true_32off,
8792                                                     tnum_const(val32));
8793                } else {
8794                        false_64off = tnum_and(false_64off, tnum_const(~val));
8795                        if (is_power_of_2(val))
8796                                true_64off = tnum_or(true_64off,
8797                                                     tnum_const(val));
8798                }
8799                break;
8800        case BPF_JGE:
8801        case BPF_JGT:
8802        {
8803                if (is_jmp32) {
8804                        u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
8805                        u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
8806
8807                        false_reg->u32_max_value = min(false_reg->u32_max_value,
8808                                                       false_umax);
8809                        true_reg->u32_min_value = max(true_reg->u32_min_value,
8810                                                      true_umin);
8811                } else {
8812                        u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
8813                        u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
8814
8815                        false_reg->umax_value = min(false_reg->umax_value, false_umax);
8816                        true_reg->umin_value = max(true_reg->umin_value, true_umin);
8817                }
8818                break;
8819        }
8820        case BPF_JSGE:
8821        case BPF_JSGT:
8822        {
8823                if (is_jmp32) {
8824                        s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
8825                        s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
8826
8827                        false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
8828                        true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
8829                } else {
8830                        s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
8831                        s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
8832
8833                        false_reg->smax_value = min(false_reg->smax_value, false_smax);
8834                        true_reg->smin_value = max(true_reg->smin_value, true_smin);
8835                }
8836                break;
8837        }
8838        case BPF_JLE:
8839        case BPF_JLT:
8840        {
8841                if (is_jmp32) {
8842                        u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
8843                        u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
8844
8845                        false_reg->u32_min_value = max(false_reg->u32_min_value,
8846                                                       false_umin);
8847                        true_reg->u32_max_value = min(true_reg->u32_max_value,
8848                                                      true_umax);
8849                } else {
8850                        u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
8851                        u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
8852
8853                        false_reg->umin_value = max(false_reg->umin_value, false_umin);
8854                        true_reg->umax_value = min(true_reg->umax_value, true_umax);
8855                }
8856                break;
8857        }
8858        case BPF_JSLE:
8859        case BPF_JSLT:
8860        {
8861                if (is_jmp32) {
8862                        s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
8863                        s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
8864
8865                        false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
8866                        true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
8867                } else {
8868                        s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
8869                        s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
8870
8871                        false_reg->smin_value = max(false_reg->smin_value, false_smin);
8872                        true_reg->smax_value = min(true_reg->smax_value, true_smax);
8873                }
8874                break;
8875        }
8876        default:
8877                return;
8878        }
8879
8880        if (is_jmp32) {
8881                false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
8882                                             tnum_subreg(false_32off));
8883                true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
8884                                            tnum_subreg(true_32off));
8885                __reg_combine_32_into_64(false_reg);
8886                __reg_combine_32_into_64(true_reg);
8887        } else {
8888                false_reg->var_off = false_64off;
8889                true_reg->var_off = true_64off;
8890                __reg_combine_64_into_32(false_reg);
8891                __reg_combine_64_into_32(true_reg);
8892        }
8893}
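
/* A worked example (hypothetical bounds): for "if (r1 > 5)" with r1 in the
 * unsigned range [0, 10], the BPF_JGT case above clamps the false branch to
 * umax = min(10, 5) = 5 and the true branch to umin = max(0, 5 + 1) = 6, so
 * the two branches continue with r1 in [0, 5] and [6, 10] respectively.
 */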
8894
8895/* Same as above, but for the case that dst_reg holds a constant and src_reg is
8896 * the variable reg.
8897 */
8898static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
8899                                struct bpf_reg_state *false_reg,
8900                                u64 val, u32 val32,
8901                                u8 opcode, bool is_jmp32)
8902{
8903        opcode = flip_opcode(opcode);
8904        /* This uses zero as "not present in table"; luckily the zero opcode,
8905         * BPF_JA, can't get here.
8906         */
8907        if (opcode)
8908                reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
8909}
8910
8911/* Regs are known to be equal, so intersect their min/max/var_off */
8912static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
8913                                  struct bpf_reg_state *dst_reg)
8914{
8915        src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
8916                                                        dst_reg->umin_value);
8917        src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
8918                                                        dst_reg->umax_value);
8919        src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
8920                                                        dst_reg->smin_value);
8921        src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
8922                                                        dst_reg->smax_value);
8923        src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
8924                                                             dst_reg->var_off);
8925        /* We might have learned new bounds from the var_off. */
8926        __update_reg_bounds(src_reg);
8927        __update_reg_bounds(dst_reg);
8928        /* We might have learned something about the sign bit. */
8929        __reg_deduce_bounds(src_reg);
8930        __reg_deduce_bounds(dst_reg);
8931        /* We might have learned some bits from the bounds. */
8932        __reg_bound_offset(src_reg);
8933        __reg_bound_offset(dst_reg);
8934        /* Intersecting with the old var_off might have improved our bounds
8935         * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
8936         * then new var_off is (0; 0x7f...fc) which improves our umax.
8937         */
8938        __update_reg_bounds(src_reg);
8939        __update_reg_bounds(dst_reg);
8940}
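
/* A worked example (hypothetical bounds): if r1 is known to lie in [0, 10]
 * and r2 in [5, 20], then after "if (r1 == r2)" the equal branch intersects
 * the ranges above and both registers continue with bounds [5, 10].
 */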
8941
8942static void reg_combine_min_max(struct bpf_reg_state *true_src,
8943                                struct bpf_reg_state *true_dst,
8944                                struct bpf_reg_state *false_src,
8945                                struct bpf_reg_state *false_dst,
8946                                u8 opcode)
8947{
8948        switch (opcode) {
8949        case BPF_JEQ:
8950                __reg_combine_min_max(true_src, true_dst);
8951                break;
8952        case BPF_JNE:
8953                __reg_combine_min_max(false_src, false_dst);
8954                break;
8955        }
8956}
8957
8958static void mark_ptr_or_null_reg(struct bpf_func_state *state,
8959                                 struct bpf_reg_state *reg, u32 id,
8960                                 bool is_null)
8961{
8962        if (reg_type_may_be_null(reg->type) && reg->id == id &&
8963            !WARN_ON_ONCE(!reg->id)) {
8964                /* Old offset (both fixed and variable parts) should
8965                 * have been known-zero, because we don't allow pointer
8966                 * arithmetic on pointers that might be NULL.
8967                 */
8968                if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
8969                                 !tnum_equals_const(reg->var_off, 0) ||
8970                                 reg->off)) {
8971                        __mark_reg_known_zero(reg);
8972                        reg->off = 0;
8973                }
8974                if (is_null) {
8975                        reg->type = SCALAR_VALUE;
8976                        /* We don't need id and ref_obj_id from this point
8977                         * onwards anymore, so reset them both here to give
8978                         * state pruning a chance to take effect.
8979                         */
8980                        reg->id = 0;
8981                        reg->ref_obj_id = 0;
8982
8983                        return;
8984                }
8985
8986                mark_ptr_not_null_reg(reg);
8987
8988                if (!reg_may_point_to_spin_lock(reg)) {
8989                        /* For not-NULL ptr, reg->ref_obj_id will be reset
8990                         * in release_reg_references().
8991                         *
8992                         * reg->id is still used by spin_lock ptr. Other
8993                         * than spin_lock ptr type, reg->id can be reset.
8994                         */
8995                        reg->id = 0;
8996                }
8997        }
8998}
8999
9000static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
9001                                    bool is_null)
9002{
9003        struct bpf_reg_state *reg;
9004        int i;
9005
9006        for (i = 0; i < MAX_BPF_REG; i++)
9007                mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
9008
9009        bpf_for_each_spilled_reg(i, state, reg) {
9010                if (!reg)
9011                        continue;
9012                mark_ptr_or_null_reg(state, reg, id, is_null);
9013        }
9014}
9015
9016/* The logic is similar to find_good_pkt_pointers(), both could eventually
9017 * be folded together at some point.
9018 */
9019static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
9020                                  bool is_null)
9021{
9022        struct bpf_func_state *state = vstate->frame[vstate->curframe];
9023        struct bpf_reg_state *regs = state->regs;
9024        u32 ref_obj_id = regs[regno].ref_obj_id;
9025        u32 id = regs[regno].id;
9026        int i;
9027
9028        if (ref_obj_id && ref_obj_id == id && is_null)
9029                /* regs[regno] is in the " == NULL" branch.
9030                 * No one could have freed the reference state before
9031                 * doing the NULL check.
9032                 */
9033                WARN_ON_ONCE(release_reference_state(state, id));
9034
9035        for (i = 0; i <= vstate->curframe; i++)
9036                __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
9037}
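
/* For illustration (hypothetical program fragment): after
 * "r0 = bpf_map_lookup_elem(...); if (r0 == 0) goto out;", the fall-through
 * branch converts every register sharing r0's id from its may-be-NULL pointer
 * type to the not-NULL variant via mark_ptr_not_null_reg(), while the taken
 * branch turns them into plain scalars and clears id/ref_obj_id as above.
 */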
9038
9039static bool try_match_pkt_pointers(const struct bpf_insn *insn,
9040                                   struct bpf_reg_state *dst_reg,
9041                                   struct bpf_reg_state *src_reg,
9042                                   struct bpf_verifier_state *this_branch,
9043                                   struct bpf_verifier_state *other_branch)
9044{
9045        if (BPF_SRC(insn->code) != BPF_X)
9046                return false;
9047
9048        /* Pointers are always 64-bit. */
9049        if (BPF_CLASS(insn->code) == BPF_JMP32)
9050                return false;
9051
9052        switch (BPF_OP(insn->code)) {
9053        case BPF_JGT:
9054                if ((dst_reg->type == PTR_TO_PACKET &&
9055                     src_reg->type == PTR_TO_PACKET_END) ||
9056                    (dst_reg->type == PTR_TO_PACKET_META &&
9057                     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9058                        /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
9059                        find_good_pkt_pointers(this_branch, dst_reg,
9060                                               dst_reg->type, false);
9061                        mark_pkt_end(other_branch, insn->dst_reg, true);
9062                } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9063                            src_reg->type == PTR_TO_PACKET) ||
9064                           (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9065                            src_reg->type == PTR_TO_PACKET_META)) {
9066                        /* pkt_end > pkt_data', pkt_data > pkt_meta' */
9067                        find_good_pkt_pointers(other_branch, src_reg,
9068                                               src_reg->type, true);
9069                        mark_pkt_end(this_branch, insn->src_reg, false);
9070                } else {
9071                        return false;
9072                }
9073                break;
9074        case BPF_JLT:
9075                if ((dst_reg->type == PTR_TO_PACKET &&
9076                     src_reg->type == PTR_TO_PACKET_END) ||
9077                    (dst_reg->type == PTR_TO_PACKET_META &&
9078                     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9079                        /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
9080                        find_good_pkt_pointers(other_branch, dst_reg,
9081                                               dst_reg->type, true);
9082                        mark_pkt_end(this_branch, insn->dst_reg, false);
9083                } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9084                            src_reg->type == PTR_TO_PACKET) ||
9085                           (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9086                            src_reg->type == PTR_TO_PACKET_META)) {
9087                        /* pkt_end < pkt_data', pkt_data < pkt_meta' */
9088                        find_good_pkt_pointers(this_branch, src_reg,
9089                                               src_reg->type, false);
9090                        mark_pkt_end(other_branch, insn->src_reg, true);
9091                } else {
9092                        return false;
9093                }
9094                break;
9095        case BPF_JGE:
9096                if ((dst_reg->type == PTR_TO_PACKET &&
9097                     src_reg->type == PTR_TO_PACKET_END) ||
9098                    (dst_reg->type == PTR_TO_PACKET_META &&
9099                     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9100                        /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
9101                        find_good_pkt_pointers(this_branch, dst_reg,
9102                                               dst_reg->type, true);
9103                        mark_pkt_end(other_branch, insn->dst_reg, false);
9104                } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9105                            src_reg->type == PTR_TO_PACKET) ||
9106                           (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9107                            src_reg->type == PTR_TO_PACKET_META)) {
9108                        /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
9109                        find_good_pkt_pointers(other_branch, src_reg,
9110                                               src_reg->type, false);
9111                        mark_pkt_end(this_branch, insn->src_reg, true);
9112                } else {
9113                        return false;
9114                }
9115                break;
9116        case BPF_JLE:
9117                if ((dst_reg->type == PTR_TO_PACKET &&
9118                     src_reg->type == PTR_TO_PACKET_END) ||
9119                    (dst_reg->type == PTR_TO_PACKET_META &&
9120                     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9121                        /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
9122                        find_good_pkt_pointers(other_branch, dst_reg,
9123                                               dst_reg->type, false);
9124                        mark_pkt_end(this_branch, insn->dst_reg, true);
9125                } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9126                            src_reg->type == PTR_TO_PACKET) ||
9127                           (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9128                            src_reg->type == PTR_TO_PACKET_META)) {
9129                        /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
9130                        find_good_pkt_pointers(this_branch, src_reg,
9131                                               src_reg->type, true);
9132                        mark_pkt_end(other_branch, insn->src_reg, false);
9133                } else {
9134                        return false;
9135                }
9136                break;
9137        default:
9138                return false;
9139        }
9140
9141        return true;
9142}
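
/* For illustration (hypothetical program fragment): for
 * "r2 = r3; r2 += 8; if (r2 > pkt_end) goto drop;", the BPF_JGT case above
 * calls find_good_pkt_pointers() on the fall-through branch, marking r3 (and
 * every packet pointer sharing its id) as having 8 bytes of verified range,
 * and mark_pkt_end() on the taken branch, recording that r2 lies beyond
 * pkt_end there.
 */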
9143
9144static void find_equal_scalars(struct bpf_verifier_state *vstate,
9145                               struct bpf_reg_state *known_reg)
9146{
9147        struct bpf_func_state *state;
9148        struct bpf_reg_state *reg;
9149        int i, j;
9150
9151        for (i = 0; i <= vstate->curframe; i++) {
9152                state = vstate->frame[i];
9153                for (j = 0; j < MAX_BPF_REG; j++) {
9154                        reg = &state->regs[j];
9155                        if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9156                                *reg = *known_reg;
9157                }
9158
9159                bpf_for_each_spilled_reg(j, state, reg) {
9160                        if (!reg)
9161                                continue;
9162                        if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9163                                *reg = *known_reg;
9164                }
9165        }
9166}
9167
9168static int check_cond_jmp_op(struct bpf_verifier_env *env,
9169                             struct bpf_insn *insn, int *insn_idx)
9170{
9171        struct bpf_verifier_state *this_branch = env->cur_state;
9172        struct bpf_verifier_state *other_branch;
9173        struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
9174        struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
9175        u8 opcode = BPF_OP(insn->code);
9176        bool is_jmp32;
9177        int pred = -1;
9178        int err;
9179
9180        /* Only conditional jumps are expected to reach here. */
9181        if (opcode == BPF_JA || opcode > BPF_JSLE) {
9182                verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
9183                return -EINVAL;
9184        }
9185
9186        if (BPF_SRC(insn->code) == BPF_X) {
9187                if (insn->imm != 0) {
9188                        verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9189                        return -EINVAL;
9190                }
9191
9192                /* check src1 operand */
9193                err = check_reg_arg(env, insn->src_reg, SRC_OP);
9194                if (err)
9195                        return err;
9196
9197                if (is_pointer_value(env, insn->src_reg)) {
9198                        verbose(env, "R%d pointer comparison prohibited\n",
9199                                insn->src_reg);
9200                        return -EACCES;
9201                }
9202                src_reg = &regs[insn->src_reg];
9203        } else {
9204                if (insn->src_reg != BPF_REG_0) {
9205                        verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9206                        return -EINVAL;
9207                }
9208        }
9209
9210        /* check src2 operand */
9211        err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9212        if (err)
9213                return err;
9214
9215        dst_reg = &regs[insn->dst_reg];
9216        is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
9217
9218        if (BPF_SRC(insn->code) == BPF_K) {
9219                pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
9220        } else if (src_reg->type == SCALAR_VALUE &&
9221                   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
9222                pred = is_branch_taken(dst_reg,
9223                                       tnum_subreg(src_reg->var_off).value,
9224                                       opcode,
9225                                       is_jmp32);
9226        } else if (src_reg->type == SCALAR_VALUE &&
9227                   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
9228                pred = is_branch_taken(dst_reg,
9229                                       src_reg->var_off.value,
9230                                       opcode,
9231                                       is_jmp32);
9232        } else if (reg_is_pkt_pointer_any(dst_reg) &&
9233                   reg_is_pkt_pointer_any(src_reg) &&
9234                   !is_jmp32) {
9235                pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
9236        }
9237
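        /* At this point pred encodes what is statically known about the branch:
         *   pred == 1 - the branch is always taken
         *   pred == 0 - the branch is never taken
         *   pred <  0 - the outcome cannot be determined at verification time
         */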
9238        if (pred >= 0) {
9239                /* If we get here with a dst_reg pointer type it is because
9240                 * above is_branch_taken() special cased the 0 comparison.
9241                 */
9242                if (!__is_pointer_value(false, dst_reg))
9243                        err = mark_chain_precision(env, insn->dst_reg);
9244                if (BPF_SRC(insn->code) == BPF_X && !err &&
9245                    !__is_pointer_value(false, src_reg))
9246                        err = mark_chain_precision(env, insn->src_reg);
9247                if (err)
9248                        return err;
9249        }
9250
9251        if (pred == 1) {
9252                /* Only follow the goto, ignore fall-through. If needed, push
9253                 * the fall-through branch for simulation under speculative
9254                 * execution.
9255                 */
9256                if (!env->bypass_spec_v1 &&
9257                    !sanitize_speculative_path(env, insn, *insn_idx + 1,
9258                                               *insn_idx))
9259                        return -EFAULT;
9260                *insn_idx += insn->off;
9261                return 0;
9262        } else if (pred == 0) {
9263                /* Only follow the fall-through branch, since that's where the
9264                 * program will go. If needed, push the goto branch for
9265                 * simulation under speculative execution.
9266                 */
9267                if (!env->bypass_spec_v1 &&
9268                    !sanitize_speculative_path(env, insn,
9269                                               *insn_idx + insn->off + 1,
9270                                               *insn_idx))
9271                        return -EFAULT;
9272                return 0;
9273        }
9274
9275        other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
9276                                  false);
9277        if (!other_branch)
9278                return -EFAULT;
9279        other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
9280
9281        /* detect if we are comparing against a constant value so we can adjust
9282         * our min/max values for our dst register.
9283         * this is only legit if both are scalars (or pointers to the same
9284         * object, I suppose, but we don't support that right now), because
9285         * otherwise the different base pointers mean the offsets aren't
9286         * comparable.
9287         */
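        /* Worked example (hypothetical instructions): for
         *   if r1 > r2 goto pc+4
         * with r2 known to be the constant 7, the branch-taken state pushed
         * above learns r1 > 7 (umin_value = 8) while this fall-through state
         * learns r1 <= 7 (umax_value = 7), both via reg_set_min_max() below.
         */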
9288        if (BPF_SRC(insn->code) == BPF_X) {
9289                struct bpf_reg_state *src_reg = &regs[insn->src_reg];
9290
9291                if (dst_reg->type == SCALAR_VALUE &&
9292                    src_reg->type == SCALAR_VALUE) {
9293                        if (tnum_is_const(src_reg->var_off) ||
9294                            (is_jmp32 &&
9295                             tnum_is_const(tnum_subreg(src_reg->var_off))))
9296                                reg_set_min_max(&other_branch_regs[insn->dst_reg],
9297                                                dst_reg,
9298                                                src_reg->var_off.value,
9299                                                tnum_subreg(src_reg->var_off).value,
9300                                                opcode, is_jmp32);
9301                        else if (tnum_is_const(dst_reg->var_off) ||
9302                                 (is_jmp32 &&
9303                                  tnum_is_const(tnum_subreg(dst_reg->var_off))))
9304                                reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
9305                                                    src_reg,
9306                                                    dst_reg->var_off.value,
9307                                                    tnum_subreg(dst_reg->var_off).value,
9308                                                    opcode, is_jmp32);
9309                        else if (!is_jmp32 &&
9310                                 (opcode == BPF_JEQ || opcode == BPF_JNE))
9311                                /* Comparing for equality, we can combine knowledge */
9312                                reg_combine_min_max(&other_branch_regs[insn->src_reg],
9313                                                    &other_branch_regs[insn->dst_reg],
9314                                                    src_reg, dst_reg, opcode);
9315                        if (src_reg->id &&
9316                            !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
9317                                find_equal_scalars(this_branch, src_reg);
9318                                find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
9319                        }
9320
9321                }
9322        } else if (dst_reg->type == SCALAR_VALUE) {
9323                reg_set_min_max(&other_branch_regs[insn->dst_reg],
9324                                        dst_reg, insn->imm, (u32)insn->imm,
9325                                        opcode, is_jmp32);
9326        }
9327
9328        if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
9329            !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
9330                find_equal_scalars(this_branch, dst_reg);
9331                find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
9332        }
9333
9334        /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
9335         * NOTE: the optimizations below apply only to pointer comparisons,
9336         *       which are never JMP32.
9337         */
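        /* Typical pattern (illustrative):
         *   r0 = bpf_map_lookup_elem(...)    (r0 is PTR_TO_MAP_VALUE_OR_NULL)
         *   if r0 == 0 goto pc+N
         * On the fall-through path r0 (and every register sharing its id)
         * becomes PTR_TO_MAP_VALUE; on the branch-taken path it is known to
         * be NULL.
         */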
9338        if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
9339            insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
9340            reg_type_may_be_null(dst_reg->type)) {
9341                /* Mark all identical registers in each branch as either
9342                 * safe or unknown, depending on whether R == 0 or R != 0.
9343                 */
9344                mark_ptr_or_null_regs(this_branch, insn->dst_reg,
9345                                      opcode == BPF_JNE);
9346                mark_ptr_or_null_regs(other_branch, insn->dst_reg,
9347                                      opcode == BPF_JEQ);
9348        } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
9349                                           this_branch, other_branch) &&
9350                   is_pointer_value(env, insn->dst_reg)) {
9351                verbose(env, "R%d pointer comparison prohibited\n",
9352                        insn->dst_reg);
9353                return -EACCES;
9354        }
9355        if (env->log.level & BPF_LOG_LEVEL)
9356                print_verifier_state(env, this_branch->frame[this_branch->curframe]);
9357        return 0;
9358}
9359
9360/* verify BPF_LD_IMM64 instruction */
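/* BPF_LD_IMM64 occupies two consecutive instruction slots: the low 32 bits of
 * the immediate are in insn->imm and the high 32 bits in (insn + 1)->imm, so
 * the 64-bit constant is reassembled as in the code below:
 *   imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 * insn->src_reg selects the flavour: 0 for a plain 64-bit constant,
 * BPF_PSEUDO_MAP_FD/BPF_PSEUDO_MAP_VALUE (and their _IDX variants) for map
 * references, BPF_PSEUDO_BTF_ID for kernel variables and BPF_PSEUDO_FUNC for
 * callback/subprog pointers.
 */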
9361static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
9362{
9363        struct bpf_insn_aux_data *aux = cur_aux(env);
9364        struct bpf_reg_state *regs = cur_regs(env);
9365        struct bpf_reg_state *dst_reg;
9366        struct bpf_map *map;
9367        int err;
9368
9369        if (BPF_SIZE(insn->code) != BPF_DW) {
9370                verbose(env, "invalid BPF_LD_IMM insn\n");
9371                return -EINVAL;
9372        }
9373        if (insn->off != 0) {
9374                verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
9375                return -EINVAL;
9376        }
9377
9378        err = check_reg_arg(env, insn->dst_reg, DST_OP);
9379        if (err)
9380                return err;
9381
9382        dst_reg = &regs[insn->dst_reg];
9383        if (insn->src_reg == 0) {
9384                u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
9385
9386                dst_reg->type = SCALAR_VALUE;
9387                __mark_reg_known(&regs[insn->dst_reg], imm);
9388                return 0;
9389        }
9390
9391        if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
9392                mark_reg_known_zero(env, regs, insn->dst_reg);
9393
9394                dst_reg->type = aux->btf_var.reg_type;
9395                switch (dst_reg->type) {
9396                case PTR_TO_MEM:
9397                        dst_reg->mem_size = aux->btf_var.mem_size;
9398                        break;
9399                case PTR_TO_BTF_ID:
9400                case PTR_TO_PERCPU_BTF_ID:
9401                        dst_reg->btf = aux->btf_var.btf;
9402                        dst_reg->btf_id = aux->btf_var.btf_id;
9403                        break;
9404                default:
9405                        verbose(env, "bpf verifier is misconfigured\n");
9406                        return -EFAULT;
9407                }
9408                return 0;
9409        }
9410
9411        if (insn->src_reg == BPF_PSEUDO_FUNC) {
9412                struct bpf_prog_aux *aux = env->prog->aux;
9413                u32 subprogno = find_subprog(env,
9414                                             env->insn_idx + insn->imm + 1);
9415
9416                if (!aux->func_info) {
9417                        verbose(env, "missing btf func_info\n");
9418                        return -EINVAL;
9419                }
9420                if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
9421                        verbose(env, "callback function not static\n");
9422                        return -EINVAL;
9423                }
9424
9425                dst_reg->type = PTR_TO_FUNC;
9426                dst_reg->subprogno = subprogno;
9427                return 0;
9428        }
9429
9430        map = env->used_maps[aux->map_index];
9431        mark_reg_known_zero(env, regs, insn->dst_reg);
9432        dst_reg->map_ptr = map;
9433
9434        if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
9435            insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
9436                dst_reg->type = PTR_TO_MAP_VALUE;
9437                dst_reg->off = aux->map_off;
9438                if (map_value_has_spin_lock(map))
9439                        dst_reg->id = ++env->id_gen;
9440        } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
9441                   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
9442                dst_reg->type = CONST_PTR_TO_MAP;
9443        } else {
9444                verbose(env, "bpf verifier is misconfigured\n");
9445                return -EINVAL;
9446        }
9447
9448        return 0;
9449}
9450
9451static bool may_access_skb(enum bpf_prog_type type)
9452{
9453        switch (type) {
9454        case BPF_PROG_TYPE_SOCKET_FILTER:
9455        case BPF_PROG_TYPE_SCHED_CLS:
9456        case BPF_PROG_TYPE_SCHED_ACT:
9457                return true;
9458        default:
9459                return false;
9460        }
9461}
9462
9463/* verify safety of LD_ABS|LD_IND instructions:
9464 * - they can only appear in the programs where ctx == skb
9465 * - since they are wrappers of function calls, they scratch R1-R5 registers,
9466 *   preserve R6-R9, and store return value into R0
9467 *
9468 * Implicit input:
9469 *   ctx == skb == R6 == CTX
9470 *
9471 * Explicit input:
9472 *   SRC == any register
9473 *   IMM == 32-bit immediate
9474 *
9475 * Output:
9476 *   R0 - 8/16/32-bit skb data converted to cpu endianness
9477 */
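/* Example (illustrative): BPF_LD_ABS(BPF_H, 12) loads the 16-bit value at
 * byte offset 12 of the skb (the EtherType of an Ethernet frame) into R0,
 * converted to CPU endianness, and clobbers R1-R5 via the underlying call.
 */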
9478static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
9479{
9480        struct bpf_reg_state *regs = cur_regs(env);
9481        static const int ctx_reg = BPF_REG_6;
9482        u8 mode = BPF_MODE(insn->code);
9483        int i, err;
9484
9485        if (!may_access_skb(resolve_prog_type(env->prog))) {
9486                verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
9487                return -EINVAL;
9488        }
9489
9490        if (!env->ops->gen_ld_abs) {
9491                verbose(env, "bpf verifier is misconfigured\n");
9492                return -EINVAL;
9493        }
9494
9495        if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
9496            BPF_SIZE(insn->code) == BPF_DW ||
9497            (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
9498                verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
9499                return -EINVAL;
9500        }
9501
9502        /* check whether implicit source operand (register R6) is readable */
9503        err = check_reg_arg(env, ctx_reg, SRC_OP);
9504        if (err)
9505                return err;
9506
9507        /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
9508         * gen_ld_abs() may terminate the program at runtime, leading to
9509         * reference leak.
9510         */
9511        err = check_reference_leak(env);
9512        if (err) {
9513                verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
9514                return err;
9515        }
9516
9517        if (env->cur_state->active_spin_lock) {
9518                verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
9519                return -EINVAL;
9520        }
9521
9522        if (regs[ctx_reg].type != PTR_TO_CTX) {
9523                verbose(env,
9524                        "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
9525                return -EINVAL;
9526        }
9527
9528        if (mode == BPF_IND) {
9529                /* check explicit source operand */
9530                err = check_reg_arg(env, insn->src_reg, SRC_OP);
9531                if (err)
9532                        return err;
9533        }
9534
9535        err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
9536        if (err < 0)
9537                return err;
9538
9539        /* reset caller saved regs to unreadable */
9540        for (i = 0; i < CALLER_SAVED_REGS; i++) {
9541                mark_reg_not_init(env, regs, caller_saved[i]);
9542                check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
9543        }
9544
9545        /* mark destination R0 register as readable, since it contains
9546         * the value fetched from the packet.
9547         * Already marked as written above.
9548         */
9549        mark_reg_unknown(env, regs, BPF_REG_0);
9550        /* ld_abs loads up to 32 bits of skb data. */
9551        regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
9552        return 0;
9553}
9554
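/* Check that the value left in R0 at bpf_exit lies within the range allowed
 * for the program type.  The default enforced range is [0, 1]; some types
 * widen or narrow it (e.g. BPF_CGROUP_INET_EGRESS skbs may return [0, 3],
 * fentry/fexit tracing programs must return exactly 0), and program types
 * not listed in the switch below are not range-checked at all.
 */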
9555static int check_return_code(struct bpf_verifier_env *env)
9556{
9557        struct tnum enforce_attach_type_range = tnum_unknown;
9558        const struct bpf_prog *prog = env->prog;
9559        struct bpf_reg_state *reg;
9560        struct tnum range = tnum_range(0, 1);
9561        enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9562        int err;
9563        struct bpf_func_state *frame = env->cur_state->frame[0];
9564        const bool is_subprog = frame->subprogno;
9565
9566        /* LSM and struct_ops func-ptr's return type could be "void" */
9567        if (!is_subprog &&
9568            (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
9569             prog_type == BPF_PROG_TYPE_LSM) &&
9570            !prog->aux->attach_func_proto->type)
9571                return 0;
9572
9573        /* The eBPF calling convention is such that R0 is used
9574         * to return the value from an eBPF program.
9575         * Make sure that it's readable at the time
9576         * of bpf_exit, which means that the program wrote
9577         * something into it earlier.
9578         */
9579        err = check_reg_arg(env, BPF_REG_0, SRC_OP);
9580        if (err)
9581                return err;
9582
9583        if (is_pointer_value(env, BPF_REG_0)) {
9584                verbose(env, "R0 leaks addr as return value\n");
9585                return -EACCES;
9586        }
9587
9588        reg = cur_regs(env) + BPF_REG_0;
9589
9590        if (frame->in_async_callback_fn) {
9591                /* enforce return zero from async callbacks like timer */
9592                if (reg->type != SCALAR_VALUE) {
9593                        verbose(env, "In async callback the register R0 is not a known value (%s)\n",
9594                                reg_type_str[reg->type]);
9595                        return -EINVAL;
9596                }
9597
9598                if (!tnum_in(tnum_const(0), reg->var_off)) {
9599                        verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
9600                        return -EINVAL;
9601                }
9602                return 0;
9603        }
9604
9605        if (is_subprog) {
9606                if (reg->type != SCALAR_VALUE) {
9607                        verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
9608                                reg_type_str[reg->type]);
9609                        return -EINVAL;
9610                }
9611                return 0;
9612        }
9613
9614        switch (prog_type) {
9615        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
9616                if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
9617                    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
9618                    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
9619                    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
9620                    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
9621                    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
9622                        range = tnum_range(1, 1);
9623                if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
9624                    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
9625                        range = tnum_range(0, 3);
9626                break;
9627        case BPF_PROG_TYPE_CGROUP_SKB:
9628                if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
9629                        range = tnum_range(0, 3);
9630                        enforce_attach_type_range = tnum_range(2, 3);
9631                }
9632                break;
9633        case BPF_PROG_TYPE_CGROUP_SOCK:
9634        case BPF_PROG_TYPE_SOCK_OPS:
9635        case BPF_PROG_TYPE_CGROUP_DEVICE:
9636        case BPF_PROG_TYPE_CGROUP_SYSCTL:
9637        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
9638                break;
9639        case BPF_PROG_TYPE_RAW_TRACEPOINT:
9640                if (!env->prog->aux->attach_btf_id)
9641                        return 0;
9642                range = tnum_const(0);
9643                break;
9644        case BPF_PROG_TYPE_TRACING:
9645                switch (env->prog->expected_attach_type) {
9646                case BPF_TRACE_FENTRY:
9647                case BPF_TRACE_FEXIT:
9648                        range = tnum_const(0);
9649                        break;
9650                case BPF_TRACE_RAW_TP:
9651                case BPF_MODIFY_RETURN:
9652                        return 0;
9653                case BPF_TRACE_ITER:
9654                        break;
9655                default:
9656                        return -ENOTSUPP;
9657                }
9658                break;
9659        case BPF_PROG_TYPE_SK_LOOKUP:
9660                range = tnum_range(SK_DROP, SK_PASS);
9661                break;
9662        case BPF_PROG_TYPE_EXT:
9663                /* freplace program can return anything as its return value
9664                 * depends on the to-be-replaced kernel func or bpf program.
9665                 */
9666        default:
9667                return 0;
9668        }
9669
9670        if (reg->type != SCALAR_VALUE) {
9671                verbose(env, "At program exit the register R0 is not a known value (%s)\n",
9672                        reg_type_str[reg->type]);
9673                return -EINVAL;
9674        }
9675
9676        if (!tnum_in(range, reg->var_off)) {
9677                verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
9678                return -EINVAL;
9679        }
9680
9681        if (!tnum_is_unknown(enforce_attach_type_range) &&
9682            tnum_in(enforce_attach_type_range, reg->var_off))
9683                env->prog->enforce_expected_attach_type = 1;
9684        return 0;
9685}
9686
9687/* non-recursive DFS pseudo code
9688 * 1  procedure DFS-iterative(G,v):
9689 * 2      label v as discovered
9690 * 3      let S be a stack
9691 * 4      S.push(v)
9692 * 5      while S is not empty
9693 * 6            t <- S.pop()
9694 * 7            if t is what we're looking for:
9695 * 8                return t
9696 * 9            for all edges e in G.adjacentEdges(t) do
9697 * 10               if edge e is already labelled
9698 * 11                   continue with the next edge
9699 * 12               w <- G.adjacentVertex(t,e)
9700 * 13               if vertex w is not discovered and not explored
9701 * 14                   label e as tree-edge
9702 * 15                   label w as discovered
9703 * 16                   S.push(w)
9704 * 17                   continue at 5
9705 * 18               else if vertex w is discovered
9706 * 19                   label e as back-edge
9707 * 20               else
9708 * 21                   // vertex w is explored
9709 * 22                   label e as forward- or cross-edge
9710 * 23           label t as explored
9711 * 24           S.pop()
9712 *
9713 * convention:
9714 * 0x10 - discovered
9715 * 0x11 - discovered and fall-through edge labelled
9716 * 0x12 - discovered and fall-through and branch edges labelled
9717 * 0x20 - explored
9718 */
9719
9720enum {
9721        DISCOVERED = 0x10,
9722        EXPLORED = 0x20,
9723        FALLTHROUGH = 1,
9724        BRANCH = 2,
9725};
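/* Illustrative progression for a conditional jump at instruction t:
 *   insn_state[t] == DISCOVERED               (0x10) - t has been pushed
 *   insn_state[t] == DISCOVERED | FALLTHROUGH (0x11) - edge t -> t+1 labelled
 *   insn_state[t] == DISCOVERED | BRANCH      (0x12) - edge t -> t+off+1 labelled
 *   insn_state[t] == EXPLORED                 (0x20) - both successors explored
 */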
9726
9727static u32 state_htab_size(struct bpf_verifier_env *env)
9728{
9729        return env->prog->len;
9730}
9731
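/* Explored states are kept in a hash table keyed by the instruction index
 * xor'ed with the callsite of the current frame, so that states of the same
 * subprogram reached through different call sites land in different buckets.
 * The lookup below is effectively:
 *   head = &env->explored_states[(idx ^ callsite) % env->prog->len];
 */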
9732static struct bpf_verifier_state_list **explored_state(
9733                                        struct bpf_verifier_env *env,
9734                                        int idx)
9735{
9736        struct bpf_verifier_state *cur = env->cur_state;
9737        struct bpf_func_state *state = cur->frame[cur->curframe];
9738
9739        return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
9740}
9741
9742static void init_explored_state(struct bpf_verifier_env *env, int idx)
9743{
9744        env->insn_aux_data[idx].prune_point = true;
9745}
9746
9747enum {
9748        DONE_EXPLORING = 0,
9749        KEEP_EXPLORING = 1,
9750};
9751
9752/* t, w, e - match pseudo-code above:
9753 * t - index of current instruction
9754 * w - next instruction
9755 * e - edge
9756 */
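/* Labels edge e out of insn t and, when w is newly discovered, pushes w onto
 * the DFS stack.  Returns KEEP_EXPLORING if w was pushed, DONE_EXPLORING if
 * nothing more needs to be done for this edge, or a negative errno for an
 * out-of-range jump, a disallowed back-edge (loop), a too-deep stack or an
 * internal error.
 */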
9757static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
9758                     bool loop_ok)
9759{
9760        int *insn_stack = env->cfg.insn_stack;
9761        int *insn_state = env->cfg.insn_state;
9762
9763        if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
9764                return DONE_EXPLORING;
9765
9766        if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
9767                return DONE_EXPLORING;
9768
9769        if (w < 0 || w >= env->prog->len) {
9770                verbose_linfo(env, t, "%d: ", t);
9771                verbose(env, "jump out of range from insn %d to %d\n", t, w);
9772                return -EINVAL;
9773        }
9774
9775        if (e == BRANCH)
9776                /* mark branch target for state pruning */
9777                init_explored_state(env, w);
9778
9779        if (insn_state[w] == 0) {
9780                /* tree-edge */
9781                insn_state[t] = DISCOVERED | e;
9782                insn_state[w] = DISCOVERED;
9783                if (env->cfg.cur_stack >= env->prog->len)
9784                        return -E2BIG;
9785                insn_stack[env->cfg.cur_stack++] = w;
9786                return KEEP_EXPLORING;
9787        } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
9788                if (loop_ok && env->bpf_capable)
9789                        return DONE_EXPLORING;
9790                verbose_linfo(env, t, "%d: ", t);
9791                verbose_linfo(env, w, "%d: ", w);
9792                verbose(env, "back-edge from insn %d to %d\n", t, w);
9793                return -EINVAL;
9794        } else if (insn_state[w] == EXPLORED) {
9795                /* forward- or cross-edge */
9796                insn_state[t] = DISCOVERED | e;
9797        } else {
9798                verbose(env, "insn state internal bug\n");
9799                return -EFAULT;
9800        }
9801        return DONE_EXPLORING;
9802}
9803
9804static int visit_func_call_insn(int t, int insn_cnt,
9805                                struct bpf_insn *insns,
9806                                struct bpf_verifier_env *env,
9807                                bool visit_callee)
9808{
9809        int ret;
9810
9811        ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
9812        if (ret)
9813                return ret;
9814
9815        if (t + 1 < insn_cnt)
9816                init_explored_state(env, t + 1);
9817        if (visit_callee) {
9818                init_explored_state(env, t);
9819                ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
9820                                /* It's ok to allow recursion from the CFG point
9821                                 * of view. __check_func_call() will do the actual
9822                                 * check.
9823                                 */
9824                                bpf_pseudo_func(insns + t));
9825        }
9826        return ret;
9827}
9828
9829/* Visits the instruction at index t and returns one of the following:
9830 *  < 0 - an error occurred
9831 *  DONE_EXPLORING - the instruction was fully explored
9832 *  KEEP_EXPLORING - there is still work to be done before it is fully explored
9833 */
9834static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
9835{
9836        struct bpf_insn *insns = env->prog->insnsi;
9837        int ret;
9838
9839        if (bpf_pseudo_func(insns + t))
9840                return visit_func_call_insn(t, insn_cnt, insns, env, true);
9841
9842        /* All non-branch instructions have a single fall-through edge. */
9843        if (BPF_CLASS(insns[t].code) != BPF_JMP &&
9844            BPF_CLASS(insns[t].code) != BPF_JMP32)
9845                return push_insn(t, t + 1, FALLTHROUGH, env, false);
9846
9847        switch (BPF_OP(insns[t].code)) {
9848        case BPF_EXIT:
9849                return DONE_EXPLORING;
9850
9851        case BPF_CALL:
9852                if (insns[t].imm == BPF_FUNC_timer_set_callback)
9853                        /* Mark this call insn to trigger the is_state_visited() check
9854                         * before the call itself is processed by __check_func_call().
9855                         * Otherwise a new async state will be pushed for further
9856                         * exploration.
9857                         */
9858                        init_explored_state(env, t);
9859                return visit_func_call_insn(t, insn_cnt, insns, env,
9860                                            insns[t].src_reg == BPF_PSEUDO_CALL);
9861
9862        case BPF_JA:
9863                if (BPF_SRC(insns[t].code) != BPF_K)
9864                        return -EINVAL;
9865
9866                /* unconditional jump with single edge */
9867                ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
9868                                true);
9869                if (ret)
9870                        return ret;
9871
9872                /* unconditional jmp is not a good pruning point,
9873                 * but it's marked, since backtracking needs
9874                 * to record jmp history in is_state_visited().
9875                 */
9876                init_explored_state(env, t + insns[t].off + 1);
9877                /* tell verifier to check for equivalent states
9878                 * after every call and jump
9879                 */
9880                if (t + 1 < insn_cnt)
9881                        init_explored_state(env, t + 1);
9882
9883                return ret;
9884
9885        default:
9886                /* conditional jump with two edges */
9887                init_explored_state(env, t);
9888                ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
9889                if (ret)
9890                        return ret;
9891
9892                return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
9893        }
9894}
9895
9896/* non-recursive depth-first-search to detect loops in BPF program
9897 * loop == back-edge in directed graph
9898 */
9899static int check_cfg(struct bpf_verifier_env *env)
9900{
9901        int insn_cnt = env->prog->len;
9902        int *insn_stack, *insn_state;
9903        int ret = 0;
9904        int i;
9905
9906        insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
9907        if (!insn_state)
9908                return -ENOMEM;
9909
9910        insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
9911        if (!insn_stack) {
9912                kvfree(insn_state);
9913                return -ENOMEM;
9914        }
9915
9916        insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
9917        insn_stack[0] = 0; /* 0 is the first instruction */
9918        env->cfg.cur_stack = 1;
9919
9920        while (env->cfg.cur_stack > 0) {
9921                int t = insn_stack[env->cfg.cur_stack - 1];
9922
9923                ret = visit_insn(t, insn_cnt, env);
9924                switch (ret) {
9925                case DONE_EXPLORING:
9926                        insn_state[t] = EXPLORED;
9927                        env->cfg.cur_stack--;
9928                        break;
9929                case KEEP_EXPLORING:
9930                        break;
9931                default:
9932                        if (ret > 0) {
9933                                verbose(env, "visit_insn internal bug\n");
9934                                ret = -EFAULT;
9935                        }
9936                        goto err_free;
9937                }
9938        }
9939
9940        if (env->cfg.cur_stack < 0) {
9941                verbose(env, "pop stack internal bug\n");
9942                ret = -EFAULT;
9943                goto err_free;
9944        }
9945
9946        for (i = 0; i < insn_cnt; i++) {
9947                if (insn_state[i] != EXPLORED) {
9948                        verbose(env, "unreachable insn %d\n", i);
9949                        ret = -EINVAL;
9950                        goto err_free;
9951                }
9952        }
9953        ret = 0; /* cfg looks good */
9954
9955err_free:
9956        kvfree(insn_state);
9957        kvfree(insn_stack);
9958        env->cfg.insn_state = env->cfg.insn_stack = NULL;
9959        return ret;
9960}
9961
9962static int check_abnormal_return(struct bpf_verifier_env *env)
9963{
9964        int i;
9965
9966        for (i = 1; i < env->subprog_cnt; i++) {
9967                if (env->subprog_info[i].has_ld_abs) {
9968                        verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
9969                        return -EINVAL;
9970                }
9971                if (env->subprog_info[i].has_tail_call) {
9972                        verbose(env, "tail_call is not allowed in subprogs without BTF\n");
9973                        return -EINVAL;
9974                }
9975        }
9976        return 0;
9977}
9978
9979/* The minimum supported BTF func info size */
9980#define MIN_BPF_FUNCINFO_SIZE   8
9981#define MAX_FUNCINFO_REC_SIZE   252
9982
9983static int check_btf_func(struct bpf_verifier_env *env,
9984                          const union bpf_attr *attr,
9985                          bpfptr_t uattr)
9986{
9987        const struct btf_type *type, *func_proto, *ret_type;
9988        u32 i, nfuncs, urec_size, min_size;
9989        u32 krec_size = sizeof(struct bpf_func_info);
9990        struct bpf_func_info *krecord;
9991        struct bpf_func_info_aux *info_aux = NULL;
9992        struct bpf_prog *prog;
9993        const struct btf *btf;
9994        bpfptr_t urecord;
9995        u32 prev_offset = 0;
9996        bool scalar_return;
9997        int ret = -ENOMEM;
9998
9999        nfuncs = attr->func_info_cnt;
10000        if (!nfuncs) {
10001                if (check_abnormal_return(env))
10002                        return -EINVAL;
10003                return 0;
10004        }
10005
10006        if (nfuncs != env->subprog_cnt) {
10007                verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
10008                return -EINVAL;
10009        }
10010
10011        urec_size = attr->func_info_rec_size;
10012        if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
10013            urec_size > MAX_FUNCINFO_REC_SIZE ||
10014            urec_size % sizeof(u32)) {
10015                verbose(env, "invalid func info rec size %u\n", urec_size);
10016                return -EINVAL;
10017        }
10018
10019        prog = env->prog;
10020        btf = prog->aux->btf;
10021
10022        urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
10023        min_size = min_t(u32, krec_size, urec_size);
10024
10025        krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
10026        if (!krecord)
10027                return -ENOMEM;
10028        info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
10029        if (!info_aux)
10030                goto err_free;
10031
10032        for (i = 0; i < nfuncs; i++) {
10033                ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
10034                if (ret) {
10035                        if (ret == -E2BIG) {
10036                                verbose(env, "nonzero tailing record in func info");
10037                                /* set the size the kernel expects so the loader can
10038                                 * zero out the rest of the record.
10039                                 */
10040                                if (copy_to_bpfptr_offset(uattr,
10041                                                          offsetof(union bpf_attr, func_info_rec_size),
10042                                                          &min_size, sizeof(min_size)))
10043                                        ret = -EFAULT;
10044                        }
10045                        goto err_free;
10046                }
10047
10048                if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
10049                        ret = -EFAULT;
10050                        goto err_free;
10051                }
10052
10053                /* check insn_off */
10054                ret = -EINVAL;
10055                if (i == 0) {
10056                        if (krecord[i].insn_off) {
10057                                verbose(env,
10058                                        "nonzero insn_off %u for the first func info record",
10059                                        krecord[i].insn_off);
10060                                goto err_free;
10061                        }
10062                } else if (krecord[i].insn_off <= prev_offset) {
10063                        verbose(env,
10064                                "same or smaller insn offset (%u) than previous func info record (%u)",
10065                                krecord[i].insn_off, prev_offset);
10066                        goto err_free;
10067                }
10068
10069                if (env->subprog_info[i].start != krecord[i].insn_off) {
10070                        verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
10071                        goto err_free;
10072                }
10073
10074                /* check type_id */
10075                type = btf_type_by_id(btf, krecord[i].type_id);
10076                if (!type || !btf_type_is_func(type)) {
10077                        verbose(env, "invalid type id %d in func info",
10078                                krecord[i].type_id);
10079                        goto err_free;
10080                }
10081                info_aux[i].linkage = BTF_INFO_VLEN(type->info);
10082
10083                func_proto = btf_type_by_id(btf, type->type);
10084                if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
10085                        /* btf_func_check() already verified it during BTF load */
10086                        goto err_free;
10087                ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
10088                scalar_return =
10089                        btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
10090                if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
10091                        verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
10092                        goto err_free;
10093                }
10094                if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
10095                        verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
10096                        goto err_free;
10097                }
10098
10099                prev_offset = krecord[i].insn_off;
10100                bpfptr_add(&urecord, urec_size);
10101        }
10102
10103        prog->aux->func_info = krecord;
10104        prog->aux->func_info_cnt = nfuncs;
10105        prog->aux->func_info_aux = info_aux;
10106        return 0;
10107
10108err_free:
10109        kvfree(krecord);
10110        kfree(info_aux);
10111        return ret;
10112}
10113
10114static void adjust_btf_func(struct bpf_verifier_env *env)
10115{
10116        struct bpf_prog_aux *aux = env->prog->aux;
10117        int i;
10118
10119        if (!aux->func_info)
10120                return;
10121
10122        for (i = 0; i < env->subprog_cnt; i++)
10123                aux->func_info[i].insn_off = env->subprog_info[i].start;
10124}
10125
10126#define MIN_BPF_LINEINFO_SIZE   (offsetof(struct bpf_line_info, line_col) + \
10127                sizeof(((struct bpf_line_info *)(0))->line_col))
10128#define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
10129
10130static int check_btf_line(struct bpf_verifier_env *env,
10131                          const union bpf_attr *attr,
10132                          bpfptr_t uattr)
10133{
10134        u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
10135        struct bpf_subprog_info *sub;
10136        struct bpf_line_info *linfo;
10137        struct bpf_prog *prog;
10138        const struct btf *btf;
10139        bpfptr_t ulinfo;
10140        int err;
10141
10142        nr_linfo = attr->line_info_cnt;
10143        if (!nr_linfo)
10144                return 0;
10145        if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
10146                return -EINVAL;
10147
10148        rec_size = attr->line_info_rec_size;
10149        if (rec_size < MIN_BPF_LINEINFO_SIZE ||
10150            rec_size > MAX_LINEINFO_REC_SIZE ||
10151            rec_size & (sizeof(u32) - 1))
10152                return -EINVAL;
10153
10154        /* Need to zero it because userspace may
10155         * pass in a smaller bpf_line_info object.
10156         */
10157        linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
10158                         GFP_KERNEL | __GFP_NOWARN);
10159        if (!linfo)
10160                return -ENOMEM;
10161
10162        prog = env->prog;
10163        btf = prog->aux->btf;
10164
10165        s = 0;
10166        sub = env->subprog_info;
10167        ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
10168        expected_size = sizeof(struct bpf_line_info);
10169        ncopy = min_t(u32, expected_size, rec_size);
10170        for (i = 0; i < nr_linfo; i++) {
10171                err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
10172                if (err) {
10173                        if (err == -E2BIG) {
10174                                verbose(env, "nonzero tailing record in line_info");
10175                                if (copy_to_bpfptr_offset(uattr,
10176                                                          offsetof(union bpf_attr, line_info_rec_size),
10177                                                          &expected_size, sizeof(expected_size)))
10178                                        err = -EFAULT;
10179                        }
10180                        goto err_free;
10181                }
10182
10183                if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
10184                        err = -EFAULT;
10185                        goto err_free;
10186                }
10187
10188                /*
10189                 * Check insn_off to ensure
10190                 * 1) strictly increasing AND
10191                 * 2) bounded by prog->len
10192                 *
10193                 * The linfo[0].insn_off == 0 check logically falls into
10194                 * the later "missing bpf_line_info for func..." case
10195                 * because the first linfo[0].insn_off must belong to the
10196                 * first subprog, and the first subprog must have
10197                 * subprog_info[0].start == 0.
10198                 */
10199                if ((i && linfo[i].insn_off <= prev_offset) ||
10200                    linfo[i].insn_off >= prog->len) {
10201                        verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
10202                                i, linfo[i].insn_off, prev_offset,
10203                                prog->len);
10204                        err = -EINVAL;
10205                        goto err_free;
10206                }
10207
10208                if (!prog->insnsi[linfo[i].insn_off].code) {
10209                        verbose(env,
10210                                "Invalid insn code at line_info[%u].insn_off\n",
10211                                i);
10212                        err = -EINVAL;
10213                        goto err_free;
10214                }
10215
10216                if (!btf_name_by_offset(btf, linfo[i].line_off) ||
10217                    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
10218                        verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
10219                        err = -EINVAL;
10220                        goto err_free;
10221                }
10222
10223                if (s != env->subprog_cnt) {
10224                        if (linfo[i].insn_off == sub[s].start) {
10225                                sub[s].linfo_idx = i;
10226                                s++;
10227                        } else if (sub[s].start < linfo[i].insn_off) {
10228                                verbose(env, "missing bpf_line_info for func#%u\n", s);
10229                                err = -EINVAL;
10230                                goto err_free;
10231                        }
10232                }
10233
10234                prev_offset = linfo[i].insn_off;
10235                bpfptr_add(&ulinfo, rec_size);
10236        }
10237
10238        if (s != env->subprog_cnt) {
10239                verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
10240                        env->subprog_cnt - s, s);
10241                err = -EINVAL;
10242                goto err_free;
10243        }
10244
10245        prog->aux->linfo = linfo;
10246        prog->aux->nr_linfo = nr_linfo;
10247
10248        return 0;
10249
10250err_free:
10251        kvfree(linfo);
10252        return err;
10253}
10254
10255static int check_btf_info(struct bpf_verifier_env *env,
10256                          const union bpf_attr *attr,
10257                          bpfptr_t uattr)
10258{
10259        struct btf *btf;
10260        int err;
10261
10262        if (!attr->func_info_cnt && !attr->line_info_cnt) {
10263                if (check_abnormal_return(env))
10264                        return -EINVAL;
10265                return 0;
10266        }
10267
10268        btf = btf_get_by_fd(attr->prog_btf_fd);
10269        if (IS_ERR(btf))
10270                return PTR_ERR(btf);
10271        if (btf_is_kernel(btf)) {
10272                btf_put(btf);
10273                return -EACCES;
10274        }
10275        env->prog->aux->btf = btf;
10276
10277        err = check_btf_func(env, attr, uattr);
10278        if (err)
10279                return err;
10280
10281        err = check_btf_line(env, attr, uattr);
10282        if (err)
10283                return err;
10284
10285        return 0;
10286}
10287
10288/* check %cur's range satisfies %old's */
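/* Illustrative example: an old state with bounds [0, 255] accepts a new state
 * whose bounds are [16, 32] (the new range is contained in the old one), but
 * not one with an upper bound of 1000, since accesses proven safe for the old
 * range would not necessarily be safe for the wider one.
 */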
10289static bool range_within(struct bpf_reg_state *old,
10290                         struct bpf_reg_state *cur)
10291{
10292        return old->umin_value <= cur->umin_value &&
10293               old->umax_value >= cur->umax_value &&
10294               old->smin_value <= cur->smin_value &&
10295               old->smax_value >= cur->smax_value &&
10296               old->u32_min_value <= cur->u32_min_value &&
10297               old->u32_max_value >= cur->u32_max_value &&
10298               old->s32_min_value <= cur->s32_min_value &&
10299               old->s32_max_value >= cur->s32_max_value;
10300}
10301
10302/* If in the old state two registers had the same id, then they need to have
10303 * the same id in the new state as well.  But that id could be different from
10304 * the old state, so we need to track the mapping from old to new ids.
10305 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
10306 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
10307 * regs with a different old id could still have new id 9; we don't care about
10308 * that.
10309 * So we look through our idmap to see if this old id has been seen before.  If
10310 * so, we require the new id to match; otherwise, we add the id pair to the map.
10311 */
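/* Illustrative mapping (hypothetical ids): if the old state had r1.id == 5 and
 * r2.id == 5 while the current state has r1.id == 9, the first call records
 * 5 -> 9 in the idmap; the later check for r2 then succeeds only if r2.id is
 * also 9 in the current state.
 */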
10312static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
10313{
10314        unsigned int i;
10315
10316        for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
10317                if (!idmap[i].old) {
10318                        /* Reached an empty slot; haven't seen this id before */
10319                        idmap[i].old = old_id;
10320                        idmap[i].cur = cur_id;
10321                        return true;
10322                }
10323                if (idmap[i].old == old_id)
10324                        return idmap[i].cur == cur_id;
10325        }
10326        /* We ran out of idmap slots, which should be impossible */
10327        WARN_ON_ONCE(1);
10328        return false;
10329}
10330
10331static void clean_func_state(struct bpf_verifier_env *env,
10332                             struct bpf_func_state *st)
10333{
10334        enum bpf_reg_liveness live;
10335        int i, j;
10336
10337        for (i = 0; i < BPF_REG_FP; i++) {
10338                live = st->regs[i].live;
10339                /* liveness must not touch this register anymore */
10340                st->regs[i].live |= REG_LIVE_DONE;
10341                if (!(live & REG_LIVE_READ))
10342                        /* since the register is unused, clear its state
10343                         * to make further comparison simpler
10344                         */
10345                        __mark_reg_not_init(env, &st->regs[i]);
10346        }
10347
10348        for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
10349                live = st->stack[i].spilled_ptr.live;
10350                /* liveness must not touch this stack slot anymore */
10351                st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
10352                if (!(live & REG_LIVE_READ)) {
10353                        __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
10354                        for (j = 0; j < BPF_REG_SIZE; j++)
10355                                st->stack[i].slot_type[j] = STACK_INVALID;
10356                }
10357        }
10358}
10359
10360static void clean_verifier_state(struct bpf_verifier_env *env,
10361                                 struct bpf_verifier_state *st)
10362{
10363        int i;
10364
10365        if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
10366                /* all regs in this state in all frames were already marked */
10367                return;
10368
10369        for (i = 0; i <= st->curframe; i++)
10370                clean_func_state(env, st->frame[i]);
10371}
10372
10373/* the parentage chains form a tree.
10374 * the verifier states are added to state lists at given insn and
10375 * pushed into state stack for future exploration.
10376 * when the verifier reaches bpf_exit insn some of the verifer states
10377 * stored in the state lists have their final liveness state already,
10378 * but a lot of states will get revised from liveness point of view when
10379 * the verifier explores other branches.
10380 * Example:
10381 * 1: r0 = 1
10382 * 2: if r1 == 100 goto pc+1
10383 * 3: r0 = 2
10384 * 4: exit
10385 * when the verifier reaches exit insn the register r0 in the state list of
10386 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
10387 * of insn 2 and goes exploring further. At insn 4 it will walk the
10388 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
10389 *
10390 * Since the verifier pushes the branch states as it sees them while exploring
10391 * the program, walking the same branch instruction for the second
10392 * time means that all states below this branch were already explored and
10393 * their final liveness marks are already propagated.
10394 * Hence when the verifier completes the search of state list in is_state_visited()
10395 * we can call this clean_live_states() function to mark all liveness states
10396 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
10397 * will not be used.
10398 * This function also clears the registers and stack slots that were not
10399 * marked REG_LIVE_READ, to simplify state merging.
10400 *
10401 * Note that walking the same branch instruction in the callee
10402 * doesn't mean that the states are DONE. The verifier has to compare
10403 * the callsites.
10404 */
10405static void clean_live_states(struct bpf_verifier_env *env, int insn,
10406                              struct bpf_verifier_state *cur)
10407{
10408        struct bpf_verifier_state_list *sl;
10409        int i;
10410
10411        sl = *explored_state(env, insn);
10412        while (sl) {
10413                if (sl->state.branches)
10414                        goto next;
10415                if (sl->state.insn_idx != insn ||
10416                    sl->state.curframe != cur->curframe)
10417                        goto next;
10418                for (i = 0; i <= cur->curframe; i++)
10419                        if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
10420                                goto next;
10421                clean_verifier_state(env, &sl->state);
10422next:
10423                sl = sl->next;
10424        }
10425}
10426
10427/* Returns true if (rold safe implies rcur safe) */
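/* I.e. the already-verified old register state must be at least as permissive
 * as the current one for the current state to be pruned.  E.g. (illustrative)
 * an old SCALAR_VALUE known to lie in [0, 100] is satisfied by a current
 * register known to be exactly 5, but not by one that may reach 200.
 */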
10428static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
10429                    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
10430{
10431        bool equal;
10432
10433        if (!(rold->live & REG_LIVE_READ))
10434                /* explored state didn't use this */
10435                return true;
10436
10437        equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
10438
10439        if (rold->type == PTR_TO_STACK)
10440                /* two stack pointers are equal only if they're pointing to
10441                 * the same stack frame, since fp-8 in foo != fp-8 in bar
10442                 */
10443                return equal && rold->frameno == rcur->frameno;
10444
10445        if (equal)
10446                return true;
10447
10448        if (rold->type == NOT_INIT)
10449                /* explored state can't have used this */
10450                return true;
10451        if (rcur->type == NOT_INIT)
10452                return false;
10453        switch (rold->type) {
10454        case SCALAR_VALUE:
10455                if (env->explore_alu_limits)
10456                        return false;
10457                if (rcur->type == SCALAR_VALUE) {
10458                        if (!rold->precise && !rcur->precise)
10459                                return true;
10460                        /* new val must satisfy old val knowledge */
10461                        return range_within(rold, rcur) &&
10462                               tnum_in(rold->var_off, rcur->var_off);
10463                } else {
10464                        /* We're trying to use a pointer in place of a scalar.
10465                         * Even if the scalar was unbounded, this could lead to
10466                         * pointer leaks because scalars are allowed to leak
10467                         * while pointers are not. We could make this safe in
10468                         * special cases if root is calling us, but it's
10469                         * probably not worth the hassle.
10470                         */
10471                        return false;
10472                }
10473        case PTR_TO_MAP_KEY:
10474        case PTR_TO_MAP_VALUE:
10475                /* If the new min/max/var_off satisfy the old ones and
10476                 * everything else matches, we are OK.
10477                 * 'id' is not compared, since it's only used for maps with
10478                 * bpf_spin_lock inside map element and in such cases if
10479                 * the rest of the prog is valid for one map element then
10480                 * it's valid for all map elements regardless of the key
10481                 * used in bpf_map_lookup()
10482                 */
10483                return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
10484                       range_within(rold, rcur) &&
10485                       tnum_in(rold->var_off, rcur->var_off);
10486        case PTR_TO_MAP_VALUE_OR_NULL:
10487                /* a PTR_TO_MAP_VALUE could be safe to use as a
10488                 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
10489                 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
10490                 * checked, doing so could have affected others with the same
10491                 * id, and we can't check for that because we lost the id when
10492                 * we converted to a PTR_TO_MAP_VALUE.
10493                 */
10494                if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
10495                        return false;
10496                if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
10497                        return false;
10498                /* Check our ids match any regs they're supposed to */
10499                return check_ids(rold->id, rcur->id, idmap);
10500        case PTR_TO_PACKET_META:
10501        case PTR_TO_PACKET:
10502                if (rcur->type != rold->type)
10503                        return false;
10504                /* We must have at least as much range as the old ptr
10505                 * did, so that any accesses which were safe before are
10506                 * still safe.  This is true even if old range < old off,
10507                 * since someone could have accessed through (ptr - k), or
10508                 * even done ptr -= k in a register, to get a safe access.
10509                 */
10510                if (rold->range > rcur->range)
10511                        return false;
10512                /* If the offsets don't match, we can't trust our alignment;
10513                 * nor can we be sure that we won't fall out of range.
10514                 */
10515                if (rold->off != rcur->off)
10516                        return false;
10517                /* id relations must be preserved */
10518                if (rold->id && !check_ids(rold->id, rcur->id, idmap))
10519                        return false;
10520                /* new val must satisfy old val knowledge */
10521                return range_within(rold, rcur) &&
10522                       tnum_in(rold->var_off, rcur->var_off);
10523        case PTR_TO_CTX:
10524        case CONST_PTR_TO_MAP:
10525        case PTR_TO_PACKET_END:
10526        case PTR_TO_FLOW_KEYS:
10527        case PTR_TO_SOCKET:
10528        case PTR_TO_SOCKET_OR_NULL:
10529        case PTR_TO_SOCK_COMMON:
10530        case PTR_TO_SOCK_COMMON_OR_NULL:
10531        case PTR_TO_TCP_SOCK:
10532        case PTR_TO_TCP_SOCK_OR_NULL:
10533        case PTR_TO_XDP_SOCK:
10534                /* Only valid matches are exact, which memcmp() above
10535                 * would have accepted
10536                 */
10537        default:
10538                /* Don't know what's going on, just say it's not safe */
10539                return false;
10540        }
10541
10542        /* Shouldn't get here; if we do, say it's not safe */
10543        WARN_ON_ONCE(1);
10544        return false;
10545}
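
/* Worked example for the SCALAR_VALUE case above: if the old (explored) state
 * was verified safe with a precise r3 in [0, 10], then a current state where
 * r3 is known to lie in [2, 5] (with a compatible var_off) passes
 * range_within() and tnum_in(), so it is safe to prune.  The other direction
 * does not hold: a current r3 in [0, 100] is not covered by the old knowledge
 * and verification of that path must continue.
 */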
10546
10547static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
10548                      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
10549{
10550        int i, spi;
10551
10552        /* walk slots of the explored stack and ignore any additional
10553         * slots in the current stack, since the explored (safe) state
10554         * didn't use them
10555         */
10556        for (i = 0; i < old->allocated_stack; i++) {
10557                spi = i / BPF_REG_SIZE;
10558
10559                if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
10560                        i += BPF_REG_SIZE - 1;
10561                        /* explored state didn't use this */
10562                        continue;
10563                }
10564
10565                if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
10566                        continue;
10567
10568                /* explored stack has more populated slots than current stack
10569                 * and these slots were used
10570                 */
10571                if (i >= cur->allocated_stack)
10572                        return false;
10573
10574                /* if old state was safe with misc data in the stack
10575                 * it will be safe with zero-initialized stack.
10576                 * The opposite is not true
10577                 */
10578                if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
10579                    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
10580                        continue;
10581                if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
10582                    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
10583                        /* Ex: old explored (safe) state has STACK_SPILL in
10584                         * this stack slot, but current has STACK_MISC ->
10585                          * these verifier states are not equivalent;
10586                         * return false to continue verification of this path
10587                         */
10588                        return false;
10589                if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
10590                        continue;
10591                if (!is_spilled_reg(&old->stack[spi]))
10592                        continue;
10593                if (!regsafe(env, &old->stack[spi].spilled_ptr,
10594                             &cur->stack[spi].spilled_ptr, idmap))
10595                        /* when explored and current stack slot are both storing
10596                         * spilled registers, check that the stored pointers' types
10597                         * are the same as well.
10598                         * Ex: explored safe path could have stored
10599                         * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
10600                         * but current path has stored:
10601                         * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
10602                         * such verifier states are not equivalent.
10603                         * return false to continue verification of this path
10604                         */
10605                        return false;
10606        }
10607        return true;
10608}
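
/* For instance: if the explored state read the stack slot covering fp-24 but
 * the current state only allocated 16 bytes of stack, that slot lies beyond
 * cur->allocated_stack and the states are not equivalent.  On the other hand,
 * an old STACK_MISC slot matched by a current STACK_ZERO slot is fine, since
 * zero-initialized stack is strictly more defined than unknown data.
 */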
10609
10610static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
10611{
10612        if (old->acquired_refs != cur->acquired_refs)
10613                return false;
10614        return !memcmp(old->refs, cur->refs,
10615                       sizeof(*old->refs) * old->acquired_refs);
10616}
10617
10618/* compare two verifier states
10619 *
10620 * all states stored in state_list are known to be valid, since
10621 * verifier reached 'bpf_exit' instruction through them
10622 *
10623 * this function is called when the verifier explores different branches of
10624 * execution popped from the state stack. If it sees an old state that has
10625 * more strict register state and more strict stack state, then this execution
10626 * branch doesn't need to be explored further, since verifier already
10627 * concluded that more strict state leads to valid finish.
10628 *
10629 * Therefore two states are equivalent if register state is more conservative
10630 * and explored stack state is more conservative than the current one.
10631 * Example:
10632 *       explored                   current
10633 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
10634 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
10635 *
10636 * In other words, if the current stack state (the one being explored) has more
10637 * valid slots than the old one that already passed validation, it means
10638 * the verifier can stop exploring and conclude that current state is valid too
10639 *
10640 * Similarly with registers. If explored state has register type as invalid
10641 * whereas register type in current state is meaningful, it means that
10642 * the current state will reach 'bpf_exit' instruction safely
10643 */
10644static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
10645                              struct bpf_func_state *cur)
10646{
10647        int i;
10648
10649        memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
10650        for (i = 0; i < MAX_BPF_REG; i++)
10651                if (!regsafe(env, &old->regs[i], &cur->regs[i],
10652                             env->idmap_scratch))
10653                        return false;
10654
10655        if (!stacksafe(env, old, cur, env->idmap_scratch))
10656                return false;
10657
10658        if (!refsafe(old, cur))
10659                return false;
10660
10661        return true;
10662}
10663
10664static bool states_equal(struct bpf_verifier_env *env,
10665                         struct bpf_verifier_state *old,
10666                         struct bpf_verifier_state *cur)
10667{
10668        int i;
10669
10670        if (old->curframe != cur->curframe)
10671                return false;
10672
10673        /* Verification state from speculative execution simulation
10674         * must never prune a non-speculative execution one.
10675         */
10676        if (old->speculative && !cur->speculative)
10677                return false;
10678
10679        if (old->active_spin_lock != cur->active_spin_lock)
10680                return false;
10681
10682        /* for states to be equal callsites have to be the same
10683         * and all frame states need to be equivalent
10684         */
10685        for (i = 0; i <= old->curframe; i++) {
10686                if (old->frame[i]->callsite != cur->frame[i]->callsite)
10687                        return false;
10688                if (!func_states_equal(env, old->frame[i], cur->frame[i]))
10689                        return false;
10690        }
10691        return true;
10692}
10693
10694/* Return 0 if no propagation happened. Return negative error code if error
10695 * happened. Otherwise, return the propagated bit.
10696 */
10697static int propagate_liveness_reg(struct bpf_verifier_env *env,
10698                                  struct bpf_reg_state *reg,
10699                                  struct bpf_reg_state *parent_reg)
10700{
10701        u8 parent_flag = parent_reg->live & REG_LIVE_READ;
10702        u8 flag = reg->live & REG_LIVE_READ;
10703        int err;
10704
10705        /* When we get here, the read flags of PARENT_REG or REG could be any of
10706         * REG_LIVE_READ64, REG_LIVE_READ32 or REG_LIVE_NONE. There is no need
10707         * for propagation if PARENT_REG already has the strongest flag, REG_LIVE_READ64.
10708         */
10709        if (parent_flag == REG_LIVE_READ64 ||
10710            /* Or if there is no read flag from REG. */
10711            !flag ||
10712            /* Or if the read flag from REG is the same as PARENT_REG. */
10713            parent_flag == flag)
10714                return 0;
10715
10716        err = mark_reg_read(env, reg, parent_reg, flag);
10717        if (err)
10718                return err;
10719
10720        return flag;
10721}
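
/* A few illustrative combinations of the flags handled above:
 *  - parent REG_LIVE_READ64, reg anything        -> nothing to propagate
 *  - parent REG_LIVE_NONE,   reg REG_LIVE_READ32 -> propagate READ32
 *  - parent REG_LIVE_READ32, reg REG_LIVE_READ64 -> propagate READ64, and the
 *    caller then marks the defining insn as needing zero extension via
 *    mark_insn_zext().
 */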
10722
10723/* A write screens off any subsequent reads; but write marks come from the
10724 * straight-line code between a state and its parent.  When we arrive at an
10725 * equivalent state (jump target or such) we didn't arrive by the straight-line
10726 * code, so read marks in the state must propagate to the parent regardless
10727 * of the state's write marks. That's what 'parent == state->parent' comparison
10728 * in mark_reg_read() is for.
10729 */
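/* Concretely: the explored state may have both REG_LIVE_WRITTEN and
 * REG_LIVE_READ set on r6, because it wrote r6 on its own straight-line
 * segment and its continuation read it later.  When the current state is
 * pruned against it, that WRITTEN mark must not screen off the read: the
 * current state arrived here by different code, so its r6 still has to be
 * marked as read (and the mark propagated up its own parent chain).
 */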
10730static int propagate_liveness(struct bpf_verifier_env *env,
10731                              const struct bpf_verifier_state *vstate,
10732                              struct bpf_verifier_state *vparent)
10733{
10734        struct bpf_reg_state *state_reg, *parent_reg;
10735        struct bpf_func_state *state, *parent;
10736        int i, frame, err = 0;
10737
10738        if (vparent->curframe != vstate->curframe) {
10739                WARN(1, "propagate_live: parent frame %d current frame %d\n",
10740                     vparent->curframe, vstate->curframe);
10741                return -EFAULT;
10742        }
10743        /* Propagate read liveness of registers... */
10744        BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
10745        for (frame = 0; frame <= vstate->curframe; frame++) {
10746                parent = vparent->frame[frame];
10747                state = vstate->frame[frame];
10748                parent_reg = parent->regs;
10749                state_reg = state->regs;
10750                /* We don't need to worry about FP liveness, it's read-only */
10751                for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
10752                        err = propagate_liveness_reg(env, &state_reg[i],
10753                                                     &parent_reg[i]);
10754                        if (err < 0)
10755                                return err;
10756                        if (err == REG_LIVE_READ64)
10757                                mark_insn_zext(env, &parent_reg[i]);
10758                }
10759
10760                /* Propagate stack slots. */
10761                for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
10762                            i < parent->allocated_stack / BPF_REG_SIZE; i++) {
10763                        parent_reg = &parent->stack[i].spilled_ptr;
10764                        state_reg = &state->stack[i].spilled_ptr;
10765                        err = propagate_liveness_reg(env, state_reg,
10766                                                     parent_reg);
10767                        if (err < 0)
10768                                return err;
10769                }
10770        }
10771        return 0;
10772}
10773
10774/* find precise scalars in the previous equivalent state and
10775 * propagate them into the current state
10776 */
10777static int propagate_precision(struct bpf_verifier_env *env,
10778                               const struct bpf_verifier_state *old)
10779{
10780        struct bpf_reg_state *state_reg;
10781        struct bpf_func_state *state;
10782        int i, err = 0;
10783
10784        state = old->frame[old->curframe];
10785        state_reg = state->regs;
10786        for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
10787                if (state_reg->type != SCALAR_VALUE ||
10788                    !state_reg->precise)
10789                        continue;
10790                if (env->log.level & BPF_LOG_LEVEL2)
10791                        verbose(env, "propagating r%d\n", i);
10792                err = mark_chain_precision(env, i);
10793                if (err < 0)
10794                        return err;
10795        }
10796
10797        for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
10798                if (!is_spilled_reg(&state->stack[i]))
10799                        continue;
10800                state_reg = &state->stack[i].spilled_ptr;
10801                if (state_reg->type != SCALAR_VALUE ||
10802                    !state_reg->precise)
10803                        continue;
10804                if (env->log.level & BPF_LOG_LEVEL2)
10805                        verbose(env, "propagating fp%d\n",
10806                                (-i - 1) * BPF_REG_SIZE);
10807                err = mark_chain_precision_stack(env, i);
10808                if (err < 0)
10809                        return err;
10810        }
10811        return 0;
10812}
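
/* For example: if the old equivalent state reached bpf_exit with r2 marked
 * precise (its exact bounds mattered for safety), then after pruning the
 * current state against it, mark_chain_precision() above marks r2 precise in
 * the current state as well, backtracking through its instruction history, so
 * that future state comparisons on this path keep treating r2's exact bounds
 * as significant.
 */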
10813
10814static bool states_maybe_looping(struct bpf_verifier_state *old,
10815                                 struct bpf_verifier_state *cur)
10816{
10817        struct bpf_func_state *fold, *fcur;
10818        int i, fr = cur->curframe;
10819
10820        if (old->curframe != fr)
10821                return false;
10822
10823        fold = old->frame[fr];
10824        fcur = cur->frame[fr];
10825        for (i = 0; i < MAX_BPF_REG; i++)
10826                if (memcmp(&fold->regs[i], &fcur->regs[i],
10827                           offsetof(struct bpf_reg_state, parent)))
10828                        return false;
10829        return true;
10830}
10831
10832
10833static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
10834{
10835        struct bpf_verifier_state_list *new_sl;
10836        struct bpf_verifier_state_list *sl, **pprev;
10837        struct bpf_verifier_state *cur = env->cur_state, *new;
10838        int i, j, err, states_cnt = 0;
10839        bool add_new_state = env->test_state_freq ? true : false;
10840
10841        cur->last_insn_idx = env->prev_insn_idx;
10842        if (!env->insn_aux_data[insn_idx].prune_point)
10843                /* this 'insn_idx' instruction wasn't marked, so we will not
10844                 * be doing state search here
10845                 */
10846                return 0;
10847
10848        /* bpf progs typically have a pruning point every 4 instructions
10849         * http://vger.kernel.org/bpfconf2019.html#session-1
10850         * Do not add a new state for future pruning if the verifier hasn't seen
10851         * at least 2 jumps and at least 8 instructions.
10852         * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
10853         * In tests that amounts to up to a 50% reduction in total verifier
10854         * memory consumption and a 20% verifier time speedup.
10855         */
10856        if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
10857            env->insn_processed - env->prev_insn_processed >= 8)
10858                add_new_state = true;
10859
10860        pprev = explored_state(env, insn_idx);
10861        sl = *pprev;
10862
10863        clean_live_states(env, insn_idx, cur);
10864
10865        while (sl) {
10866                states_cnt++;
10867                if (sl->state.insn_idx != insn_idx)
10868                        goto next;
10869
10870                if (sl->state.branches) {
10871                        struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
10872
10873                        if (frame->in_async_callback_fn &&
10874                            frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
10875                                /* Different async_entry_cnt means that the verifier is
10876                                 * processing another entry into async callback.
10877                                 * Seeing the same state is not an indication of infinite
10878                                 * loop or infinite recursion.
10879                                 * But finding the same state doesn't mean that it's safe
10880                                 * to stop processing the current state. The previous state
10881                                 * hasn't yet reached bpf_exit, since state.branches > 0.
10882                                 * Checking in_async_callback_fn alone is not enough either,
10883                                 * since the verifier still needs to catch infinite loops
10884                                 * inside async callbacks.
10885                                 */
10886                        } else if (states_maybe_looping(&sl->state, cur) &&
10887                                   states_equal(env, &sl->state, cur)) {
10888                                verbose_linfo(env, insn_idx, "; ");
10889                                verbose(env, "infinite loop detected at insn %d\n", insn_idx);
10890                                return -EINVAL;
10891                        }
10892                        /* if the verifier is processing a loop, avoid adding new state
10893                         * too often, since different loop iterations have distinct
10894                         * states and may not help future pruning.
10895                         * This threshold shouldn't be too low to make sure that
10896                         * a loop with large bound will be rejected quickly.
10897                         * The most abusive loop will be:
10898                         * r1 += 1
10899                         * if r1 < 1000000 goto pc-2
10900                         * 1M insn_processed limit / 100 == 10k peak states.
10901                         * This threshold shouldn't be too high either, since states
10902                         * at the end of the loop are likely to be useful in pruning.
10903                         */
10904                        if (env->jmps_processed - env->prev_jmps_processed < 20 &&
10905                            env->insn_processed - env->prev_insn_processed < 100)
10906                                add_new_state = false;
10907                        goto miss;
10908                }
10909                if (states_equal(env, &sl->state, cur)) {
10910                        sl->hit_cnt++;
10911                        /* reached equivalent register/stack state,
10912                         * prune the search.
10913                         * Registers read by the continuation are read by us.
10914                         * If we have any write marks in env->cur_state, they
10915                         * will prevent corresponding reads in the continuation
10916                         * from reaching our parent (an explored_state).  Our
10917                         * own state will get the read marks recorded, but
10918                         * they'll be immediately forgotten as we're pruning
10919                         * this state and will pop a new one.
10920                         */
10921                        err = propagate_liveness(env, &sl->state, cur);
10922
10923                        /* if previous state reached the exit with precision and
10924                         * current state is equivalent to it (except for precision marks)
10925                         * the precision needs to be propagated back in
10926                         * the current state.
10927                         */
10928                        err = err ? : push_jmp_history(env, cur);
10929                        err = err ? : propagate_precision(env, &sl->state);
10930                        if (err)
10931                                return err;
10932                        return 1;
10933                }
10934miss:
10935                /* when a new state is not going to be added, do not increase the miss count.
10936                 * Otherwise several loop iterations will remove the state
10937                 * recorded earlier. The goal of these heuristics is to have
10938                 * states from some iterations of the loop (some in the beginning
10939                 * and some at the end) to help pruning.
10940                 */
10941                if (add_new_state)
10942                        sl->miss_cnt++;
10943                /* heuristic to determine whether this state is beneficial
10944                 * to keep checking from state equivalence point of view.
10945                 * Higher numbers increase max_states_per_insn and verification time,
10946                 * but do not meaningfully decrease insn_processed.
10947                 */
10948                if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
10949                        /* the state is unlikely to be useful. Remove it to
10950                         * speed up verification
10951                         */
10952                        *pprev = sl->next;
10953                        if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
10954                                u32 br = sl->state.branches;
10955
10956                                WARN_ONCE(br,
10957                                          "BUG live_done but branches_to_explore %d\n",
10958                                          br);
10959                                free_verifier_state(&sl->state, false);
10960                                kfree(sl);
10961                                env->peak_states--;
10962                        } else {
10963                                /* cannot free this state, since parentage chain may
10964                                 * walk it later. Add it to the free_list instead to
10965                                 * be freed at the end of verification
10966                                 */
10967                                sl->next = env->free_list;
10968                                env->free_list = sl;
10969                        }
10970                        sl = *pprev;
10971                        continue;
10972                }
10973next:
10974                pprev = &sl->next;
10975                sl = *pprev;
10976        }
10977
10978        if (env->max_states_per_insn < states_cnt)
10979                env->max_states_per_insn = states_cnt;
10980
10981        if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
10982                return push_jmp_history(env, cur);
10983
10984        if (!add_new_state)
10985                return push_jmp_history(env, cur);
10986
10987        /* There were no equivalent states, remember the current one.
10988         * Technically the current state is not proven to be safe yet,
10989         * but it will either reach the outermost bpf_exit (which means it's safe)
10990         * or it will be rejected. When there are no loops the verifier won't be
10991         * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
10992         * again on the way to bpf_exit.
10993         * When looping the sl->state.branches will be > 0 and this state
10994         * will not be considered for equivalence until branches == 0.
10995         */
10996        new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
10997        if (!new_sl)
10998                return -ENOMEM;
10999        env->total_states++;
11000        env->peak_states++;
11001        env->prev_jmps_processed = env->jmps_processed;
11002        env->prev_insn_processed = env->insn_processed;
11003
11004        /* add new state to the head of linked list */
11005        new = &new_sl->state;
11006        err = copy_verifier_state(new, cur);
11007        if (err) {
11008                free_verifier_state(new, false);
11009                kfree(new_sl);
11010                return err;
11011        }
11012        new->insn_idx = insn_idx;
11013        WARN_ONCE(new->branches != 1,
11014                  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
11015
11016        cur->parent = new;
11017        cur->first_insn_idx = insn_idx;
11018        clear_jmp_history(cur);
11019        new_sl->next = *explored_state(env, insn_idx);
11020        *explored_state(env, insn_idx) = new_sl;
11021        /* connect new state to parentage chain. Current frame needs all
11022         * registers connected. Only r6 - r9 of the callers are alive (pushed
11023         * to the stack implicitly by JITs) so in callers' frames connect just
11024         * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
11025         * the state of the call instruction (with WRITTEN set), and r0 comes
11026         * from callee with its full parentage chain, anyway.
11027         */
11028        /* clear write marks in current state: the writes we did are not writes
11029         * our child did, so they don't screen off its reads from us.
11030         * (There are no read marks in current state, because reads always mark
11031         * their parent and current state never has children yet.  Only
11032         * explored_states can get read marks.)
11033         */
11034        for (j = 0; j <= cur->curframe; j++) {
11035                for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
11036                        cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
11037                for (i = 0; i < BPF_REG_FP; i++)
11038                        cur->frame[j]->regs[i].live = REG_LIVE_NONE;
11039        }
11040
11041        /* all stack frames are accessible from callee, clear them all */
11042        for (j = 0; j <= cur->curframe; j++) {
11043                struct bpf_func_state *frame = cur->frame[j];
11044                struct bpf_func_state *newframe = new->frame[j];
11045
11046                for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
11047                        frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
11048                        frame->stack[i].spilled_ptr.parent =
11049                                                &newframe->stack[i].spilled_ptr;
11050                }
11051        }
11052        return 0;
11053}
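
/* The eviction heuristic above works out as follows: with
 * miss_cnt > hit_cnt * 3 + 3, a state that never helped pruning
 * (hit_cnt == 0) is dropped on its 4th miss, while a state with
 * hit_cnt == 2 survives until its 10th miss.
 */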
11054
11055/* Return true if it's OK to have the same insn return a different type. */
11056static bool reg_type_mismatch_ok(enum bpf_reg_type type)
11057{
11058        switch (type) {
11059        case PTR_TO_CTX:
11060        case PTR_TO_SOCKET:
11061        case PTR_TO_SOCKET_OR_NULL:
11062        case PTR_TO_SOCK_COMMON:
11063        case PTR_TO_SOCK_COMMON_OR_NULL:
11064        case PTR_TO_TCP_SOCK:
11065        case PTR_TO_TCP_SOCK_OR_NULL:
11066        case PTR_TO_XDP_SOCK:
11067        case PTR_TO_BTF_ID:
11068        case PTR_TO_BTF_ID_OR_NULL:
11069                return false;
11070        default:
11071                return true;
11072        }
11073}
11074
11075/* If an instruction was previously used with particular pointer types, then we
11076 * need to be careful to avoid cases such as the one below, where it may be ok
11077 * for one branch to access the pointer, but not ok for the other branch:
11078 *
11079 * R1 = sock_ptr
11080 * goto X;
11081 * ...
11082 * R1 = some_other_valid_ptr;
11083 * goto X;
11084 * ...
11085 * R2 = *(u32 *)(R1 + 0);
11086 */
11087static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
11088{
11089        return src != prev && (!reg_type_mismatch_ok(src) ||
11090                               !reg_type_mismatch_ok(prev));
11091}
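
/* E.g. reg_type_mismatch(PTR_TO_CTX, PTR_TO_STACK) returns true and the
 * program is rejected, because PTR_TO_CTX is not in the _ok list above.
 * reg_type_mismatch(PTR_TO_STACK, PTR_TO_MAP_VALUE) returns false: both types
 * tolerate sharing the insn, and each path is still bounds-checked separately
 * by check_mem_access().
 */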
11092
11093static int do_check(struct bpf_verifier_env *env)
11094{
11095        bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11096        struct bpf_verifier_state *state = env->cur_state;
11097        struct bpf_insn *insns = env->prog->insnsi;
11098        struct bpf_reg_state *regs;
11099        int insn_cnt = env->prog->len;
11100        bool do_print_state = false;
11101        int prev_insn_idx = -1;
11102
11103        for (;;) {
11104                struct bpf_insn *insn;
11105                u8 class;
11106                int err;
11107
11108                env->prev_insn_idx = prev_insn_idx;
11109                if (env->insn_idx >= insn_cnt) {
11110                        verbose(env, "invalid insn idx %d insn_cnt %d\n",
11111                                env->insn_idx, insn_cnt);
11112                        return -EFAULT;
11113                }
11114
11115                insn = &insns[env->insn_idx];
11116                class = BPF_CLASS(insn->code);
11117
11118                if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
11119                        verbose(env,
11120                                "BPF program is too large. Processed %d insn\n",
11121                                env->insn_processed);
11122                        return -E2BIG;
11123                }
11124
11125                err = is_state_visited(env, env->insn_idx);
11126                if (err < 0)
11127                        return err;
11128                if (err == 1) {
11129                        /* found equivalent state, can prune the search */
11130                        if (env->log.level & BPF_LOG_LEVEL) {
11131                                if (do_print_state)
11132                                        verbose(env, "\nfrom %d to %d%s: safe\n",
11133                                                env->prev_insn_idx, env->insn_idx,
11134                                                env->cur_state->speculative ?
11135                                                " (speculative execution)" : "");
11136                                else
11137                                        verbose(env, "%d: safe\n", env->insn_idx);
11138                        }
11139                        goto process_bpf_exit;
11140                }
11141
11142                if (signal_pending(current))
11143                        return -EAGAIN;
11144
11145                if (need_resched())
11146                        cond_resched();
11147
11148                if (env->log.level & BPF_LOG_LEVEL2 ||
11149                    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
11150                        if (env->log.level & BPF_LOG_LEVEL2)
11151                                verbose(env, "%d:", env->insn_idx);
11152                        else
11153                                verbose(env, "\nfrom %d to %d%s:",
11154                                        env->prev_insn_idx, env->insn_idx,
11155                                        env->cur_state->speculative ?
11156                                        " (speculative execution)" : "");
11157                        print_verifier_state(env, state->frame[state->curframe]);
11158                        do_print_state = false;
11159                }
11160
11161                if (env->log.level & BPF_LOG_LEVEL) {
11162                        const struct bpf_insn_cbs cbs = {
11163                                .cb_call        = disasm_kfunc_name,
11164                                .cb_print       = verbose,
11165                                .private_data   = env,
11166                        };
11167
11168                        verbose_linfo(env, env->insn_idx, "; ");
11169                        verbose(env, "%d: ", env->insn_idx);
11170                        print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
11171                }
11172
11173                if (bpf_prog_is_dev_bound(env->prog->aux)) {
11174                        err = bpf_prog_offload_verify_insn(env, env->insn_idx,
11175                                                           env->prev_insn_idx);
11176                        if (err)
11177                                return err;
11178                }
11179
11180                regs = cur_regs(env);
11181                sanitize_mark_insn_seen(env);
11182                prev_insn_idx = env->insn_idx;
11183
11184                if (class == BPF_ALU || class == BPF_ALU64) {
11185                        err = check_alu_op(env, insn);
11186                        if (err)
11187                                return err;
11188
11189                } else if (class == BPF_LDX) {
11190                        enum bpf_reg_type *prev_src_type, src_reg_type;
11191
11192                        /* check for reserved fields is already done */
11193
11194                        /* check src operand */
11195                        err = check_reg_arg(env, insn->src_reg, SRC_OP);
11196                        if (err)
11197                                return err;
11198
11199                        err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
11200                        if (err)
11201                                return err;
11202
11203                        src_reg_type = regs[insn->src_reg].type;
11204
11205                        /* check that memory (src_reg + off) is readable,
11206                         * the state of dst_reg will be updated by this func
11207                         */
11208                        err = check_mem_access(env, env->insn_idx, insn->src_reg,
11209                                               insn->off, BPF_SIZE(insn->code),
11210                                               BPF_READ, insn->dst_reg, false);
11211                        if (err)
11212                                return err;
11213
11214                        prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11215
11216                        if (*prev_src_type == NOT_INIT) {
11217                                /* saw a valid insn
11218                                 * dst_reg = *(u32 *)(src_reg + off)
11219                                 * save type to validate intersecting paths
11220                                 */
11221                                *prev_src_type = src_reg_type;
11222
11223                        } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
11224                                /* A buggy or malicious program is trying to use the same insn
11225                                 * dst_reg = *(u32*) (src_reg + off)
11226                                 * with different pointer types:
11227                                 * src_reg == ctx in one branch and
11228                                 * src_reg == stack|map in some other branch.
11229                                 * Reject it.
11230                                 */
11231                                verbose(env, "same insn cannot be used with different pointers\n");
11232                                return -EINVAL;
11233                        }
11234
11235                } else if (class == BPF_STX) {
11236                        enum bpf_reg_type *prev_dst_type, dst_reg_type;
11237
11238                        if (BPF_MODE(insn->code) == BPF_ATOMIC) {
11239                                err = check_atomic(env, env->insn_idx, insn);
11240                                if (err)
11241                                        return err;
11242                                env->insn_idx++;
11243                                continue;
11244                        }
11245
11246                        if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
11247                                verbose(env, "BPF_STX uses reserved fields\n");
11248                                return -EINVAL;
11249                        }
11250
11251                        /* check src1 operand */
11252                        err = check_reg_arg(env, insn->src_reg, SRC_OP);
11253                        if (err)
11254                                return err;
11255                        /* check src2 operand */
11256                        err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11257                        if (err)
11258                                return err;
11259
11260                        dst_reg_type = regs[insn->dst_reg].type;
11261
11262                        /* check that memory (dst_reg + off) is writeable */
11263                        err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11264                                               insn->off, BPF_SIZE(insn->code),
11265                                               BPF_WRITE, insn->src_reg, false);
11266                        if (err)
11267                                return err;
11268
11269                        prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11270
11271                        if (*prev_dst_type == NOT_INIT) {
11272                                *prev_dst_type = dst_reg_type;
11273                        } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
11274                                verbose(env, "same insn cannot be used with different pointers\n");
11275                                return -EINVAL;
11276                        }
11277
11278                } else if (class == BPF_ST) {
11279                        if (BPF_MODE(insn->code) != BPF_MEM ||
11280                            insn->src_reg != BPF_REG_0) {
11281                                verbose(env, "BPF_ST uses reserved fields\n");
11282                                return -EINVAL;
11283                        }
11284                        /* check src operand */
11285                        err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11286                        if (err)
11287                                return err;
11288
11289                        if (is_ctx_reg(env, insn->dst_reg)) {
11290                                verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
11291                                        insn->dst_reg,
11292                                        reg_type_str[reg_state(env, insn->dst_reg)->type]);
11293                                return -EACCES;
11294                        }
11295
11296                        /* check that memory (dst_reg + off) is writeable */
11297                        err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11298                                               insn->off, BPF_SIZE(insn->code),
11299                                               BPF_WRITE, -1, false);
11300                        if (err)
11301                                return err;
11302
11303                } else if (class == BPF_JMP || class == BPF_JMP32) {
11304                        u8 opcode = BPF_OP(insn->code);
11305
11306                        env->jmps_processed++;
11307                        if (opcode == BPF_CALL) {
11308                                if (BPF_SRC(insn->code) != BPF_K ||
11309                                    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
11310                                     && insn->off != 0) ||
11311                                    (insn->src_reg != BPF_REG_0 &&
11312                                     insn->src_reg != BPF_PSEUDO_CALL &&
11313                                     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
11314                                    insn->dst_reg != BPF_REG_0 ||
11315                                    class == BPF_JMP32) {
11316                                        verbose(env, "BPF_CALL uses reserved fields\n");
11317                                        return -EINVAL;
11318                                }
11319
11320                                if (env->cur_state->active_spin_lock &&
11321                                    (insn->src_reg == BPF_PSEUDO_CALL ||
11322                                     insn->imm != BPF_FUNC_spin_unlock)) {
11323                                        verbose(env, "function calls are not allowed while holding a lock\n");
11324                                        return -EINVAL;
11325                                }
11326                                if (insn->src_reg == BPF_PSEUDO_CALL)
11327                                        err = check_func_call(env, insn, &env->insn_idx);
11328                                else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
11329                                        err = check_kfunc_call(env, insn);
11330                                else
11331                                        err = check_helper_call(env, insn, &env->insn_idx);
11332                                if (err)
11333                                        return err;
11334                        } else if (opcode == BPF_JA) {
11335                                if (BPF_SRC(insn->code) != BPF_K ||
11336                                    insn->imm != 0 ||
11337                                    insn->src_reg != BPF_REG_0 ||
11338                                    insn->dst_reg != BPF_REG_0 ||
11339                                    class == BPF_JMP32) {
11340                                        verbose(env, "BPF_JA uses reserved fields\n");
11341                                        return -EINVAL;
11342                                }
11343
11344                                env->insn_idx += insn->off + 1;
11345                                continue;
11346
11347                        } else if (opcode == BPF_EXIT) {
11348                                if (BPF_SRC(insn->code) != BPF_K ||
11349                                    insn->imm != 0 ||
11350                                    insn->src_reg != BPF_REG_0 ||
11351                                    insn->dst_reg != BPF_REG_0 ||
11352                                    class == BPF_JMP32) {
11353                                        verbose(env, "BPF_EXIT uses reserved fields\n");
11354                                        return -EINVAL;
11355                                }
11356
11357                                if (env->cur_state->active_spin_lock) {
11358                                        verbose(env, "bpf_spin_unlock is missing\n");
11359                                        return -EINVAL;
11360                                }
11361
11362                                if (state->curframe) {
11363                                        /* exit from nested function */
11364                                        err = prepare_func_exit(env, &env->insn_idx);
11365                                        if (err)
11366                                                return err;
11367                                        do_print_state = true;
11368                                        continue;
11369                                }
11370
11371                                err = check_reference_leak(env);
11372                                if (err)
11373                                        return err;
11374
11375                                err = check_return_code(env);
11376                                if (err)
11377                                        return err;
11378process_bpf_exit:
11379                                update_branch_counts(env, env->cur_state);
11380                                err = pop_stack(env, &prev_insn_idx,
11381                                                &env->insn_idx, pop_log);
11382                                if (err < 0) {
11383                                        if (err != -ENOENT)
11384                                                return err;
11385                                        break;
11386                                } else {
11387                                        do_print_state = true;
11388                                        continue;
11389                                }
11390                        } else {
11391                                err = check_cond_jmp_op(env, insn, &env->insn_idx);
11392                                if (err)
11393                                        return err;
11394                        }
11395                } else if (class == BPF_LD) {
11396                        u8 mode = BPF_MODE(insn->code);
11397
11398                        if (mode == BPF_ABS || mode == BPF_IND) {
11399                                err = check_ld_abs(env, insn);
11400                                if (err)
11401                                        return err;
11402
11403                        } else if (mode == BPF_IMM) {
11404                                err = check_ld_imm(env, insn);
11405                                if (err)
11406                                        return err;
11407
11408                                env->insn_idx++;
11409                                sanitize_mark_insn_seen(env);
11410                        } else {
11411                                verbose(env, "invalid BPF_LD mode\n");
11412                                return -EINVAL;
11413                        }
11414                } else {
11415                        verbose(env, "unknown insn class %d\n", class);
11416                        return -EINVAL;
11417                }
11418
11419                env->insn_idx++;
11420        }
11421
11422        return 0;
11423}
11424
11425static int find_btf_percpu_datasec(struct btf *btf)
11426{
11427        const struct btf_type *t;
11428        const char *tname;
11429        int i, n;
11430
11431        /*
11432         * vmlinux and each kernel module have their own ".data..percpu"
11433         * DATASEC in BTF. So for a module's case, we need to skip vmlinux BTF
11434         * types to look at only the module's own BTF types.
11435         */
11436        n = btf_nr_types(btf);
11437        if (btf_is_module(btf))
11438                i = btf_nr_types(btf_vmlinux);
11439        else
11440                i = 1;
11441
11442        for (; i < n; i++) {
11443                t = btf_type_by_id(btf, i);
11444                if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
11445                        continue;
11446
11447                tname = btf_name_by_offset(btf, t->name_off);
11448                if (!strcmp(tname, ".data..percpu"))
11449                        return i;
11450        }
11451
11452        return -ENOENT;
11453}
11454
11455/* replace pseudo btf_id with kernel symbol address */
11456static int check_pseudo_btf_id(struct bpf_verifier_env *env,
11457                               struct bpf_insn *insn,
11458                               struct bpf_insn_aux_data *aux)
11459{
11460        const struct btf_var_secinfo *vsi;
11461        const struct btf_type *datasec;
11462        struct btf_mod_pair *btf_mod;
11463        const struct btf_type *t;
11464        const char *sym_name;
11465        bool percpu = false;
11466        u32 type, id = insn->imm;
11467        struct btf *btf;
11468        s32 datasec_id;
11469        u64 addr;
11470        int i, btf_fd, err;
11471
11472        btf_fd = insn[1].imm;
11473        if (btf_fd) {
11474                btf = btf_get_by_fd(btf_fd);
11475                if (IS_ERR(btf)) {
11476                        verbose(env, "invalid module BTF object FD specified.\n");
11477                        return -EINVAL;
11478                }
11479        } else {
11480                if (!btf_vmlinux) {
11481                        verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
11482                        return -EINVAL;
11483                }
11484                btf = btf_vmlinux;
11485                btf_get(btf);
11486        }
11487
11488        t = btf_type_by_id(btf, id);
11489        if (!t) {
11490                verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
11491                err = -ENOENT;
11492                goto err_put;
11493        }
11494
11495        if (!btf_type_is_var(t)) {
11496                verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
11497                err = -EINVAL;
11498                goto err_put;
11499        }
11500
11501        sym_name = btf_name_by_offset(btf, t->name_off);
11502        addr = kallsyms_lookup_name(sym_name);
11503        if (!addr) {
11504                verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
11505                        sym_name);
11506                err = -ENOENT;
11507                goto err_put;
11508        }
11509
11510        datasec_id = find_btf_percpu_datasec(btf);
11511        if (datasec_id > 0) {
11512                datasec = btf_type_by_id(btf, datasec_id);
11513                for_each_vsi(i, datasec, vsi) {
11514                        if (vsi->type == id) {
11515                                percpu = true;
11516                                break;
11517                        }
11518                }
11519        }
11520
11521        insn[0].imm = (u32)addr;
11522        insn[1].imm = addr >> 32;
11523
11524        type = t->type;
11525        t = btf_type_skip_modifiers(btf, type, NULL);
11526        if (percpu) {
11527                aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
11528                aux->btf_var.btf = btf;
11529                aux->btf_var.btf_id = type;
11530        } else if (!btf_type_is_struct(t)) {
11531                const struct btf_type *ret;
11532                const char *tname;
11533                u32 tsize;
11534
11535                /* resolve the type size of ksym. */
11536                ret = btf_resolve_size(btf, t, &tsize);
11537                if (IS_ERR(ret)) {
11538                        tname = btf_name_by_offset(btf, t->name_off);
11539                        verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
11540                                tname, PTR_ERR(ret));
11541                        err = -EINVAL;
11542                        goto err_put;
11543                }
11544                aux->btf_var.reg_type = PTR_TO_MEM;
11545                aux->btf_var.mem_size = tsize;
11546        } else {
11547                aux->btf_var.reg_type = PTR_TO_BTF_ID;
11548                aux->btf_var.btf = btf;
11549                aux->btf_var.btf_id = type;
11550        }
11551
11552        /* check whether we recorded this BTF (and maybe module) already */
11553        for (i = 0; i < env->used_btf_cnt; i++) {
11554                if (env->used_btfs[i].btf == btf) {
11555                        btf_put(btf);
11556                        return 0;
11557                }
11558        }
11559
11560        if (env->used_btf_cnt >= MAX_USED_BTFS) {
11561                err = -E2BIG;
11562                goto err_put;
11563        }
11564
11565        btf_mod = &env->used_btfs[env->used_btf_cnt];
11566        btf_mod->btf = btf;
11567        btf_mod->module = NULL;
11568
11569        /* if we reference variables from a kernel module, bump its refcount */
11570        if (btf_is_module(btf)) {
11571                btf_mod->module = btf_try_get_module(btf);
11572                if (!btf_mod->module) {
11573                        err = -ENXIO;
11574                        goto err_put;
11575                }
11576        }
11577
11578        env->used_btf_cnt++;
11579
11580        return 0;
11581err_put:
11582        btf_put(btf);
11583        return err;
11584}
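
/* Sketch of the ldimm64 rewrite performed above, assuming a vmlinux ksym
 * (so insn[1].imm, the BTF object FD, is 0):
 *
 *   before:  insn[0].imm = btf_id of the VAR    insn[1].imm = 0
 *   after:   insn[0].imm = lower 32 bits of the symbol address
 *            insn[1].imm = upper 32 bits of the symbol address
 *
 * so at run time the register receives the symbol's kernel address as a
 * 64-bit immediate.
 */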
11585
11586static int check_map_prealloc(struct bpf_map *map)
11587{
11588        return (map->map_type != BPF_MAP_TYPE_HASH &&
11589                map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
11590                map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
11591                !(map->map_flags & BPF_F_NO_PREALLOC);
11592}
11593
11594static bool is_tracing_prog_type(enum bpf_prog_type type)
11595{
11596        switch (type) {
11597        case BPF_PROG_TYPE_KPROBE:
11598        case BPF_PROG_TYPE_TRACEPOINT:
11599        case BPF_PROG_TYPE_PERF_EVENT:
11600        case BPF_PROG_TYPE_RAW_TRACEPOINT:
11601                return true;
11602        default:
11603                return false;
11604        }
11605}
11606
11607static bool is_preallocated_map(struct bpf_map *map)
11608{
11609        if (!check_map_prealloc(map))
11610                return false;
11611        if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
11612                return false;
11613        return true;
11614}
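
/* For instance: a plain BPF_MAP_TYPE_HASH created without BPF_F_NO_PREALLOC
 * counts as preallocated, the same map created with BPF_F_NO_PREALLOC does
 * not, and map types outside the three hash variants listed in
 * check_map_prealloc() (e.g. arrays) always pass this check.  For a
 * hash-of-maps, the inner map template must be preallocated too.
 */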
11615
11616static int check_map_prog_compatibility(struct bpf_verifier_env *env,
11617                                        struct bpf_map *map,
11618                                        struct bpf_prog *prog)
11619
11620{
11621        enum bpf_prog_type prog_type = resolve_prog_type(prog);
11622        /*
11623         * Validate that trace type programs use preallocated hash maps.
11624         *
11625         * For programs attached to PERF events this is mandatory as the
11626         * perf NMI can hit any arbitrary code sequence.
11627         *
11628         * All other trace types not using preallocated hash maps are unsafe as
11629         * well because tracepoints or kprobes can be inside locked regions
11630         * of the memory allocator or at a place where a recursion into the
11631         * memory allocator would see inconsistent state.
11632         *
11633         * On RT enabled kernels run-time allocation of all trace type
11634         * programs is strictly prohibited due to lock type constraints. On
11635         * !RT kernels it is allowed for backwards compatibility reasons for
11636         * now, but warnings are emitted so developers are made aware of
11637         * the unsafety and can fix their programs before this is enforced.
11638         */
11639        if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
11640                if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
11641                        verbose(env, "perf_event programs can only use preallocated hash map\n");
11642                        return -EINVAL;
11643                }
11644                if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
11645                        verbose(env, "trace type programs can only use preallocated hash map\n");
11646                        return -EINVAL;
11647                }
11648                WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
11649                verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
11650        }
11651
11652        if (map_value_has_spin_lock(map)) {
11653                if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
11654                        verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
11655                        return -EINVAL;
11656                }
11657
11658                if (is_tracing_prog_type(prog_type)) {
11659                        verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
11660                        return -EINVAL;
11661                }
11662
11663                if (prog->aux->sleepable) {
11664                        verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
11665                        return -EINVAL;
11666                }
11667        }
11668
11669        if (map_value_has_timer(map)) {
11670                if (is_tracing_prog_type(prog_type)) {
11671                        verbose(env, "tracing progs cannot use bpf_timer yet\n");
11672                        return -EINVAL;
11673                }
11674        }
11675
11676        if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
11677            !bpf_offload_prog_map_match(prog, map)) {
11678                verbose(env, "offload device mismatch between prog and map\n");
11679                return -EINVAL;
11680        }
11681
11682        if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
11683                verbose(env, "bpf_struct_ops map cannot be used in prog\n");
11684                return -EINVAL;
11685        }
11686
11687        if (prog->aux->sleepable)
11688                switch (map->map_type) {
11689                case BPF_MAP_TYPE_HASH:
11690                case BPF_MAP_TYPE_LRU_HASH:
11691                case BPF_MAP_TYPE_ARRAY:
11692                case BPF_MAP_TYPE_PERCPU_HASH:
11693                case BPF_MAP_TYPE_PERCPU_ARRAY:
11694                case BPF_MAP_TYPE_LRU_PERCPU_HASH:
11695                case BPF_MAP_TYPE_ARRAY_OF_MAPS:
11696                case BPF_MAP_TYPE_HASH_OF_MAPS:
11697                        if (!is_preallocated_map(map)) {
11698                                verbose(env,
11699                                        "Sleepable programs can only use preallocated maps\n");
11700                                return -EINVAL;
11701                        }
11702                        break;
11703                case BPF_MAP_TYPE_RINGBUF:
11704                        break;
11705                default:
11706                        verbose(env,
11707                                "Sleepable programs can only use array, hash, and ringbuf maps\n");
11708                        return -EINVAL;
11709                }
11710
11711        return 0;
11712}
11713
11714static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
11715{
11716        return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
11717                map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
11718}
11719
11720/* find and rewrite pseudo imm in ld_imm64 instructions:
11721 *
11722 * 1. if it accesses a map FD, replace it with the actual map pointer.
11723 * 2. if it accesses the btf_id of a VAR, replace it with a pointer to the var.
11724 *
11725 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
11726 */
11727static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
11728{
11729        struct bpf_insn *insn = env->prog->insnsi;
11730        int insn_cnt = env->prog->len;
11731        int i, j, err;
11732
11733        err = bpf_prog_calc_tag(env->prog);
11734        if (err)
11735                return err;
11736
11737        for (i = 0; i < insn_cnt; i++, insn++) {
11738                if (BPF_CLASS(insn->code) == BPF_LDX &&
11739                    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
11740                        verbose(env, "BPF_LDX uses reserved fields\n");
11741                        return -EINVAL;
11742                }
11743
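                /* A BPF_LD | BPF_IMM | BPF_DW insn spans two insn slots: the
                 * 64-bit immediate is split across insn[0].imm (low 32 bits)
                 * and insn[1].imm (high 32 bits), and src_reg selects the
                 * pseudo variant (plain imm, map fd/index, map value, btf id
                 * of a variable, or subprog function).
                 */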
11744                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
11745                        struct bpf_insn_aux_data *aux;
11746                        struct bpf_map *map;
11747                        struct fd f;
11748                        u64 addr;
11749                        u32 fd;
11750
11751                        if (i == insn_cnt - 1 || insn[1].code != 0 ||
11752                            insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
11753                            insn[1].off != 0) {
11754                                verbose(env, "invalid bpf_ld_imm64 insn\n");
11755                                return -EINVAL;
11756                        }
11757
11758                        if (insn[0].src_reg == 0)
11759                                /* valid generic load 64-bit imm */
11760                                goto next_insn;
11761
11762                        if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
11763                                aux = &env->insn_aux_data[i];
11764                                err = check_pseudo_btf_id(env, insn, aux);
11765                                if (err)
11766                                        return err;
11767                                goto next_insn;
11768                        }
11769
11770                        if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
11771                                aux = &env->insn_aux_data[i];
11772                                aux->ptr_type = PTR_TO_FUNC;
11773                                goto next_insn;
11774                        }
11775
11776                        /* In final convert_pseudo_ld_imm64() step, this is
11777                         * converted into regular 64-bit imm load insn.
11778                         */
11779                        switch (insn[0].src_reg) {
11780                        case BPF_PSEUDO_MAP_VALUE:
11781                        case BPF_PSEUDO_MAP_IDX_VALUE:
11782                                break;
11783                        case BPF_PSEUDO_MAP_FD:
11784                        case BPF_PSEUDO_MAP_IDX:
11785                                if (insn[1].imm == 0)
11786                                        break;
11787                                fallthrough;
11788                        default:
11789                                verbose(env, "unrecognized bpf_ld_imm64 insn\n");
11790                                return -EINVAL;
11791                        }
11792
11793                        switch (insn[0].src_reg) {
11794                        case BPF_PSEUDO_MAP_IDX_VALUE:
11795                        case BPF_PSEUDO_MAP_IDX:
11796                                if (bpfptr_is_null(env->fd_array)) {
11797                                        verbose(env, "fd_idx without fd_array is invalid\n");
11798                                        return -EPROTO;
11799                                }
11800                                if (copy_from_bpfptr_offset(&fd, env->fd_array,
11801                                                            insn[0].imm * sizeof(fd),
11802                                                            sizeof(fd)))
11803                                        return -EFAULT;
11804                                break;
11805                        default:
11806                                fd = insn[0].imm;
11807                                break;
11808                        }
11809
11810                        f = fdget(fd);
11811                        map = __bpf_map_get(f);
11812                        if (IS_ERR(map)) {
11813                                verbose(env, "fd %d is not pointing to valid bpf_map\n",
11814                                        insn[0].imm);
11815                                return PTR_ERR(map);
11816                        }
11817
11818                        err = check_map_prog_compatibility(env, map, env->prog);
11819                        if (err) {
11820                                fdput(f);
11821                                return err;
11822                        }
11823
11824                        aux = &env->insn_aux_data[i];
11825                        if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
11826                            insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
11827                                addr = (unsigned long)map;
11828                        } else {
11829                                u32 off = insn[1].imm;
11830
11831                                if (off >= BPF_MAX_VAR_OFF) {
11832                                        verbose(env, "direct value offset of %u is not allowed\n", off);
11833                                        fdput(f);
11834                                        return -EINVAL;
11835                                }
11836
11837                                if (!map->ops->map_direct_value_addr) {
11838                                        verbose(env, "no direct value access support for this map type\n");
11839                                        fdput(f);
11840                                        return -EINVAL;
11841                                }
11842
11843                                err = map->ops->map_direct_value_addr(map, &addr, off);
11844                                if (err) {
11845                                        verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
11846                                                map->value_size, off);
11847                                        fdput(f);
11848                                        return err;
11849                                }
11850
11851                                aux->map_off = off;
11852                                addr += off;
11853                        }
11854
11855                        insn[0].imm = (u32)addr;
11856                        insn[1].imm = addr >> 32;
11857
11858                        /* check whether we recorded this map already */
11859                        for (j = 0; j < env->used_map_cnt; j++) {
11860                                if (env->used_maps[j] == map) {
11861                                        aux->map_index = j;
11862                                        fdput(f);
11863                                        goto next_insn;
11864                                }
11865                        }
11866
11867                        if (env->used_map_cnt >= MAX_USED_MAPS) {
11868                                fdput(f);
11869                                return -E2BIG;
11870                        }
11871
11872                        /* hold the map. If the program is rejected by the verifier,
11873                         * the map will be released by release_maps() or it
11874                         * will be used by the valid program until it's unloaded
11875                         * and all maps are released in free_used_maps()
11876                         */
11877                        bpf_map_inc(map);
11878
11879                        aux->map_index = env->used_map_cnt;
11880                        env->used_maps[env->used_map_cnt++] = map;
11881
11882                        if (bpf_map_is_cgroup_storage(map) &&
11883                            bpf_cgroup_storage_assign(env->prog->aux, map)) {
11884                                verbose(env, "only one cgroup storage of each type is allowed\n");
11885                                fdput(f);
11886                                return -EBUSY;
11887                        }
11888
11889                        fdput(f);
11890next_insn:
11891                        insn++;
11892                        i++;
11893                        continue;
11894                }
11895
11896                /* Basic sanity check before we invest more work here. */
11897                if (!bpf_opcode_in_insntable(insn->code)) {
11898                        verbose(env, "unknown opcode %02x\n", insn->code);
11899                        return -EINVAL;
11900                }
11901        }
11902
11903        /* now all pseudo BPF_LD_IMM64 instructions load valid
11904         * 'struct bpf_map *' into a register instead of user map_fd.
11905         * These pointers will be used later by the verifier to validate map access.
11906         */
11907        return 0;
11908}
11909
11910/* drop refcnt of maps used by the rejected program */
11911static void release_maps(struct bpf_verifier_env *env)
11912{
11913        __bpf_free_used_maps(env->prog->aux, env->used_maps,
11914                             env->used_map_cnt);
11915}
11916
11917/* drop refcnt of btfs used by the rejected program */
11918static void release_btfs(struct bpf_verifier_env *env)
11919{
11920        __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
11921                             env->used_btf_cnt);
11922}
11923
11924/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
11925static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
11926{
11927        struct bpf_insn *insn = env->prog->insnsi;
11928        int insn_cnt = env->prog->len;
11929        int i;
11930
11931        for (i = 0; i < insn_cnt; i++, insn++) {
11932                if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
11933                        continue;
11934                if (insn->src_reg == BPF_PSEUDO_FUNC)
11935                        continue;
11936                insn->src_reg = 0;
11937        }
11938}
11939
11940/* single env->prog->insnsi[off] instruction was replaced with the range
11941 * insnsi[off, off + cnt).  Adjust the corresponding insn_aux_data by copying
11942 * [0, off) and [off, end) to the new locations, so the patched range stays zeroed
11943 */
11944static void adjust_insn_aux_data(struct bpf_verifier_env *env,
11945                                 struct bpf_insn_aux_data *new_data,
11946                                 struct bpf_prog *new_prog, u32 off, u32 cnt)
11947{
11948        struct bpf_insn_aux_data *old_data = env->insn_aux_data;
11949        struct bpf_insn *insn = new_prog->insnsi;
11950        u32 old_seen = old_data[off].seen;
11951        u32 prog_len;
11952        int i;
11953
11954        /* aux info at OFF always needs adjustment, no matter whether the fast
11955         * path (cnt == 1) is taken or not. There is no guarantee that the insn
11956         * at OFF is still the original insn from the old prog.
11957         */
11958        old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
11959
11960        if (cnt == 1)
11961                return;
11962        prog_len = new_prog->len;
11963
11964        memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
11965        memcpy(new_data + off + cnt - 1, old_data + off,
11966               sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
11967        for (i = off; i < off + cnt - 1; i++) {
11968                /* Expand insni[off]'s seen count to the patched range. */
11969                new_data[i].seen = old_seen;
11970                new_data[i].zext_dst = insn_has_def32(env, insn + i);
11971        }
11972        env->insn_aux_data = new_data;
11973        vfree(old_data);
11974}
11975
11976static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
11977{
11978        int i;
11979
11980        if (len == 1)
11981                return;
11982        /* NOTE: fake 'exit' subprog should be updated as well. */
11983        for (i = 0; i <= env->subprog_cnt; i++) {
11984                if (env->subprog_info[i].start <= off)
11985                        continue;
11986                env->subprog_info[i].start += len - 1;
11987        }
11988}
11989
11990static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
11991{
11992        struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
11993        int i, sz = prog->aux->size_poke_tab;
11994        struct bpf_jit_poke_descriptor *desc;
11995
11996        for (i = 0; i < sz; i++) {
11997                desc = &tab[i];
11998                if (desc->insn_idx <= off)
11999                        continue;
12000                desc->insn_idx += len - 1;
12001        }
12002}
12003
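/* Replace the single insn at @off with @len instructions from @patch and
 * keep the verifier bookkeeping consistent: copy/expand insn_aux_data,
 * shift subprog start offsets and poke descriptor insn indices.
 */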
12004static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
12005                                            const struct bpf_insn *patch, u32 len)
12006{
12007        struct bpf_prog *new_prog;
12008        struct bpf_insn_aux_data *new_data = NULL;
12009
12010        if (len > 1) {
12011                new_data = vzalloc(array_size(env->prog->len + len - 1,
12012                                              sizeof(struct bpf_insn_aux_data)));
12013                if (!new_data)
12014                        return NULL;
12015        }
12016
12017        new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
12018        if (IS_ERR(new_prog)) {
12019                if (PTR_ERR(new_prog) == -ERANGE)
12020                        verbose(env,
12021                                "insn %d cannot be patched due to 16-bit range\n",
12022                                env->insn_aux_data[off].orig_idx);
12023                vfree(new_data);
12024                return NULL;
12025        }
12026        adjust_insn_aux_data(env, new_data, new_prog, off, len);
12027        adjust_subprog_starts(env, off, len);
12028        adjust_poke_descs(new_prog, off, len);
12029        return new_prog;
12030}
12031
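/* Instructions [off, off + cnt) were removed.  Drop subprog entries (and
 * their func_info) whose instructions were entirely removed and shift the
 * start offsets of the remaining subprogs, including the fake 'exit' one,
 * down by cnt.
 */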
12032static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
12033                                              u32 off, u32 cnt)
12034{
12035        int i, j;
12036
12037        /* find first prog starting at or after off (first to remove) */
12038        for (i = 0; i < env->subprog_cnt; i++)
12039                if (env->subprog_info[i].start >= off)
12040                        break;
12041        /* find first prog starting at or after off + cnt (first to stay) */
12042        for (j = i; j < env->subprog_cnt; j++)
12043                if (env->subprog_info[j].start >= off + cnt)
12044                        break;
12045        /* if j doesn't start exactly at off + cnt, we are just removing
12046         * the front of previous prog
12047         */
12048        if (env->subprog_info[j].start != off + cnt)
12049                j--;
12050
12051        if (j > i) {
12052                struct bpf_prog_aux *aux = env->prog->aux;
12053                int move;
12054
12055                /* move fake 'exit' subprog as well */
12056                move = env->subprog_cnt + 1 - j;
12057
12058                memmove(env->subprog_info + i,
12059                        env->subprog_info + j,
12060                        sizeof(*env->subprog_info) * move);
12061                env->subprog_cnt -= j - i;
12062
12063                /* remove func_info */
12064                if (aux->func_info) {
12065                        move = aux->func_info_cnt - j;
12066
12067                        memmove(aux->func_info + i,
12068                                aux->func_info + j,
12069                                sizeof(*aux->func_info) * move);
12070                        aux->func_info_cnt -= j - i;
12071                        /* func_info->insn_off is set after all code rewrites,
12072                         * in adjust_btf_func() - no need to adjust
12073                         */
12074                }
12075        } else {
12076                /* convert i from "first prog to remove" to "first to adjust" */
12077                if (env->subprog_info[i].start == off)
12078                        i++;
12079        }
12080
12081        /* update fake 'exit' subprog as well */
12082        for (; i <= env->subprog_cnt; i++)
12083                env->subprog_info[i].start -= cnt;
12084
12085        return 0;
12086}
12087
12088static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
12089                                      u32 cnt)
12090{
12091        struct bpf_prog *prog = env->prog;
12092        u32 i, l_off, l_cnt, nr_linfo;
12093        struct bpf_line_info *linfo;
12094
12095        nr_linfo = prog->aux->nr_linfo;
12096        if (!nr_linfo)
12097                return 0;
12098
12099        linfo = prog->aux->linfo;
12100
12101        /* find first line info to remove, count lines to be removed */
12102        for (i = 0; i < nr_linfo; i++)
12103                if (linfo[i].insn_off >= off)
12104                        break;
12105
12106        l_off = i;
12107        l_cnt = 0;
12108        for (; i < nr_linfo; i++)
12109                if (linfo[i].insn_off < off + cnt)
12110                        l_cnt++;
12111                else
12112                        break;
12113
12114        /* If the first live insn doesn't match the first live linfo, it needs to
12115         * "inherit" the last removed linfo.  prog is already modified, so prog->len == off
12116         * means no live instructions after (tail of the program was removed).
12117         */
12118        if (prog->len != off && l_cnt &&
12119            (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
12120                l_cnt--;
12121                linfo[--i].insn_off = off + cnt;
12122        }
12123
12124        /* remove the line info entries which refer to the removed instructions */
12125        if (l_cnt) {
12126                memmove(linfo + l_off, linfo + i,
12127                        sizeof(*linfo) * (nr_linfo - i));
12128
12129                prog->aux->nr_linfo -= l_cnt;
12130                nr_linfo = prog->aux->nr_linfo;
12131        }
12132
12133        /* pull all linfo[i].insn_off >= off + cnt in by cnt */
12134        for (i = l_off; i < nr_linfo; i++)
12135                linfo[i].insn_off -= cnt;
12136
12137        /* fix up all subprogs (incl. 'exit') which start >= off */
12138        for (i = 0; i <= env->subprog_cnt; i++)
12139                if (env->subprog_info[i].linfo_idx > l_off) {
12140                        /* program may have started in the removed region but
12141                         * may not be fully removed
12142                         */
12143                        if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
12144                                env->subprog_info[i].linfo_idx -= l_cnt;
12145                        else
12146                                env->subprog_info[i].linfo_idx = l_off;
12147                }
12148
12149        return 0;
12150}
12151
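/* Remove instructions [off, off + cnt) from the program and keep subprog
 * boundaries, line info and insn_aux_data in sync with the new layout.
 */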
12152static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
12153{
12154        struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12155        unsigned int orig_prog_len = env->prog->len;
12156        int err;
12157
12158        if (bpf_prog_is_dev_bound(env->prog->aux))
12159                bpf_prog_offload_remove_insns(env, off, cnt);
12160
12161        err = bpf_remove_insns(env->prog, off, cnt);
12162        if (err)
12163                return err;
12164
12165        err = adjust_subprog_starts_after_remove(env, off, cnt);
12166        if (err)
12167                return err;
12168
12169        err = bpf_adj_linfo_after_remove(env, off, cnt);
12170        if (err)
12171                return err;
12172
12173        memmove(aux_data + off, aux_data + off + cnt,
12174                sizeof(*aux_data) * (orig_prog_len - off - cnt));
12175
12176        return 0;
12177}
12178
12179/* The verifier does more data flow analysis than llvm and will not
12180 * explore branches that are dead at run time. Malicious programs can
12181 * have dead code too. Therefore replace all dead at-run-time code
12182 * with 'ja -1'.
12183 *
12184 * Plain nops would not be optimal: if they sat at the end of the
12185 * program and, through another bug, we managed to jump there, we would
12186 * execute beyond program memory. Returning an exception code also
12187 * wouldn't work since we can have subprogs where the dead code could
12188 * be located.
12189 */
12190static void sanitize_dead_code(struct bpf_verifier_env *env)
12191{
12192        struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12193        struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
12194        struct bpf_insn *insn = env->prog->insnsi;
12195        const int insn_cnt = env->prog->len;
12196        int i;
12197
12198        for (i = 0; i < insn_cnt; i++) {
12199                if (aux_data[i].seen)
12200                        continue;
12201                memcpy(insn + i, &trap, sizeof(trap));
12202                aux_data[i].zext_dst = false;
12203        }
12204}
12205
12206static bool insn_is_cond_jump(u8 code)
12207{
12208        u8 op;
12209
12210        if (BPF_CLASS(code) == BPF_JMP32)
12211                return true;
12212
12213        if (BPF_CLASS(code) != BPF_JMP)
12214                return false;
12215
12216        op = BPF_OP(code);
12217        return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
12218}
12219
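/* Turn a conditional jump whose taken or fall-through side was never
 * visited by the verifier into an unconditional jump to the single live
 * successor: either 'goto off' when the fall-through is dead, or
 * 'goto +0' when the branch target is dead.
 */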
12220static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
12221{
12222        struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12223        struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12224        struct bpf_insn *insn = env->prog->insnsi;
12225        const int insn_cnt = env->prog->len;
12226        int i;
12227
12228        for (i = 0; i < insn_cnt; i++, insn++) {
12229                if (!insn_is_cond_jump(insn->code))
12230                        continue;
12231
12232                if (!aux_data[i + 1].seen)
12233                        ja.off = insn->off;
12234                else if (!aux_data[i + 1 + insn->off].seen)
12235                        ja.off = 0;
12236                else
12237                        continue;
12238
12239                if (bpf_prog_is_dev_bound(env->prog->aux))
12240                        bpf_prog_offload_replace_insn(env, i, &ja);
12241
12242                memcpy(insn, &ja, sizeof(ja));
12243        }
12244}
12245
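/* Physically remove runs of instructions that were never marked as seen
 * during verification; verifier_remove_insns() fixes up all metadata.
 */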
12246static int opt_remove_dead_code(struct bpf_verifier_env *env)
12247{
12248        struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12249        int insn_cnt = env->prog->len;
12250        int i, err;
12251
12252        for (i = 0; i < insn_cnt; i++) {
12253                int j;
12254
12255                j = 0;
12256                while (i + j < insn_cnt && !aux_data[i + j].seen)
12257                        j++;
12258                if (!j)
12259                        continue;
12260
12261                err = verifier_remove_insns(env, i, j);
12262                if (err)
12263                        return err;
12264                insn_cnt = env->prog->len;
12265        }
12266
12267        return 0;
12268}
12269
12270static int opt_remove_nops(struct bpf_verifier_env *env)
12271{
12272        const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12273        struct bpf_insn *insn = env->prog->insnsi;
12274        int insn_cnt = env->prog->len;
12275        int i, err;
12276
12277        for (i = 0; i < insn_cnt; i++) {
12278                if (memcmp(&insn[i], &ja, sizeof(ja)))
12279                        continue;
12280
12281                err = verifier_remove_insns(env, i, 1);
12282                if (err)
12283                        return err;
12284                insn_cnt--;
12285                i--;
12286        }
12287
12288        return 0;
12289}
12290
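/* For 32-bit subregister definitions that the verifier marked as needing
 * an explicit zero-extension (zext_dst), append a BPF_ZEXT_REG patch when
 * the JIT asks for it (or for CMPXCHG).  With BPF_F_TEST_RND_HI32 the
 * upper 32 bits of other 32-bit definitions are poisoned with random
 * values instead, to shake out missing zero-extension assumptions.
 */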
12291static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
12292                                         const union bpf_attr *attr)
12293{
12294        struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
12295        struct bpf_insn_aux_data *aux = env->insn_aux_data;
12296        int i, patch_len, delta = 0, len = env->prog->len;
12297        struct bpf_insn *insns = env->prog->insnsi;
12298        struct bpf_prog *new_prog;
12299        bool rnd_hi32;
12300
12301        rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
12302        zext_patch[1] = BPF_ZEXT_REG(0);
12303        rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
12304        rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
12305        rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
12306        for (i = 0; i < len; i++) {
12307                int adj_idx = i + delta;
12308                struct bpf_insn insn;
12309                int load_reg;
12310
12311                insn = insns[adj_idx];
12312                load_reg = insn_def_regno(&insn);
12313                if (!aux[adj_idx].zext_dst) {
12314                        u8 code, class;
12315                        u32 imm_rnd;
12316
12317                        if (!rnd_hi32)
12318                                continue;
12319
12320                        code = insn.code;
12321                        class = BPF_CLASS(code);
12322                        if (load_reg == -1)
12323                                continue;
12324
12325                        /* NOTE: arg "reg" (the fourth one) is only used for
12326                         *       BPF_STX + SRC_OP, so it is safe to pass NULL
12327                         *       here.
12328                         */
12329                        if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
12330                                if (class == BPF_LD &&
12331                                    BPF_MODE(code) == BPF_IMM)
12332                                        i++;
12333                                continue;
12334                        }
12335
12336                        /* ctx load could be transformed into wider load. */
12337                        if (class == BPF_LDX &&
12338                            aux[adj_idx].ptr_type == PTR_TO_CTX)
12339                                continue;
12340
12341                        imm_rnd = get_random_int();
12342                        rnd_hi32_patch[0] = insn;
12343                        rnd_hi32_patch[1].imm = imm_rnd;
12344                        rnd_hi32_patch[3].dst_reg = load_reg;
12345                        patch = rnd_hi32_patch;
12346                        patch_len = 4;
12347                        goto apply_patch_buffer;
12348                }
12349
12350                /* Add in a zero-extend instruction if a) the JIT has requested
12351                 * it or b) it's a CMPXCHG.
12352                 *
12353                 * The latter is because: BPF_CMPXCHG always loads a value into
12354                 * R0, therefore always zero-extends. However some archs'
12355                 * equivalent instruction only does this load when the
12356                 * comparison is successful. This detail of CMPXCHG is
12357                 * orthogonal to the general zero-extension behaviour of the
12358                 * CPU, so it's treated independently of bpf_jit_needs_zext.
12359                 */
12360                if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
12361                        continue;
12362
12363                if (WARN_ON(load_reg == -1)) {
12364                        verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
12365                        return -EFAULT;
12366                }
12367
12368                zext_patch[0] = insn;
12369                zext_patch[1].dst_reg = load_reg;
12370                zext_patch[1].src_reg = load_reg;
12371                patch = zext_patch;
12372                patch_len = 2;
12373apply_patch_buffer:
12374                new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
12375                if (!new_prog)
12376                        return -ENOMEM;
12377                env->prog = new_prog;
12378                insns = new_prog->insnsi;
12379                aux = env->insn_aux_data;
12380                delta += patch_len - 1;
12381        }
12382
12383        return 0;
12384}
12385
12386/* convert load instructions that access fields of a context type into a
12387 * sequence of instructions that access fields of the underlying structure:
12388 *     struct __sk_buff    -> struct sk_buff
12389 *     struct bpf_sock_ops -> struct sock
12390 */
12391static int convert_ctx_accesses(struct bpf_verifier_env *env)
12392{
12393        const struct bpf_verifier_ops *ops = env->ops;
12394        int i, cnt, size, ctx_field_size, delta = 0;
12395        const int insn_cnt = env->prog->len;
12396        struct bpf_insn insn_buf[16], *insn;
12397        u32 target_size, size_default, off;
12398        struct bpf_prog *new_prog;
12399        enum bpf_access_type type;
12400        bool is_narrower_load;
12401
12402        if (ops->gen_prologue || env->seen_direct_write) {
12403                if (!ops->gen_prologue) {
12404                        verbose(env, "bpf verifier is misconfigured\n");
12405                        return -EINVAL;
12406                }
12407                cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
12408                                        env->prog);
12409                if (cnt >= ARRAY_SIZE(insn_buf)) {
12410                        verbose(env, "bpf verifier is misconfigured\n");
12411                        return -EINVAL;
12412                } else if (cnt) {
12413                        new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
12414                        if (!new_prog)
12415                                return -ENOMEM;
12416
12417                        env->prog = new_prog;
12418                        delta += cnt - 1;
12419                }
12420        }
12421
12422        if (bpf_prog_is_dev_bound(env->prog->aux))
12423                return 0;
12424
12425        insn = env->prog->insnsi + delta;
12426
12427        for (i = 0; i < insn_cnt; i++, insn++) {
12428                bpf_convert_ctx_access_t convert_ctx_access;
12429                bool ctx_access;
12430
12431                if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
12432                    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
12433                    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
12434                    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
12435                        type = BPF_READ;
12436                        ctx_access = true;
12437                } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
12438                           insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
12439                           insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
12440                           insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
12441                           insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
12442                           insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
12443                           insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
12444                           insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
12445                        type = BPF_WRITE;
12446                        ctx_access = BPF_CLASS(insn->code) == BPF_STX;
12447                } else {
12448                        continue;
12449                }
12450
12451                if (type == BPF_WRITE &&
12452                    env->insn_aux_data[i + delta].sanitize_stack_spill) {
12453                        struct bpf_insn patch[] = {
12454                                *insn,
12455                                BPF_ST_NOSPEC(),
12456                        };
12457
12458                        cnt = ARRAY_SIZE(patch);
12459                        new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
12460                        if (!new_prog)
12461                                return -ENOMEM;
12462
12463                        delta    += cnt - 1;
12464                        env->prog = new_prog;
12465                        insn      = new_prog->insnsi + i + delta;
12466                        continue;
12467                }
12468
12469                if (!ctx_access)
12470                        continue;
12471
12472                switch (env->insn_aux_data[i + delta].ptr_type) {
12473                case PTR_TO_CTX:
12474                        if (!ops->convert_ctx_access)
12475                                continue;
12476                        convert_ctx_access = ops->convert_ctx_access;
12477                        break;
12478                case PTR_TO_SOCKET:
12479                case PTR_TO_SOCK_COMMON:
12480                        convert_ctx_access = bpf_sock_convert_ctx_access;
12481                        break;
12482                case PTR_TO_TCP_SOCK:
12483                        convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
12484                        break;
12485                case PTR_TO_XDP_SOCK:
12486                        convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
12487                        break;
12488                case PTR_TO_BTF_ID:
12489                        if (type == BPF_READ) {
12490                                insn->code = BPF_LDX | BPF_PROBE_MEM |
12491                                        BPF_SIZE((insn)->code);
12492                                env->prog->aux->num_exentries++;
12493                        } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
12494                                verbose(env, "Writes through BTF pointers are not allowed\n");
12495                                return -EINVAL;
12496                        }
12497                        continue;
12498                default:
12499                        continue;
12500                }
12501
12502                ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
12503                size = BPF_LDST_BYTES(insn);
12504
12505                /* If the read access is a narrower load of the field,
12506                 * convert to a 4/8-byte load, to minimize program type specific
12507                 * convert_ctx_access changes. If the conversion is successful,
12508                 * we will apply the proper mask to the result.
12509                 */
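                /* e.g. a 1-byte read of a 4-byte context field becomes a
                 * 4-byte load of the aligned field, followed by a right
                 * shift (when the wanted bytes are not in the lowest
                 * position) and an AND mask to keep only the requested bytes.
                 */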
12510                is_narrower_load = size < ctx_field_size;
12511                size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
12512                off = insn->off;
12513                if (is_narrower_load) {
12514                        u8 size_code;
12515
12516                        if (type == BPF_WRITE) {
12517                                verbose(env, "bpf verifier narrow ctx access misconfigured\n");
12518                                return -EINVAL;
12519                        }
12520
12521                        size_code = BPF_H;
12522                        if (ctx_field_size == 4)
12523                                size_code = BPF_W;
12524                        else if (ctx_field_size == 8)
12525                                size_code = BPF_DW;
12526
12527                        insn->off = off & ~(size_default - 1);
12528                        insn->code = BPF_LDX | BPF_MEM | size_code;
12529                }
12530
12531                target_size = 0;
12532                cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
12533                                         &target_size);
12534                if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
12535                    (ctx_field_size && !target_size)) {
12536                        verbose(env, "bpf verifier is misconfigured\n");
12537                        return -EINVAL;
12538                }
12539
12540                if (is_narrower_load && size < target_size) {
12541                        u8 shift = bpf_ctx_narrow_access_offset(
12542                                off, size, size_default) * 8;
12543                        if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
12544                                verbose(env, "bpf verifier narrow ctx load misconfigured\n");
12545                                return -EINVAL;
12546                        }
12547                        if (ctx_field_size <= 4) {
12548                                if (shift)
12549                                        insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
12550                                                                        insn->dst_reg,
12551                                                                        shift);
12552                                insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
12553                                                                (1 << size * 8) - 1);
12554                        } else {
12555                                if (shift)
12556                                        insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
12557                                                                        insn->dst_reg,
12558                                                                        shift);
12559                                insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
12560                                                                (1ULL << size * 8) - 1);
12561                        }
12562                }
12563
12564                new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12565                if (!new_prog)
12566                        return -ENOMEM;
12567
12568                delta += cnt - 1;
12569
12570                /* keep walking new program and skip insns we just inserted */
12571                env->prog = new_prog;
12572                insn      = new_prog->insnsi + i + delta;
12573        }
12574
12575        return 0;
12576}
12577
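/* Split the program into one bpf_prog per subprogram, JIT each of them,
 * then run a second JIT pass so that bpf-to-bpf calls can be patched with
 * the final image addresses of their callees.
 */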
12578static int jit_subprogs(struct bpf_verifier_env *env)
12579{
12580        struct bpf_prog *prog = env->prog, **func, *tmp;
12581        int i, j, subprog_start, subprog_end = 0, len, subprog;
12582        struct bpf_map *map_ptr;
12583        struct bpf_insn *insn;
12584        void *old_bpf_func;
12585        int err, num_exentries;
12586
12587        if (env->subprog_cnt <= 1)
12588                return 0;
12589
12590        for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12591                if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
12592                        continue;
12593
12594                /* Upon error here we cannot fall back to interpreter but
12595                 * need a hard reject of the program. Thus -EFAULT is
12596                 * propagated in any case.
12597                 */
12598                subprog = find_subprog(env, i + insn->imm + 1);
12599                if (subprog < 0) {
12600                        WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
12601                                  i + insn->imm + 1);
12602                        return -EFAULT;
12603                }
12604                /* temporarily remember subprog id inside insn instead of
12605                 * aux_data, since next loop will split up all insns into funcs
12606                 */
12607                insn->off = subprog;
12608                /* remember original imm in case JIT fails and fallback
12609                 * to interpreter will be needed
12610                 */
12611                env->insn_aux_data[i].call_imm = insn->imm;
12612                /* point imm to __bpf_call_base+1 from JITs point of view */
12613                insn->imm = 1;
12614                if (bpf_pseudo_func(insn))
12615                        /* jit (e.g. x86_64) may emit fewer instructions
12616                         * if it learns a u32 imm is the same as a u64 imm.
12617                         * Force a non-zero value here.
12618                         */
12619                        insn[1].imm = 1;
12620        }
12621
12622        err = bpf_prog_alloc_jited_linfo(prog);
12623        if (err)
12624                goto out_undo_insn;
12625
12626        err = -ENOMEM;
12627        func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
12628        if (!func)
12629                goto out_undo_insn;
12630
12631        for (i = 0; i < env->subprog_cnt; i++) {
12632                subprog_start = subprog_end;
12633                subprog_end = env->subprog_info[i + 1].start;
12634
12635                len = subprog_end - subprog_start;
12636                /* bpf_prog_run() doesn't call subprogs directly,
12637                 * hence main prog stats include the runtime of subprogs.
12638                 * subprogs don't have IDs and are not reachable via prog_get_next_id(),
12639                 * so func[i]->stats will never be accessed and stays NULL
12640                 */
12641                func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
12642                if (!func[i])
12643                        goto out_free;
12644                memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
12645                       len * sizeof(struct bpf_insn));
12646                func[i]->type = prog->type;
12647                func[i]->len = len;
12648                if (bpf_prog_calc_tag(func[i]))
12649                        goto out_free;
12650                func[i]->is_func = 1;
12651                func[i]->aux->func_idx = i;
12652                /* Below members will be freed only at prog->aux */
12653                func[i]->aux->btf = prog->aux->btf;
12654                func[i]->aux->func_info = prog->aux->func_info;
12655                func[i]->aux->poke_tab = prog->aux->poke_tab;
12656                func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
12657
12658                for (j = 0; j < prog->aux->size_poke_tab; j++) {
12659                        struct bpf_jit_poke_descriptor *poke;
12660
12661                        poke = &prog->aux->poke_tab[j];
12662                        if (poke->insn_idx < subprog_end &&
12663                            poke->insn_idx >= subprog_start)
12664                                poke->aux = func[i]->aux;
12665                }
12666
12667                /* Use bpf_prog_F_tag to indicate functions in stack traces.
12668                 * Long term would need debug info to populate names
12669                 */
12670                func[i]->aux->name[0] = 'F';
12671                func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
12672                func[i]->jit_requested = 1;
12673                func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
12674                func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
12675                func[i]->aux->linfo = prog->aux->linfo;
12676                func[i]->aux->nr_linfo = prog->aux->nr_linfo;
12677                func[i]->aux->jited_linfo = prog->aux->jited_linfo;
12678                func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
12679                num_exentries = 0;
12680                insn = func[i]->insnsi;
12681                for (j = 0; j < func[i]->len; j++, insn++) {
12682                        if (BPF_CLASS(insn->code) == BPF_LDX &&
12683                            BPF_MODE(insn->code) == BPF_PROBE_MEM)
12684                                num_exentries++;
12685                }
12686                func[i]->aux->num_exentries = num_exentries;
12687                func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
12688                func[i] = bpf_int_jit_compile(func[i]);
12689                if (!func[i]->jited) {
12690                        err = -ENOTSUPP;
12691                        goto out_free;
12692                }
12693                cond_resched();
12694        }
12695
12696        /* at this point all bpf functions were successfully JITed.
12697         * Now populate all bpf_calls with correct addresses and
12698         * run the last pass of the JIT
12699         */
12700        for (i = 0; i < env->subprog_cnt; i++) {
12701                insn = func[i]->insnsi;
12702                for (j = 0; j < func[i]->len; j++, insn++) {
12703                        if (bpf_pseudo_func(insn)) {
12704                                subprog = insn->off;
12705                                insn[0].imm = (u32)(long)func[subprog]->bpf_func;
12706                                insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
12707                                continue;
12708                        }
12709                        if (!bpf_pseudo_call(insn))
12710                                continue;
12711                        subprog = insn->off;
12712                        insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
12713                }
12714
12715                /* we use the aux data to keep a list of the start addresses
12716                 * of the JITed images for each function in the program
12717                 *
12718                 * for some architectures, such as powerpc64, the imm field
12719                 * might not be large enough to hold the offset of the start
12720                 * address of the callee's JITed image from __bpf_call_base
12721                 *
12722                 * in such cases, we can lookup the start address of a callee
12723                 * by using its subprog id, available from the off field of
12724                 * the call instruction, as an index for this list
12725                 */
12726                func[i]->aux->func = func;
12727                func[i]->aux->func_cnt = env->subprog_cnt;
12728        }
12729        for (i = 0; i < env->subprog_cnt; i++) {
12730                old_bpf_func = func[i]->bpf_func;
12731                tmp = bpf_int_jit_compile(func[i]);
12732                if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
12733                        verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
12734                        err = -ENOTSUPP;
12735                        goto out_free;
12736                }
12737                cond_resched();
12738        }
12739
12740        /* finally lock prog and jit images for all functions and
12741         * populate kallsyms
12742         */
12743        for (i = 0; i < env->subprog_cnt; i++) {
12744                bpf_prog_lock_ro(func[i]);
12745                bpf_prog_kallsyms_add(func[i]);
12746        }
12747
12748        /* Last step: make now unused interpreter insns from main
12749         * prog consistent for later dump requests, so they
12750         * look the same as if they had been interpreted only.
12751         */
12752        for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12753                if (bpf_pseudo_func(insn)) {
12754                        insn[0].imm = env->insn_aux_data[i].call_imm;
12755                        insn[1].imm = insn->off;
12756                        insn->off = 0;
12757                        continue;
12758                }
12759                if (!bpf_pseudo_call(insn))
12760                        continue;
12761                insn->off = env->insn_aux_data[i].call_imm;
12762                subprog = find_subprog(env, i + insn->off + 1);
12763                insn->imm = subprog;
12764        }
12765
12766        prog->jited = 1;
12767        prog->bpf_func = func[0]->bpf_func;
12768        prog->aux->func = func;
12769        prog->aux->func_cnt = env->subprog_cnt;
12770        bpf_prog_jit_attempt_done(prog);
12771        return 0;
12772out_free:
12773        /* We failed JIT'ing, so at this point we need to unregister poke
12774         * descriptors from subprogs, so that the kernel is not attempting to
12775         * patch them anymore as we're freeing the subprog JIT memory.
12776         */
12777        for (i = 0; i < prog->aux->size_poke_tab; i++) {
12778                map_ptr = prog->aux->poke_tab[i].tail_call.map;
12779                map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
12780        }
12781        /* At this point we're guaranteed that poke descriptors are not
12782         * live anymore. We can just unlink the descriptor table as it's
12783         * released with the main prog.
12784         */
12785        for (i = 0; i < env->subprog_cnt; i++) {
12786                if (!func[i])
12787                        continue;
12788                func[i]->aux->poke_tab = NULL;
12789                bpf_jit_free(func[i]);
12790        }
12791        kfree(func);
12792out_undo_insn:
12793        /* cleanup main prog to be interpreted */
12794        prog->jit_requested = 0;
12795        for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12796                if (!bpf_pseudo_call(insn))
12797                        continue;
12798                insn->off = 0;
12799                insn->imm = env->insn_aux_data[i].call_imm;
12800        }
12801        bpf_prog_jit_attempt_done(prog);
12802        return err;
12803}
12804
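/* JIT all subprograms if possible; otherwise (when the interpreter is
 * still available) fall back to interpreting bpf-to-bpf calls by patching
 * each call with the callee's stack depth via bpf_patch_call_args().
 */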
12805static int fixup_call_args(struct bpf_verifier_env *env)
12806{
12807#ifndef CONFIG_BPF_JIT_ALWAYS_ON
12808        struct bpf_prog *prog = env->prog;
12809        struct bpf_insn *insn = prog->insnsi;
12810        bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
12811        int i, depth;
12812#endif
12813        int err = 0;
12814
12815        if (env->prog->jit_requested &&
12816            !bpf_prog_is_dev_bound(env->prog->aux)) {
12817                err = jit_subprogs(env);
12818                if (err == 0)
12819                        return 0;
12820                if (err == -EFAULT)
12821                        return err;
12822        }
12823#ifndef CONFIG_BPF_JIT_ALWAYS_ON
12824        if (has_kfunc_call) {
12825                verbose(env, "calling kernel functions is not allowed in non-JITed programs\n");
12826                return -EINVAL;
12827        }
12828        if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
12829                /* When JIT fails the progs with bpf2bpf calls and tail_calls
12830                 * have to be rejected, since interpreter doesn't support them yet.
12831                 */
12832                verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
12833                return -EINVAL;
12834        }
12835        for (i = 0; i < prog->len; i++, insn++) {
12836                if (bpf_pseudo_func(insn)) {
12837                        /* When JIT fails the progs with callback calls
12838                         * have to be rejected, since interpreter doesn't support them yet.
12839                         */
12840                        verbose(env, "callbacks are not allowed in non-JITed programs\n");
12841                        return -EINVAL;
12842                }
12843
12844                if (!bpf_pseudo_call(insn))
12845                        continue;
12846                depth = get_callee_stack_depth(env, insn, i);
12847                if (depth < 0)
12848                        return depth;
12849                bpf_patch_call_args(insn, depth);
12850        }
12851        err = 0;
12852#endif
12853        return err;
12854}
12855
12856static int fixup_kfunc_call(struct bpf_verifier_env *env,
12857                            struct bpf_insn *insn)
12858{
12859        const struct bpf_kfunc_desc *desc;
12860
12861        if (!insn->imm) {
12862                verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
12863                return -EINVAL;
12864        }
12865
12866        /* insn->imm has the btf func_id. Replace it with
12867         * an address (relative to __bpf_call_base).
12868         */
12869        desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
12870        if (!desc) {
12871                verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
12872                        insn->imm);
12873                return -EFAULT;
12874        }
12875
12876        insn->imm = desc->imm;
12877
12878        return 0;
12879}
12880
12881/* Do various post-verification rewrites in a single program pass.
12882 * These rewrites simplify JIT and interpreter implementations.
12883 */
12884static int do_misc_fixups(struct bpf_verifier_env *env)
12885{
12886        struct bpf_prog *prog = env->prog;
12887        bool expect_blinding = bpf_jit_blinding_enabled(prog);
12888        enum bpf_prog_type prog_type = resolve_prog_type(prog);
12889        struct bpf_insn *insn = prog->insnsi;
12890        const struct bpf_func_proto *fn;
12891        const int insn_cnt = prog->len;
12892        const struct bpf_map_ops *ops;
12893        struct bpf_insn_aux_data *aux;
12894        struct bpf_insn insn_buf[16];
12895        struct bpf_prog *new_prog;
12896        struct bpf_map *map_ptr;
12897        int i, ret, cnt, delta = 0;
12898
12899        for (i = 0; i < insn_cnt; i++, insn++) {
12900                /* Make divide-by-zero exceptions impossible. */
12901                if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
12902                    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
12903                    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
12904                    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
12905                        bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
12906                        bool isdiv = BPF_OP(insn->code) == BPF_DIV;
12907                        struct bpf_insn *patchlet;
12908                        struct bpf_insn chk_and_div[] = {
12909                                /* [R,W]x div 0 -> 0 */
12910                                BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
12911                                             BPF_JNE | BPF_K, insn->src_reg,
12912                                             0, 2, 0),
12913                                BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
12914                                BPF_JMP_IMM(BPF_JA, 0, 0, 1),
12915                                *insn,
12916                        };
12917                        struct bpf_insn chk_and_mod[] = {
12918                                /* [R,W]x mod 0 -> [R,W]x */
12919                                BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
12920                                             BPF_JEQ | BPF_K, insn->src_reg,
12921                                             0, 1 + (is64 ? 0 : 1), 0),
12922                                *insn,
12923                                BPF_JMP_IMM(BPF_JA, 0, 0, 1),
12924                                BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
12925                        };
12926
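                             /* In effect the patched sequence implements:
                              *
                              *     div: dst = (src != 0) ? dst / src : 0;
                              *     mod: dst = (src != 0) ? dst % src : dst;
                              *
                              * (for 32-bit mod, dst is additionally truncated to 32 bits
                              * when src == 0), so a zero divisor can never raise an
                              * exception at run time.
                              */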
12927                        patchlet = isdiv ? chk_and_div : chk_and_mod;
12928                        cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
12929                                      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
12930
12931                        new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
12932                        if (!new_prog)
12933                                return -ENOMEM;
12934
12935                        delta    += cnt - 1;
12936                        env->prog = prog = new_prog;
12937                        insn      = new_prog->insnsi + i + delta;
12938                        continue;
12939                }
12940
12941                /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
12942                if (BPF_CLASS(insn->code) == BPF_LD &&
12943                    (BPF_MODE(insn->code) == BPF_ABS ||
12944                     BPF_MODE(insn->code) == BPF_IND)) {
12945                        cnt = env->ops->gen_ld_abs(insn, insn_buf);
12946                        if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
12947                                verbose(env, "bpf verifier is misconfigured\n");
12948                                return -EINVAL;
12949                        }
12950
12951                        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12952                        if (!new_prog)
12953                                return -ENOMEM;
12954
12955                        delta    += cnt - 1;
12956                        env->prog = prog = new_prog;
12957                        insn      = new_prog->insnsi + i + delta;
12958                        continue;
12959                }
12960
12961                /* Rewrite pointer arithmetic to mitigate speculation attacks. */
12962                if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
12963                    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
12964                        const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
12965                        const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
12966                        struct bpf_insn *patch = &insn_buf[0];
12967                        bool issrc, isneg, isimm;
12968                        u32 off_reg;
12969
12970                        aux = &env->insn_aux_data[i + delta];
12971                        if (!aux->alu_state ||
12972                            aux->alu_state == BPF_ALU_NON_POINTER)
12973                                continue;
12974
12975                        isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
12976                        issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
12977                                BPF_ALU_SANITIZE_SRC;
12978                        isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
12979
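                             /* Rewrite the ALU insn to take its offset from BPF_REG_AX.
                              * For a constant offset (isimm), AX is simply loaded with
                              * the value recorded in aux->alu_limit.  For a variable
                              * offset, the masking sequence below forces AX to off_reg
                              * when 0 <= off_reg <= aux->alu_limit and to 0 otherwise,
                              * so even speculatively executed pointer arithmetic cannot
                              * escape the bounds proven by the verifier.
                              */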
12980                        off_reg = issrc ? insn->src_reg : insn->dst_reg;
12981                        if (isimm) {
12982                                *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
12983                        } else {
12984                                if (isneg)
12985                                        *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
12986                                *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
12987                                *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
12988                                *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
12989                                *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
12990                                *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
12991                                *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
12992                        }
12993                        if (!issrc)
12994                                *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
12995                        insn->src_reg = BPF_REG_AX;
12996                        if (isneg)
12997                                insn->code = insn->code == code_add ?
12998                                             code_sub : code_add;
12999                        *patch++ = *insn;
13000                        if (issrc && isneg && !isimm)
13001                                *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13002                        cnt = patch - insn_buf;
13003
13004                        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13005                        if (!new_prog)
13006                                return -ENOMEM;
13007
13008                        delta    += cnt - 1;
13009                        env->prog = prog = new_prog;
13010                        insn      = new_prog->insnsi + i + delta;
13011                        continue;
13012                }
13013
13014                if (insn->code != (BPF_JMP | BPF_CALL))
13015                        continue;
13016                if (insn->src_reg == BPF_PSEUDO_CALL)
13017                        continue;
13018                if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
13019                        ret = fixup_kfunc_call(env, insn);
13020                        if (ret)
13021                                return ret;
13022                        continue;
13023                }
13024
13025                if (insn->imm == BPF_FUNC_get_route_realm)
13026                        prog->dst_needed = 1;
13027                if (insn->imm == BPF_FUNC_get_prandom_u32)
13028                        bpf_user_rnd_init_once();
13029                if (insn->imm == BPF_FUNC_override_return)
13030                        prog->kprobe_override = 1;
13031                if (insn->imm == BPF_FUNC_tail_call) {
13032                        /* If we tail call into other programs, we
13033                         * cannot make any assumptions since they can
13034                         * be replaced dynamically during runtime in
13035                         * the program array.
13036                         */
13037                        prog->cb_access = 1;
13038                        if (!allow_tail_call_in_subprogs(env))
13039                                prog->aux->stack_depth = MAX_BPF_STACK;
13040                        prog->aux->max_pkt_offset = MAX_PACKET_OFF;
13041
13042                        /* mark bpf_tail_call as a different opcode to avoid a
13043                         * conditional branch in the interpreter for every normal
13044                         * call and to prevent accidental JITing by a JIT compiler
13045                         * that doesn't support bpf_tail_call yet
13046                         */
13047                        insn->imm = 0;
13048                        insn->code = BPF_JMP | BPF_TAIL_CALL;
13049
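                             /* If the map pointer and key are constant and the program
                              * will really be JITed (no constant blinding), register a
                              * poke descriptor so the JIT can emit a direct jump for
                              * this tail call; the map later patches the jump target
                              * via map_poke_run() whenever the slot is updated.
                              */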
13050                        aux = &env->insn_aux_data[i + delta];
13051                        if (env->bpf_capable && !expect_blinding &&
13052                            prog->jit_requested &&
13053                            !bpf_map_key_poisoned(aux) &&
13054                            !bpf_map_ptr_poisoned(aux) &&
13055                            !bpf_map_ptr_unpriv(aux)) {
13056                                struct bpf_jit_poke_descriptor desc = {
13057                                        .reason = BPF_POKE_REASON_TAIL_CALL,
13058                                        .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
13059                                        .tail_call.key = bpf_map_key_immediate(aux),
13060                                        .insn_idx = i + delta,
13061                                };
13062
13063                                ret = bpf_jit_add_poke_descriptor(prog, &desc);
13064                                if (ret < 0) {
13065                                        verbose(env, "adding tail call poke descriptor failed\n");
13066                                        return ret;
13067                                }
13068
13069                                insn->imm = ret + 1;
13070                                continue;
13071                        }
13072
13073                        if (!bpf_map_ptr_unpriv(aux))
13074                                continue;
13075
13076                        /* instead of changing every JIT dealing with tail_call
13077                         * emit two extra insns:
13078                         * if (index >= max_entries) goto out;
13079                         * index &= array->index_mask;
13080                         * to avoid out-of-bounds cpu speculation
13081                         */
13082                        if (bpf_map_ptr_poisoned(aux)) {
13083                                verbose(env, "tail_call abusing map_ptr\n");
13084                                return -EINVAL;
13085                        }
13086
13087                        map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13088                        insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
13089                                                  map_ptr->max_entries, 2);
13090                        insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
13091                                                    container_of(map_ptr,
13092                                                                 struct bpf_array,
13093                                                                 map)->index_mask);
13094                        insn_buf[2] = *insn;
13095                        cnt = 3;
13096                        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13097                        if (!new_prog)
13098                                return -ENOMEM;
13099
13100                        delta    += cnt - 1;
13101                        env->prog = prog = new_prog;
13102                        insn      = new_prog->insnsi + i + delta;
13103                        continue;
13104                }
13105
13106                if (insn->imm == BPF_FUNC_timer_set_callback) {
13107                        /* The verifier will process callback_fn as many times as necessary
13108                         * with different maps, and the register states prepared by
13109                         * set_timer_callback_state will be accurate.
13110                         *
13111                         * The following use case is valid:
13112                         *   map1 is shared by prog1, prog2, prog3.
13113                         *   prog1 calls bpf_timer_init for some map1 elements
13114                         *   prog2 calls bpf_timer_set_callback for some map1 elements.
13115                         *     Those that were not bpf_timer_init-ed will return -EINVAL.
13116                         *   prog3 calls bpf_timer_start for some map1 elements.
13117                         *     Those that were not both bpf_timer_init-ed and
13118                         *     bpf_timer_set_callback-ed will return -EINVAL.
13119                         */
13120                        struct bpf_insn ld_addrs[2] = {
13121                                BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
13122                        };
13123
13124                        insn_buf[0] = ld_addrs[0];
13125                        insn_buf[1] = ld_addrs[1];
13126                        insn_buf[2] = *insn;
13127                        cnt = 3;
13128
13129                        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13130                        if (!new_prog)
13131                                return -ENOMEM;
13132
13133                        delta    += cnt - 1;
13134                        env->prog = prog = new_prog;
13135                        insn      = new_prog->insnsi + i + delta;
13136                        goto patch_call_imm;
13137                }
13138
13139                /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
13140                 * and other inlining handlers are currently limited to 64 bit
13141                 * only.
13142                 */
13143                if (prog->jit_requested && BITS_PER_LONG == 64 &&
13144                    (insn->imm == BPF_FUNC_map_lookup_elem ||
13145                     insn->imm == BPF_FUNC_map_update_elem ||
13146                     insn->imm == BPF_FUNC_map_delete_elem ||
13147                     insn->imm == BPF_FUNC_map_push_elem   ||
13148                     insn->imm == BPF_FUNC_map_pop_elem    ||
13149                     insn->imm == BPF_FUNC_map_peek_elem   ||
13150                     insn->imm == BPF_FUNC_redirect_map    ||
13151                     insn->imm == BPF_FUNC_for_each_map_elem)) {
13152                        aux = &env->insn_aux_data[i + delta];
13153                        if (bpf_map_ptr_poisoned(aux))
13154                                goto patch_call_imm;
13155
13156                        map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13157                        ops = map_ptr->ops;
13158                        if (insn->imm == BPF_FUNC_map_lookup_elem &&
13159                            ops->map_gen_lookup) {
13160                                cnt = ops->map_gen_lookup(map_ptr, insn_buf);
13161                                if (cnt == -EOPNOTSUPP)
13162                                        goto patch_map_ops_generic;
13163                                if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13164                                        verbose(env, "bpf verifier is misconfigured\n");
13165                                        return -EINVAL;
13166                                }
13167
13168                                new_prog = bpf_patch_insn_data(env, i + delta,
13169                                                               insn_buf, cnt);
13170                                if (!new_prog)
13171                                        return -ENOMEM;
13172
13173                                delta    += cnt - 1;
13174                                env->prog = prog = new_prog;
13175                                insn      = new_prog->insnsi + i + delta;
13176                                continue;
13177                        }
13178
13179                        BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
13180                                     (void *(*)(struct bpf_map *map, void *key))NULL));
13181                        BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
13182                                     (int (*)(struct bpf_map *map, void *key))NULL));
13183                        BUILD_BUG_ON(!__same_type(ops->map_update_elem,
13184                                     (int (*)(struct bpf_map *map, void *key, void *value,
13185                                              u64 flags))NULL));
13186                        BUILD_BUG_ON(!__same_type(ops->map_push_elem,
13187                                     (int (*)(struct bpf_map *map, void *value,
13188                                              u64 flags))NULL));
13189                        BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
13190                                     (int (*)(struct bpf_map *map, void *value))NULL));
13191                        BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
13192                                     (int (*)(struct bpf_map *map, void *value))NULL));
13193                        BUILD_BUG_ON(!__same_type(ops->map_redirect,
13194                                     (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
13195                        BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
13196                                     (int (*)(struct bpf_map *map,
13197                                              bpf_callback_t callback_fn,
13198                                              void *callback_ctx,
13199                                              u64 flags))NULL));
13200
13201patch_map_ops_generic:
13202                        switch (insn->imm) {
13203                        case BPF_FUNC_map_lookup_elem:
13204                                insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
13205                                continue;
13206                        case BPF_FUNC_map_update_elem:
13207                                insn->imm = BPF_CALL_IMM(ops->map_update_elem);
13208                                continue;
13209                        case BPF_FUNC_map_delete_elem:
13210                                insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
13211                                continue;
13212                        case BPF_FUNC_map_push_elem:
13213                                insn->imm = BPF_CALL_IMM(ops->map_push_elem);
13214                                continue;
13215                        case BPF_FUNC_map_pop_elem:
13216                                insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
13217                                continue;
13218                        case BPF_FUNC_map_peek_elem:
13219                                insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
13220                                continue;
13221                        case BPF_FUNC_redirect_map:
13222                                insn->imm = BPF_CALL_IMM(ops->map_redirect);
13223                                continue;
13224                        case BPF_FUNC_for_each_map_elem:
13225                                insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
13226                                continue;
13227                        }
13228
13229                        goto patch_call_imm;
13230                }
13231
13232                /* Implement bpf_jiffies64 inline. */
13233                if (prog->jit_requested && BITS_PER_LONG == 64 &&
13234                    insn->imm == BPF_FUNC_jiffies64) {
13235                        struct bpf_insn ld_jiffies_addr[2] = {
13236                                BPF_LD_IMM64(BPF_REG_0,
13237                                             (unsigned long)&jiffies),
13238                        };
13239
13240                        insn_buf[0] = ld_jiffies_addr[0];
13241                        insn_buf[1] = ld_jiffies_addr[1];
13242                        insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
13243                                                  BPF_REG_0, 0);
13244                        cnt = 3;
13245
13246                        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
13247                                                       cnt);
13248                        if (!new_prog)
13249                                return -ENOMEM;
13250
13251                        delta    += cnt - 1;
13252                        env->prog = prog = new_prog;
13253                        insn      = new_prog->insnsi + i + delta;
13254                        continue;
13255                }
13256
13257                /* Implement bpf_get_func_ip inline. */
13258                if (prog_type == BPF_PROG_TYPE_TRACING &&
13259                    insn->imm == BPF_FUNC_get_func_ip) {
13260                        /* Load IP address from ctx - 8 */
13261                        insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13262
13263                        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13264                        if (!new_prog)
13265                                return -ENOMEM;
13266
13267                        env->prog = prog = new_prog;
13268                        insn      = new_prog->insnsi + i + delta;
13269                        continue;
13270                }
13271
13272patch_call_imm:
13273                fn = env->ops->get_func_proto(insn->imm, env->prog);
13274                /* all functions that have a prototype and that the verifier
13275                 * allowed programs to call must be real in-kernel functions
13276                 */
13277                if (!fn->func) {
13278                        verbose(env,
13279                                "kernel subsystem misconfigured func %s#%d\n",
13280                                func_id_name(insn->imm), insn->imm);
13281                        return -EFAULT;
13282                }
13283                insn->imm = fn->func - __bpf_call_base;
13284        }
13285
13286        /* Since poke tab is now finalized, publish aux to tracker. */
13287        for (i = 0; i < prog->aux->size_poke_tab; i++) {
13288                map_ptr = prog->aux->poke_tab[i].tail_call.map;
13289                if (!map_ptr->ops->map_poke_track ||
13290                    !map_ptr->ops->map_poke_untrack ||
13291                    !map_ptr->ops->map_poke_run) {
13292                        verbose(env, "bpf verifier is misconfigured\n");
13293                        return -EINVAL;
13294                }
13295
13296                ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
13297                if (ret < 0) {
13298                        verbose(env, "tracking tail call prog failed\n");
13299                        return ret;
13300                }
13301        }
13302
13303        sort_kfunc_descs_by_imm(env->prog);
13304
13305        return 0;
13306}
13307
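     /* Free all verifier states still held by the environment: the free_list
      * and every bucket of the explored_states hash table.
      */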
13308static void free_states(struct bpf_verifier_env *env)
13309{
13310        struct bpf_verifier_state_list *sl, *sln;
13311        int i;
13312
13313        sl = env->free_list;
13314        while (sl) {
13315                sln = sl->next;
13316                free_verifier_state(&sl->state, false);
13317                kfree(sl);
13318                sl = sln;
13319        }
13320        env->free_list = NULL;
13321
13322        if (!env->explored_states)
13323                return;
13324
13325        for (i = 0; i < state_htab_size(env); i++) {
13326                sl = env->explored_states[i];
13327
13328                while (sl) {
13329                        sln = sl->next;
13330                        free_verifier_state(&sl->state, false);
13331                        kfree(sl);
13332                        sl = sln;
13333                }
13334                env->explored_states[i] = NULL;
13335        }
13336}
13337
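     /* Set up the initial state for one verification pass and run do_check().
      * For the main program of a non-extension prog type, R1 is marked as
      * PTR_TO_CTX; for a global subprogram or a BPF_PROG_TYPE_EXT program,
      * the argument registers are initialized from the function's BTF
      * prototype via btf_prepare_func_args().  All verifier states are freed
      * before returning.
      */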
13338static int do_check_common(struct bpf_verifier_env *env, int subprog)
13339{
13340        bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
13341        struct bpf_verifier_state *state;
13342        struct bpf_reg_state *regs;
13343        int ret, i;
13344
13345        env->prev_linfo = NULL;
13346        env->pass_cnt++;
13347
13348        state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
13349        if (!state)
13350                return -ENOMEM;
13351        state->curframe = 0;
13352        state->speculative = false;
13353        state->branches = 1;
13354        state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
13355        if (!state->frame[0]) {
13356                kfree(state);
13357                return -ENOMEM;
13358        }
13359        env->cur_state = state;
13360        init_func_state(env, state->frame[0],
13361                        BPF_MAIN_FUNC /* callsite */,
13362                        0 /* frameno */,
13363                        subprog);
13364
13365        regs = state->frame[state->curframe]->regs;
13366        if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
13367                ret = btf_prepare_func_args(env, subprog, regs);
13368                if (ret)
13369                        goto out;
13370                for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
13371                        if (regs[i].type == PTR_TO_CTX)
13372                                mark_reg_known_zero(env, regs, i);
13373                        else if (regs[i].type == SCALAR_VALUE)
13374                                mark_reg_unknown(env, regs, i);
13375                        else if (regs[i].type == PTR_TO_MEM_OR_NULL) {
13376                                const u32 mem_size = regs[i].mem_size;
13377
13378                                mark_reg_known_zero(env, regs, i);
13379                                regs[i].mem_size = mem_size;
13380                                regs[i].id = ++env->id_gen;
13381                        }
13382                }
13383        } else {
13384                /* 1st arg to a function */
13385                regs[BPF_REG_1].type = PTR_TO_CTX;
13386                mark_reg_known_zero(env, regs, BPF_REG_1);
13387                ret = btf_check_subprog_arg_match(env, subprog, regs);
13388                if (ret == -EFAULT)
13389                        /* unlikely verifier bug. abort.
13390                         * ret == 0 and ret < 0 are sadly acceptable for the
13391                         * main() function due to backward compatibility.
13392                         * E.g. a socket filter program may be written as:
13393                         * int bpf_prog(struct pt_regs *ctx)
13394                         * and never dereference that ctx in the program.
13395                         * 'struct pt_regs' is a type mismatch for a socket
13396                         * filter, which should be using 'struct __sk_buff'.
13397                         */
13398                        goto out;
13399        }
13400
13401        ret = do_check(env);
13402out:
13403        /* check for NULL is necessary, since cur_state can be freed inside
13404         * do_check() under memory pressure.
13405         */
13406        if (env->cur_state) {
13407                free_verifier_state(env->cur_state, true);
13408                env->cur_state = NULL;
13409        }
13410        while (!pop_stack(env, NULL, NULL, false));
13411        if (!ret && pop_log)
13412                bpf_vlog_reset(&env->log, 0);
13413        free_states(env);
13414        return ret;
13415}
13416
13417/* Verify all global functions in a BPF program one by one based on their BTF.
13418 * All global functions must pass verification. Otherwise the whole program is rejected.
13419 * Consider:
13420 * int bar(int);
13421 * int foo(int f)
13422 * {
13423 *    return bar(f);
13424 * }
13425 * int bar(int b)
13426 * {
13427 *    ...
13428 * }
13429 * foo() will be verified first for R1=any_scalar_value. During verification it
13430 * will be assumed that bar() has already been verified successfully and the call
13431 * to bar() from foo() will be checked for a type match only. Later bar() will be
13432 * verified independently to check that it's safe for R1=any_scalar_value.
13433 */
13434static int do_check_subprogs(struct bpf_verifier_env *env)
13435{
13436        struct bpf_prog_aux *aux = env->prog->aux;
13437        int i, ret;
13438
13439        if (!aux->func_info)
13440                return 0;
13441
13442        for (i = 1; i < env->subprog_cnt; i++) {
13443                if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
13444                        continue;
13445                env->insn_idx = env->subprog_info[i].start;
13446                WARN_ON_ONCE(env->insn_idx == 0);
13447                ret = do_check_common(env, i);
13448                if (ret) {
13449                        return ret;
13450                } else if (env->log.level & BPF_LOG_LEVEL) {
13451                        verbose(env,
13452                                "Func#%d is safe for any args that match its prototype\n",
13453                                i);
13454                }
13455        }
13456        return 0;
13457}
13458
13459static int do_check_main(struct bpf_verifier_env *env)
13460{
13461        int ret;
13462
13463        env->insn_idx = 0;
13464        ret = do_check_common(env, 0);
13465        if (!ret)
13466                env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
13467        return ret;
13468}
13469
13470
13471static void print_verification_stats(struct bpf_verifier_env *env)
13472{
13473        int i;
13474
13475        if (env->log.level & BPF_LOG_STATS) {
13476                verbose(env, "verification time %lld usec\n",
13477                        div_u64(env->verification_time, 1000));
13478                verbose(env, "stack depth ");
13479                for (i = 0; i < env->subprog_cnt; i++) {
13480                        u32 depth = env->subprog_info[i].stack_depth;
13481
13482                        verbose(env, "%d", depth);
13483                        if (i + 1 < env->subprog_cnt)
13484                                verbose(env, "+");
13485                }
13486                verbose(env, "\n");
13487        }
13488        verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
13489                "total_states %d peak_states %d mark_read %d\n",
13490                env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
13491                env->max_states_per_insn, env->total_states,
13492                env->peak_states, env->longest_mark_read_walk);
13493}
13494
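     /* For BPF_PROG_TYPE_STRUCT_OPS programs, attach_btf_id names the
      * struct_ops type and expected_attach_type is the index of the member
      * being implemented.  Resolve the member's function prototype and switch
      * env->ops to the struct_ops-specific verifier ops.
      */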
13495static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
13496{
13497        const struct btf_type *t, *func_proto;
13498        const struct bpf_struct_ops *st_ops;
13499        const struct btf_member *member;
13500        struct bpf_prog *prog = env->prog;
13501        u32 btf_id, member_idx;
13502        const char *mname;
13503
13504        if (!prog->gpl_compatible) {
13505                verbose(env, "struct ops programs must have a GPL compatible license\n");
13506                return -EINVAL;
13507        }
13508
13509        btf_id = prog->aux->attach_btf_id;
13510        st_ops = bpf_struct_ops_find(btf_id);
13511        if (!st_ops) {
13512                verbose(env, "attach_btf_id %u is not a supported struct\n",
13513                        btf_id);
13514                return -ENOTSUPP;
13515        }
13516
13517        t = st_ops->type;
13518        member_idx = prog->expected_attach_type;
13519        if (member_idx >= btf_type_vlen(t)) {
13520                verbose(env, "attach to invalid member idx %u of struct %s\n",
13521                        member_idx, st_ops->name);
13522                return -EINVAL;
13523        }
13524
13525        member = &btf_type_member(t)[member_idx];
13526        mname = btf_name_by_offset(btf_vmlinux, member->name_off);
13527        func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
13528                                               NULL);
13529        if (!func_proto) {
13530                verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
13531                        mname, member_idx, st_ops->name);
13532                return -EINVAL;
13533        }
13534
13535        if (st_ops->check_member) {
13536                int err = st_ops->check_member(t, member);
13537
13538                if (err) {
13539                        verbose(env, "attach to unsupported member %s of struct %s\n",
13540                                mname, st_ops->name);
13541                        return err;
13542                }
13543        }
13544
13545        prog->aux->attach_func_proto = func_proto;
13546        prog->aux->attach_func_name = mname;
13547        env->ops = st_ops->verifier_ops;
13548
13549        return 0;
13550}
13551#define SECURITY_PREFIX "security_"
13552
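     /* fmod_ret attachment is only allowed on functions that are on the error
      * injection list or on LSM hooks (functions prefixed with "security_").
      */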
13553static int check_attach_modify_return(unsigned long addr, const char *func_name)
13554{
13555        if (within_error_injection_list(addr) ||
13556            !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
13557                return 0;
13558
13559        return -EINVAL;
13560}
13561
13562/* list of non-sleepable functions that are otherwise on
13563 * ALLOW_ERROR_INJECTION list
13564 */
13565BTF_SET_START(btf_non_sleepable_error_inject)
13566/* The three functions below can be called from both sleepable and non-sleepable
13567 * context. Assume non-sleepable from the bpf safety point of view.
13568 */
13569BTF_ID(func, __filemap_add_folio)
13570BTF_ID(func, should_fail_alloc_page)
13571BTF_ID(func, should_failslab)
13572BTF_SET_END(btf_non_sleepable_error_inject)
13573
13574static int check_non_sleepable_error_inject(u32 btf_id)
13575{
13576        return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
13577}
13578
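     /* Resolve and validate the attach target for tracing/LSM/EXT programs.
      * The target is either a BTF object in the kernel (a tracepoint typedef
      * or a kernel function) or a (sub)program of tgt_prog.  On success,
      * tgt_info carries the target's address, name and type, which are used
      * later when setting up the attachment (e.g. the trampoline).
      */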
13579int bpf_check_attach_target(struct bpf_verifier_log *log,
13580                            const struct bpf_prog *prog,
13581                            const struct bpf_prog *tgt_prog,
13582                            u32 btf_id,
13583                            struct bpf_attach_target_info *tgt_info)
13584{
13585        bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
13586        const char prefix[] = "btf_trace_";
13587        int ret = 0, subprog = -1, i;
13588        const struct btf_type *t;
13589        bool conservative = true;
13590        const char *tname;
13591        struct btf *btf;
13592        long addr = 0;
13593
13594        if (!btf_id) {
13595                bpf_log(log, "Tracing programs must provide btf_id\n");
13596                return -EINVAL;
13597        }
13598        btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
13599        if (!btf) {
13600                bpf_log(log,
13601                        "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
13602                return -EINVAL;
13603        }
13604        t = btf_type_by_id(btf, btf_id);
13605        if (!t) {
13606                bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
13607                return -EINVAL;
13608        }
13609        tname = btf_name_by_offset(btf, t->name_off);
13610        if (!tname) {
13611                bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
13612                return -EINVAL;
13613        }
13614        if (tgt_prog) {
13615                struct bpf_prog_aux *aux = tgt_prog->aux;
13616
13617                for (i = 0; i < aux->func_info_cnt; i++)
13618                        if (aux->func_info[i].type_id == btf_id) {
13619                                subprog = i;
13620                                break;
13621                        }
13622                if (subprog == -1) {
13623                        bpf_log(log, "Subprog %s doesn't exist\n", tname);
13624                        return -EINVAL;
13625                }
13626                conservative = aux->func_info_aux[subprog].unreliable;
13627                if (prog_extension) {
13628                        if (conservative) {
13629                                bpf_log(log,
13630                                        "Cannot replace static functions\n");
13631                                return -EINVAL;
13632                        }
13633                        if (!prog->jit_requested) {
13634                                bpf_log(log,
13635                                        "Extension programs should be JITed\n");
13636                                return -EINVAL;
13637                        }
13638                }
13639                if (!tgt_prog->jited) {
13640                        bpf_log(log, "Can attach to only JITed progs\n");
13641                        return -EINVAL;
13642                }
13643                if (tgt_prog->type == prog->type) {
13644                        /* Cannot fentry/fexit another fentry/fexit program.
13645                         * Cannot attach program extension to another extension.
13646                         * It's ok to attach fentry/fexit to extension program.
13647                         */
13648                        bpf_log(log, "Cannot recursively attach\n");
13649                        return -EINVAL;
13650                }
13651                if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
13652                    prog_extension &&
13653                    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
13654                     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
13655                        /* Program extensions can extend all program types
13656                         * except fentry/fexit. The reason is the following.
13657                         * The fentry/fexit programs are used for performance
13658                         * analysis and stats and can be attached to any program
13659                         * type except themselves. When an extension program
13660                         * replaces an XDP function, performance analysis of all
13661                         * functions must remain possible: both the original XDP
13662                         * program and its program extension. Hence attaching
13663                         * fentry/fexit to BPF_PROG_TYPE_EXT is allowed. If
13664                         * extending fentry/fexit were allowed, it would be
13665                         * possible to create a long call chain
13666                         * fentry->extension->fentry->extension beyond a
13667                         * reasonable stack size. Hence extending fentry is not
13668                         * allowed.
13669                         */
13670                        bpf_log(log, "Cannot extend fentry/fexit\n");
13671                        return -EINVAL;
13672                }
13673        } else {
13674                if (prog_extension) {
13675                        bpf_log(log, "Cannot replace kernel functions\n");
13676                        return -EINVAL;
13677                }
13678        }
13679
13680        switch (prog->expected_attach_type) {
13681        case BPF_TRACE_RAW_TP:
13682                if (tgt_prog) {
13683                        bpf_log(log,
13684                                "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
13685                        return -EINVAL;
13686                }
13687                if (!btf_type_is_typedef(t)) {
13688                        bpf_log(log, "attach_btf_id %u is not a typedef\n",
13689                                btf_id);
13690                        return -EINVAL;
13691                }
13692                if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
13693                        bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
13694                                btf_id, tname);
13695                        return -EINVAL;
13696                }
13697                tname += sizeof(prefix) - 1;
13698                t = btf_type_by_id(btf, t->type);
13699                if (!btf_type_is_ptr(t))
13700                        /* should never happen in valid vmlinux build */
13701                        return -EINVAL;
13702                t = btf_type_by_id(btf, t->type);
13703                if (!btf_type_is_func_proto(t))
13704                        /* should never happen in valid vmlinux build */
13705                        return -EINVAL;
13706
13707                break;
13708        case BPF_TRACE_ITER:
13709                if (!btf_type_is_func(t)) {
13710                        bpf_log(log, "attach_btf_id %u is not a function\n",
13711                                btf_id);
13712                        return -EINVAL;
13713                }
13714                t = btf_type_by_id(btf, t->type);
13715                if (!btf_type_is_func_proto(t))
13716                        return -EINVAL;
13717                ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
13718                if (ret)
13719                        return ret;
13720                break;
13721        default:
13722                if (!prog_extension)
13723                        return -EINVAL;
13724                fallthrough;
13725        case BPF_MODIFY_RETURN:
13726        case BPF_LSM_MAC:
13727        case BPF_TRACE_FENTRY:
13728        case BPF_TRACE_FEXIT:
13729                if (!btf_type_is_func(t)) {
13730                        bpf_log(log, "attach_btf_id %u is not a function\n",
13731                                btf_id);
13732                        return -EINVAL;
13733                }
13734                if (prog_extension &&
13735                    btf_check_type_match(log, prog, btf, t))
13736                        return -EINVAL;
13737                t = btf_type_by_id(btf, t->type);
13738                if (!btf_type_is_func_proto(t))
13739                        return -EINVAL;
13740
13741                if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
13742                    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
13743                     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
13744                        return -EINVAL;
13745
13746                if (tgt_prog && conservative)
13747                        t = NULL;
13748
13749                ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
13750                if (ret < 0)
13751                        return ret;
13752
13753                if (tgt_prog) {
13754                        if (subprog == 0)
13755                                addr = (long) tgt_prog->bpf_func;
13756                        else
13757                                addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
13758                } else {
13759                        addr = kallsyms_lookup_name(tname);
13760                        if (!addr) {
13761                                bpf_log(log,
13762                                        "The address of function %s cannot be found\n",
13763                                        tname);
13764                                return -ENOENT;
13765                        }
13766                }
13767
13768                if (prog->aux->sleepable) {
13769                        ret = -EINVAL;
13770                        switch (prog->type) {
13771                        case BPF_PROG_TYPE_TRACING:
13772                                /* fentry/fexit/fmod_ret progs can be sleepable only if they are
13773                                 * attached to functions marked ALLOW_ERROR_INJECTION and are not in the denylist.
13774                                 */
13775                                if (!check_non_sleepable_error_inject(btf_id) &&
13776                                    within_error_injection_list(addr))
13777                                        ret = 0;
13778                                break;
13779                        case BPF_PROG_TYPE_LSM:
13780                                /* LSM progs check that they are attached to bpf_lsm_*() funcs.
13781                                 * Only some of them are sleepable.
13782                                 */
13783                                if (bpf_lsm_is_sleepable_hook(btf_id))
13784                                        ret = 0;
13785                                break;
13786                        default:
13787                                break;
13788                        }
13789                        if (ret) {
13790                                bpf_log(log, "%s is not sleepable\n", tname);
13791                                return ret;
13792                        }
13793                } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
13794                        if (tgt_prog) {
13795                                bpf_log(log, "can't modify return codes of BPF programs\n");
13796                                return -EINVAL;
13797                        }
13798                        ret = check_attach_modify_return(addr, tname);
13799                        if (ret) {
13800                                bpf_log(log, "%s() is not modifiable\n", tname);
13801                                return ret;
13802                        }
13803                }
13804
13805                break;
13806        }
13807        tgt_info->tgt_addr = addr;
13808        tgt_info->tgt_name = tname;
13809        tgt_info->tgt_type = t;
13810        return 0;
13811}
13812
13813BTF_SET_START(btf_id_deny)
13814BTF_ID_UNUSED
13815#ifdef CONFIG_SMP
13816BTF_ID(func, migrate_disable)
13817BTF_ID(func, migrate_enable)
13818#endif
13819#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
13820BTF_ID(func, rcu_read_unlock_strict)
13821#endif
13822BTF_SET_END(btf_id_deny)
13823
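     /* Validate prog->aux->attach_btf_id for program types that attach to a
      * BTF-described target: struct_ops programs are handled separately, while
      * tracing/LSM/EXT programs resolve their target via
      * bpf_check_attach_target() and, where needed, acquire the trampoline
      * used for attaching.
      */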
13824static int check_attach_btf_id(struct bpf_verifier_env *env)
13825{
13826        struct bpf_prog *prog = env->prog;
13827        struct bpf_prog *tgt_prog = prog->aux->dst_prog;
13828        struct bpf_attach_target_info tgt_info = {};
13829        u32 btf_id = prog->aux->attach_btf_id;
13830        struct bpf_trampoline *tr;
13831        int ret;
13832        u64 key;
13833
13834        if (prog->type == BPF_PROG_TYPE_SYSCALL) {
13835                if (prog->aux->sleepable)
13836                        /* attach_btf_id checked to be zero already */
13837                        return 0;
13838                verbose(env, "Syscall programs can only be sleepable\n");
13839                return -EINVAL;
13840        }
13841
13842        if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
13843            prog->type != BPF_PROG_TYPE_LSM) {
13844                verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
13845                return -EINVAL;
13846        }
13847
13848        if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
13849                return check_struct_ops_btf_id(env);
13850
13851        if (prog->type != BPF_PROG_TYPE_TRACING &&
13852            prog->type != BPF_PROG_TYPE_LSM &&
13853            prog->type != BPF_PROG_TYPE_EXT)
13854                return 0;
13855
13856        ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
13857        if (ret)
13858                return ret;
13859
13860        if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
13861                /* to make freplace programs equivalent to their targets, they need
13862                 * to inherit env->ops and expected_attach_type for the rest of the
13863                 * verification
13864                 */
13865                env->ops = bpf_verifier_ops[tgt_prog->type];
13866                prog->expected_attach_type = tgt_prog->expected_attach_type;
13867        }
13868
13869        /* store info about the attachment target that will be used later */
13870        prog->aux->attach_func_proto = tgt_info.tgt_type;
13871        prog->aux->attach_func_name = tgt_info.tgt_name;
13872
13873        if (tgt_prog) {
13874                prog->aux->saved_dst_prog_type = tgt_prog->type;
13875                prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
13876        }
13877
13878        if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
13879                prog->aux->attach_btf_trace = true;
13880                return 0;
13881        } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
13882                if (!bpf_iter_prog_supported(prog))
13883                        return -EINVAL;
13884                return 0;
13885        }
13886
13887        if (prog->type == BPF_PROG_TYPE_LSM) {
13888                ret = bpf_lsm_verify_prog(&env->log, prog);
13889                if (ret < 0)
13890                        return ret;
13891        } else if (prog->type == BPF_PROG_TYPE_TRACING &&
13892                   btf_id_set_contains(&btf_id_deny, btf_id)) {
13893                return -EINVAL;
13894        }
13895
13896        key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
13897        tr = bpf_trampoline_get(key, &tgt_info);
13898        if (!tr)
13899                return -ENOMEM;
13900
13901        prog->aux->dst_trampoline = tr;
13902        return 0;
13903}
13904
13905struct btf *bpf_get_btf_vmlinux(void)
13906{
13907        if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
13908                mutex_lock(&bpf_verifier_lock);
13909                if (!btf_vmlinux)
13910                        btf_vmlinux = btf_parse_vmlinux();
13911                mutex_unlock(&bpf_verifier_lock);
13912        }
13913        return btf_vmlinux;
13914}
13915
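     /* Main entry point of the verifier, called from the BPF_PROG_LOAD path.
      * It sets up the verifier environment and log, builds subprog/BTF/CFG
      * information, runs the main symbolic execution via do_check_subprogs()
      * and do_check_main(), and then applies the post-verification rewrites
      * (dead code handling, ctx access conversion, misc fixups, call fixups)
      * before handing the program back to the syscall layer.
      */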
13916int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
13917{
13918        u64 start_time = ktime_get_ns();
13919        struct bpf_verifier_env *env;
13920        struct bpf_verifier_log *log;
13921        int i, len, ret = -EINVAL;
13922        bool is_priv;
13923
13924        /* no program is valid */
13925        if (ARRAY_SIZE(bpf_verifier_ops) == 0)
13926                return -EINVAL;
13927
13928        /* 'struct bpf_verifier_env' can be global, but since it's not small,
13929         * allocate/free it every time bpf_check() is called
13930         */
13931        env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
13932        if (!env)
13933                return -ENOMEM;
13934        log = &env->log;
13935
13936        len = (*prog)->len;
13937        env->insn_aux_data =
13938                vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
13939        ret = -ENOMEM;
13940        if (!env->insn_aux_data)
13941                goto err_free_env;
13942        for (i = 0; i < len; i++)
13943                env->insn_aux_data[i].orig_idx = i;
13944        env->prog = *prog;
13945        env->ops = bpf_verifier_ops[env->prog->type];
13946        env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
13947        is_priv = bpf_capable();
13948
13949        bpf_get_btf_vmlinux();
13950
13951        /* grab the mutex to protect a few globals used by the verifier */
13952        if (!is_priv)
13953                mutex_lock(&bpf_verifier_lock);
13954
13955        if (attr->log_level || attr->log_buf || attr->log_size) {
13956                /* user requested verbose verifier output
13957                 * and supplied buffer to store the verification trace
13958                 */
13959                log->level = attr->log_level;
13960                log->ubuf = (char __user *) (unsigned long) attr->log_buf;
13961                log->len_total = attr->log_size;
13962
13963                ret = -EINVAL;
13964                /* log attributes have to be sane */
13965                if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
13966                    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
13967                        goto err_unlock;
13968        }
13969
13970        if (IS_ERR(btf_vmlinux)) {
13971                /* Either gcc, pahole, or the kernel is broken. */
13972                verbose(env, "in-kernel BTF is malformed\n");
13973                ret = PTR_ERR(btf_vmlinux);
13974                goto skip_full_check;
13975        }
13976
13977        env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
13978        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
13979                env->strict_alignment = true;
13980        if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
13981                env->strict_alignment = false;
13982
13983        env->allow_ptr_leaks = bpf_allow_ptr_leaks();
13984        env->allow_uninit_stack = bpf_allow_uninit_stack();
13985        env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
13986        env->bypass_spec_v1 = bpf_bypass_spec_v1();
13987        env->bypass_spec_v4 = bpf_bypass_spec_v4();
13988        env->bpf_capable = bpf_capable();
13989
13990        if (is_priv)
13991                env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
13992
13993        env->explored_states = kvcalloc(state_htab_size(env),
13994                                       sizeof(struct bpf_verifier_state_list *),
13995                                       GFP_USER);
13996        ret = -ENOMEM;
13997        if (!env->explored_states)
13998                goto skip_full_check;
13999
14000        ret = add_subprog_and_kfunc(env);
14001        if (ret < 0)
14002                goto skip_full_check;
14003
14004        ret = check_subprogs(env);
14005        if (ret < 0)
14006                goto skip_full_check;
14007
14008        ret = check_btf_info(env, attr, uattr);
14009        if (ret < 0)
14010                goto skip_full_check;
14011
14012        ret = check_attach_btf_id(env);
14013        if (ret)
14014                goto skip_full_check;
14015
14016        ret = resolve_pseudo_ldimm64(env);
14017        if (ret < 0)
14018                goto skip_full_check;
14019
14020        if (bpf_prog_is_dev_bound(env->prog->aux)) {
14021                ret = bpf_prog_offload_verifier_prep(env->prog);
14022                if (ret)
14023                        goto skip_full_check;
14024        }
14025
14026        ret = check_cfg(env);
14027        if (ret < 0)
14028                goto skip_full_check;
14029
14030        ret = do_check_subprogs(env);
14031        ret = ret ?: do_check_main(env);
14032
14033        if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
14034                ret = bpf_prog_offload_finalize(env);
14035
14036skip_full_check:
14037        kvfree(env->explored_states);
14038
14039        if (ret == 0)
14040                ret = check_max_stack_depth(env);
14041
14042        /* instruction rewrites happen after this point */
14043        if (is_priv) {
14044                if (ret == 0)
14045                        opt_hard_wire_dead_code_branches(env);
14046                if (ret == 0)
14047                        ret = opt_remove_dead_code(env);
14048                if (ret == 0)
14049                        ret = opt_remove_nops(env);
14050        } else {
14051                if (ret == 0)
14052                        sanitize_dead_code(env);
14053        }
14054
14055        if (ret == 0)
14056                /* program is valid, convert *(u32*)(ctx + off) accesses */
14057                ret = convert_ctx_accesses(env);
14058
14059        if (ret == 0)
14060                ret = do_misc_fixups(env);
14061
14062        /* do the 32-bit optimization after insn patching is done so that the
14063         * patched insns can be handled correctly.
14064         */
14065        if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
14066                ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
14067                env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
14068                                                                     : false;
14069        }
14070
14071        if (ret == 0)
14072                ret = fixup_call_args(env);
14073
14074        env->verification_time = ktime_get_ns() - start_time;
14075        print_verification_stats(env);
14076        env->prog->aux->verified_insns = env->insn_processed;
14077
14078        if (log->level && bpf_verifier_log_full(log))
14079                ret = -ENOSPC;
14080        if (log->level && !log->ubuf) {
14081                ret = -EFAULT;
14082                goto err_release_maps;
14083        }
14084
14085        if (ret)
14086                goto err_release_maps;
14087
14088        if (env->used_map_cnt) {
14089                /* if program passed verifier, update used_maps in bpf_prog_info */
14090                env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
14091                                                          sizeof(env->used_maps[0]),
14092                                                          GFP_KERNEL);
14093
14094                if (!env->prog->aux->used_maps) {
14095                        ret = -ENOMEM;
14096                        goto err_release_maps;
14097                }
14098
14099                memcpy(env->prog->aux->used_maps, env->used_maps,
14100                       sizeof(env->used_maps[0]) * env->used_map_cnt);
14101                env->prog->aux->used_map_cnt = env->used_map_cnt;
14102        }
14103        if (env->used_btf_cnt) {
14104                /* if program passed verifier, update used_btfs in bpf_prog_aux */
14105                env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
14106                                                          sizeof(env->used_btfs[0]),
14107                                                          GFP_KERNEL);
14108                if (!env->prog->aux->used_btfs) {
14109                        ret = -ENOMEM;
14110                        goto err_release_maps;
14111                }
14112
14113                memcpy(env->prog->aux->used_btfs, env->used_btfs,
14114                       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
14115                env->prog->aux->used_btf_cnt = env->used_btf_cnt;
14116        }
14117        if (env->used_map_cnt || env->used_btf_cnt) {
14118                /* program is valid. Convert pseudo bpf_ld_imm64 into generic
14119                 * bpf_ld_imm64 instructions
14120                 */
14121                convert_pseudo_ld_imm64(env);
14122        }
14123
14124        adjust_btf_func(env);
14125
14126err_release_maps:
14127        if (!env->prog->aux->used_maps)
14128                /* if we didn't copy map pointers into bpf_prog_info, release
14129                 * them now. Otherwise free_used_maps() will release them.
14130                 */
14131                release_maps(env);
14132        if (!env->prog->aux->used_btfs)
14133                release_btfs(env);
14134
14135        /* extension progs temporarily inherit the attach_type of their targets
14136         * for verification purposes, so set it back to zero before returning
14137         */
14138        if (env->prog->type == BPF_PROG_TYPE_EXT)
14139                env->prog->expected_attach_type = 0;
14140
14141        *prog = env->prog;
14142err_unlock:
14143        if (!is_priv)
14144                mutex_unlock(&bpf_verifier_lock);
14145        vfree(env->insn_aux_data);
14146err_free_env:
14147        kfree(env);
14148        return ret;
14149}
14150