linux/kernel/bpf/core.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Linux Socket Filter - Kernel level socket filtering
   4 *
   5 * Based on the design of the Berkeley Packet Filter. The new
   6 * internal format has been designed by PLUMgrid:
   7 *
   8 *      Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
   9 *
  10 * Authors:
  11 *
  12 *      Jay Schulist <jschlst@samba.org>
  13 *      Alexei Starovoitov <ast@plumgrid.com>
  14 *      Daniel Borkmann <dborkman@redhat.com>
  15 *
  16 * Andi Kleen - Fix a few bad bugs and races.
  17 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
  18 */
  19
  20#include <uapi/linux/btf.h>
  21#include <linux/filter.h>
  22#include <linux/skbuff.h>
  23#include <linux/vmalloc.h>
  24#include <linux/random.h>
  25#include <linux/moduleloader.h>
  26#include <linux/bpf.h>
  27#include <linux/btf.h>
  28#include <linux/objtool.h>
  29#include <linux/rbtree_latch.h>
  30#include <linux/kallsyms.h>
  31#include <linux/rcupdate.h>
  32#include <linux/perf_event.h>
  33#include <linux/extable.h>
  34#include <linux/log2.h>
  35#include <linux/bpf_verifier.h>
  36
  37#include <asm/barrier.h>
  38#include <asm/unaligned.h>
  39
  40/* Registers */
  41#define BPF_R0  regs[BPF_REG_0]
  42#define BPF_R1  regs[BPF_REG_1]
  43#define BPF_R2  regs[BPF_REG_2]
  44#define BPF_R3  regs[BPF_REG_3]
  45#define BPF_R4  regs[BPF_REG_4]
  46#define BPF_R5  regs[BPF_REG_5]
  47#define BPF_R6  regs[BPF_REG_6]
  48#define BPF_R7  regs[BPF_REG_7]
  49#define BPF_R8  regs[BPF_REG_8]
  50#define BPF_R9  regs[BPF_REG_9]
  51#define BPF_R10 regs[BPF_REG_10]
  52
  53/* Named registers */
  54#define DST     regs[insn->dst_reg]
  55#define SRC     regs[insn->src_reg]
  56#define FP      regs[BPF_REG_FP]
  57#define AX      regs[BPF_REG_AX]
  58#define ARG1    regs[BPF_REG_ARG1]
  59#define CTX     regs[BPF_REG_CTX]
  60#define IMM     insn->imm
  61
  62/* No hurry in this branch
  63 *
  64 * Exported for the bpf jit load helper.
  65 */
  66void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
  67{
  68        u8 *ptr = NULL;
  69
  70        if (k >= SKF_NET_OFF)
  71                ptr = skb_network_header(skb) + k - SKF_NET_OFF;
  72        else if (k >= SKF_LL_OFF)
  73                ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
  74
  75        if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
  76                return ptr;
  77
  78        return NULL;
  79}
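/* Editorial illustration (not part of the original source): classic BPF
 * encodes loads relative to the link-layer or network header with the
 * special negative offsets SKF_LL_OFF and SKF_NET_OFF.  A hypothetical
 * cBPF JIT slow path loading the IPv4 protocol byte (network header + 9)
 * would end up doing roughly:
 *
 *    u8 *p = bpf_internal_load_pointer_neg_helper(skb, SKF_NET_OFF + 9, 1);
 *
 *    if (p)
 *        ... use *p ...;     // byte lies within the linear skb data
 *    else
 *        ... bail out ...;   // outside skb->head .. skb_tail_pointer()
 */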
  80
  81struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
  82{
  83        gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
  84        struct bpf_prog_aux *aux;
  85        struct bpf_prog *fp;
  86
  87        size = round_up(size, PAGE_SIZE);
  88        fp = __vmalloc(size, gfp_flags);
  89        if (fp == NULL)
  90                return NULL;
  91
  92        aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
  93        if (aux == NULL) {
  94                vfree(fp);
  95                return NULL;
  96        }
  97        fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
  98        if (!fp->active) {
  99                vfree(fp);
 100                kfree(aux);
 101                return NULL;
 102        }
 103
 104        fp->pages = size / PAGE_SIZE;
 105        fp->aux = aux;
 106        fp->aux->prog = fp;
 107        fp->jit_requested = ebpf_jit_enabled();
 108
 109        INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
 110        mutex_init(&fp->aux->used_maps_mutex);
 111        mutex_init(&fp->aux->dst_mutex);
 112
 113        return fp;
 114}
 115
 116struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 117{
 118        gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
 119        struct bpf_prog *prog;
 120        int cpu;
 121
 122        prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
 123        if (!prog)
 124                return NULL;
 125
 126        prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
 127        if (!prog->stats) {
 128                free_percpu(prog->active);
 129                kfree(prog->aux);
 130                vfree(prog);
 131                return NULL;
 132        }
 133
 134        for_each_possible_cpu(cpu) {
 135                struct bpf_prog_stats *pstats;
 136
 137                pstats = per_cpu_ptr(prog->stats, cpu);
 138                u64_stats_init(&pstats->syncp);
 139        }
 140        return prog;
 141}
 142EXPORT_SYMBOL_GPL(bpf_prog_alloc);
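/* Usage sketch (an assumption for illustration, mirroring the syscall path
 * rather than code in this file): callers size the allocation with
 * bpf_prog_size(), which covers struct bpf_prog plus the trailing insnsi[]
 * array, so the instructions can be copied right behind the header:
 *
 *    prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 *    if (!prog)
 *        return -ENOMEM;
 *    prog->len = attr->insn_cnt;
 *    // followed by copying attr->insns into prog->insnsi
 */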
 143
 144int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
 145{
 146        if (!prog->aux->nr_linfo || !prog->jit_requested)
 147                return 0;
 148
 149        prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
 150                                          sizeof(*prog->aux->jited_linfo),
 151                                          GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
 152        if (!prog->aux->jited_linfo)
 153                return -ENOMEM;
 154
 155        return 0;
 156}
 157
 158void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
 159{
 160        if (prog->aux->jited_linfo &&
 161            (!prog->jited || !prog->aux->jited_linfo[0])) {
 162                kvfree(prog->aux->jited_linfo);
 163                prog->aux->jited_linfo = NULL;
 164        }
 165
 166        kfree(prog->aux->kfunc_tab);
 167        prog->aux->kfunc_tab = NULL;
 168}
 169
 170/* The JIT engine is responsible for providing an array
 171 * for the insn_off to jited_off mapping (insn_to_jit_off).
 172 *
 173 * The idx to this array is the insn_off.  Hence, the insn_off
 174 * here is relative to the prog itself instead of the main prog.
 175 * This array has one entry for each xlated bpf insn.
 176 *
 177 * jited_off is the byte off to the last byte of the jited insn.
 178 *
 179 * Hence, with
 180 * insn_start:
 181 *      The first bpf insn off of the prog.  The insn off
 182 *      here is relative to the main prog.
 183 *      e.g. if prog is a subprog, insn_start > 0
 184 * linfo_idx:
 185 *      The prog's idx to prog->aux->linfo and jited_linfo
 186 *
 187 * jited_linfo[linfo_idx] = prog->bpf_func
 188 *
 189 * For i > linfo_idx,
 190 *
 191 * jited_linfo[i] = prog->bpf_func +
 192 *      insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
 193 */
 194void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
 195                               const u32 *insn_to_jit_off)
 196{
 197        u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
 198        const struct bpf_line_info *linfo;
 199        void **jited_linfo;
 200
 201        if (!prog->aux->jited_linfo)
 202                /* Userspace did not provide linfo */
 203                return;
 204
 205        linfo_idx = prog->aux->linfo_idx;
 206        linfo = &prog->aux->linfo[linfo_idx];
 207        insn_start = linfo[0].insn_off;
 208        insn_end = insn_start + prog->len;
 209
 210        jited_linfo = &prog->aux->jited_linfo[linfo_idx];
 211        jited_linfo[0] = prog->bpf_func;
 212
 213        nr_linfo = prog->aux->nr_linfo - linfo_idx;
 214
 215        for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
 216                /* The verifier ensures that linfo[i].insn_off is
 217                 * strictly increasing
 218                 */
 219                jited_linfo[i] = prog->bpf_func +
 220                        insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
 221}
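/* Worked example (hypothetical numbers): consider a subprog whose first
 * line info entry has insn_off 10 (== insn_start) and further entries at
 * insn_off 12 and 15.  The loop above then fills:
 *
 *    jited_linfo[0] = prog->bpf_func
 *    jited_linfo[1] = prog->bpf_func + insn_to_jit_off[12 - 10 - 1]
 *    jited_linfo[2] = prog->bpf_func + insn_to_jit_off[15 - 10 - 1]
 *
 * i.e. each entry is derived from the end of the jited code of the
 * instruction preceding the line, which is where that line's code begins.
 */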
 222
 223struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 224                                  gfp_t gfp_extra_flags)
 225{
 226        gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
 227        struct bpf_prog *fp;
 228        u32 pages;
 229
 230        size = round_up(size, PAGE_SIZE);
 231        pages = size / PAGE_SIZE;
 232        if (pages <= fp_old->pages)
 233                return fp_old;
 234
 235        fp = __vmalloc(size, gfp_flags);
 236        if (fp) {
 237                memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
 238                fp->pages = pages;
 239                fp->aux->prog = fp;
 240
 241                /* We keep fp->aux from fp_old around in the new
 242                 * reallocated structure.
 243                 */
 244                fp_old->aux = NULL;
 245                fp_old->stats = NULL;
 246                fp_old->active = NULL;
 247                __bpf_prog_free(fp_old);
 248        }
 249
 250        return fp;
 251}
 252
 253void __bpf_prog_free(struct bpf_prog *fp)
 254{
 255        if (fp->aux) {
 256                mutex_destroy(&fp->aux->used_maps_mutex);
 257                mutex_destroy(&fp->aux->dst_mutex);
 258                kfree(fp->aux->poke_tab);
 259                kfree(fp->aux);
 260        }
 261        free_percpu(fp->stats);
 262        free_percpu(fp->active);
 263        vfree(fp);
 264}
 265
 266int bpf_prog_calc_tag(struct bpf_prog *fp)
 267{
 268        const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
 269        u32 raw_size = bpf_prog_tag_scratch_size(fp);
 270        u32 digest[SHA1_DIGEST_WORDS];
 271        u32 ws[SHA1_WORKSPACE_WORDS];
 272        u32 i, bsize, psize, blocks;
 273        struct bpf_insn *dst;
 274        bool was_ld_map;
 275        u8 *raw, *todo;
 276        __be32 *result;
 277        __be64 *bits;
 278
 279        raw = vmalloc(raw_size);
 280        if (!raw)
 281                return -ENOMEM;
 282
 283        sha1_init(digest);
 284        memset(ws, 0, sizeof(ws));
 285
 286        /* We need to exclude the map fds from the digest calculation
 287         * since they are unstable from the user-space side.
 288         */
 289        dst = (void *)raw;
 290        for (i = 0, was_ld_map = false; i < fp->len; i++) {
 291                dst[i] = fp->insnsi[i];
 292                if (!was_ld_map &&
 293                    dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
 294                    (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
 295                     dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
 296                        was_ld_map = true;
 297                        dst[i].imm = 0;
 298                } else if (was_ld_map &&
 299                           dst[i].code == 0 &&
 300                           dst[i].dst_reg == 0 &&
 301                           dst[i].src_reg == 0 &&
 302                           dst[i].off == 0) {
 303                        was_ld_map = false;
 304                        dst[i].imm = 0;
 305                } else {
 306                        was_ld_map = false;
 307                }
 308        }
 309
 310        psize = bpf_prog_insn_size(fp);
 311        memset(&raw[psize], 0, raw_size - psize);
 312        raw[psize++] = 0x80;
 313
 314        bsize  = round_up(psize, SHA1_BLOCK_SIZE);
 315        blocks = bsize / SHA1_BLOCK_SIZE;
 316        todo   = raw;
 317        if (bsize - psize >= sizeof(__be64)) {
 318                bits = (__be64 *)(todo + bsize - sizeof(__be64));
 319        } else {
 320                bits = (__be64 *)(todo + bsize + bits_offset);
 321                blocks++;
 322        }
 323        *bits = cpu_to_be64((psize - 1) << 3);
 324
 325        while (blocks--) {
 326                sha1_transform(digest, todo, ws);
 327                todo += SHA1_BLOCK_SIZE;
 328        }
 329
 330        result = (__force __be32 *)digest;
 331        for (i = 0; i < SHA1_DIGEST_WORDS; i++)
 332                result[i] = cpu_to_be32(digest[i]);
 333        memcpy(fp->tag, result, sizeof(fp->tag));
 334
 335        vfree(raw);
 336        return 0;
 337}
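/* Editorial note: the block above is SHA-1 message padding done by hand:
 * a 0x80 byte after the instructions, zero fill, and the message length in
 * bits stored as a big-endian 64-bit value at the end of the final block.
 * The 160-bit digest is then truncated to sizeof(fp->tag) (8 bytes); this
 * is the tag user space sees, e.g. in "bpftool prog show" output.
 */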
 338
 339static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
 340                                s32 end_new, s32 curr, const bool probe_pass)
 341{
 342        const s64 imm_min = S32_MIN, imm_max = S32_MAX;
 343        s32 delta = end_new - end_old;
 344        s64 imm = insn->imm;
 345
 346        if (curr < pos && curr + imm + 1 >= end_old)
 347                imm += delta;
 348        else if (curr >= end_new && curr + imm + 1 < end_new)
 349                imm -= delta;
 350        if (imm < imm_min || imm > imm_max)
 351                return -ERANGE;
 352        if (!probe_pass)
 353                insn->imm = imm;
 354        return 0;
 355}
 356
 357static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
 358                                s32 end_new, s32 curr, const bool probe_pass)
 359{
 360        const s32 off_min = S16_MIN, off_max = S16_MAX;
 361        s32 delta = end_new - end_old;
 362        s32 off = insn->off;
 363
 364        if (curr < pos && curr + off + 1 >= end_old)
 365                off += delta;
 366        else if (curr >= end_new && curr + off + 1 < end_new)
 367                off -= delta;
 368        if (off < off_min || off > off_max)
 369                return -ERANGE;
 370        if (!probe_pass)
 371                insn->off = off;
 372        return 0;
 373}
 374
 375static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
 376                            s32 end_new, const bool probe_pass)
 377{
 378        u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
 379        struct bpf_insn *insn = prog->insnsi;
 380        int ret = 0;
 381
 382        for (i = 0; i < insn_cnt; i++, insn++) {
 383                u8 code;
 384
 385                /* In the probing pass we still operate on the original,
 386                 * unpatched image in order to check overflows before we
 387                 * do any other adjustments. Therefore skip the patchlet.
 388                 */
 389                if (probe_pass && i == pos) {
 390                        i = end_new;
 391                        insn = prog->insnsi + end_old;
 392                }
 393                if (bpf_pseudo_func(insn)) {
 394                        ret = bpf_adj_delta_to_imm(insn, pos, end_old,
 395                                                   end_new, i, probe_pass);
 396                        if (ret)
 397                                return ret;
 398                        continue;
 399                }
 400                code = insn->code;
 401                if ((BPF_CLASS(code) != BPF_JMP &&
 402                     BPF_CLASS(code) != BPF_JMP32) ||
 403                    BPF_OP(code) == BPF_EXIT)
 404                        continue;
 405                /* Adjust offset of jmps if we cross patch boundaries. */
 406                if (BPF_OP(code) == BPF_CALL) {
 407                        if (insn->src_reg != BPF_PSEUDO_CALL)
 408                                continue;
 409                        ret = bpf_adj_delta_to_imm(insn, pos, end_old,
 410                                                   end_new, i, probe_pass);
 411                } else {
 412                        ret = bpf_adj_delta_to_off(insn, pos, end_old,
 413                                                   end_new, i, probe_pass);
 414                }
 415                if (ret)
 416                        break;
 417        }
 418
 419        return ret;
 420}
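/* Worked example (hypothetical numbers): patching the single insn at
 * pos = 5 with a 3-insn patchlet gives end_old = 6, end_new = 8 and
 * delta = 2.  In the non-probe pass the image is already expanded, so:
 *
 *   - a jump at i = 2 with off = 4 (old target 7, behind the patch)
 *     becomes off = 6, now targeting insn 9 in the grown image;
 *   - a jump at i = 10 (old insn 8) with off = -9 (target 0, before the
 *     patch) becomes off = -11, still targeting insn 0;
 *   - jumps that start and end on the same side of the patch keep their
 *     offset unchanged.
 */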
 421
 422static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
 423{
 424        struct bpf_line_info *linfo;
 425        u32 i, nr_linfo;
 426
 427        nr_linfo = prog->aux->nr_linfo;
 428        if (!nr_linfo || !delta)
 429                return;
 430
 431        linfo = prog->aux->linfo;
 432
 433        for (i = 0; i < nr_linfo; i++)
 434                if (off < linfo[i].insn_off)
 435                        break;
 436
 437        /* Shift insn_off of all entries with insn_off > off by delta */
 438        for (; i < nr_linfo; i++)
 439                linfo[i].insn_off += delta;
 440}
 441
 442struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 443                                       const struct bpf_insn *patch, u32 len)
 444{
 445        u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
 446        const u32 cnt_max = S16_MAX;
 447        struct bpf_prog *prog_adj;
 448        int err;
 449
 450        /* Since our patchlet doesn't expand the image, we're done. */
 451        if (insn_delta == 0) {
 452                memcpy(prog->insnsi + off, patch, sizeof(*patch));
 453                return prog;
 454        }
 455
 456        insn_adj_cnt = prog->len + insn_delta;
 457
 458        /* Reject anything that would potentially let the insn->off
 459         * target overflow when we have excessive program expansions.
 460         * We need to probe here before we do any reallocation where
 461         * we afterwards may not fail anymore.
 462         */
 463        if (insn_adj_cnt > cnt_max &&
 464            (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
 465                return ERR_PTR(err);
 466
 467        /* Several new instructions need to be inserted. Make room
 468         * for them. Likely, there's no need for a new allocation as
 469         * last page could have large enough tailroom.
 470         */
 471        prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
 472                                    GFP_USER);
 473        if (!prog_adj)
 474                return ERR_PTR(-ENOMEM);
 475
 476        prog_adj->len = insn_adj_cnt;
 477
 478        /* Patching happens in 3 steps:
 479         *
 480         * 1) Move over tail of insnsi from next instruction onwards,
 481         *    so we can patch the single target insn with one or more
 482         *    new ones (patching is always from 1 to n insns, n > 0).
 483         * 2) Inject new instructions at the target location.
 484         * 3) Adjust branch offsets if necessary.
 485         */
 486        insn_rest = insn_adj_cnt - off - len;
 487
 488        memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
 489                sizeof(*patch) * insn_rest);
 490        memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
 491
 492        /* We are guaranteed not to fail at this point; if we did,
 493         * there would be no way back to the original state. An
 494         * overflow cannot happen at this point.
 495         */
 496        BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
 497
 498        bpf_adj_linfo(prog_adj, off, insn_delta);
 499
 500        return prog_adj;
 501}
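/* Usage sketch (assumption, mirroring how the verifier typically uses
 * this): callers must continue with the returned program, since patching
 * may have reallocated it:
 *
 *    new_prog = bpf_patch_insn_single(env->prog, off, patch, cnt);
 *    if (IS_ERR(new_prog))
 *        return PTR_ERR(new_prog);
 *    env->prog = new_prog;
 */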
 502
 503int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
 504{
 505        /* Branch offsets can't overflow when program is shrinking, no need
 506         * to call bpf_adj_branches(..., true) here
 507         */
 508        memmove(prog->insnsi + off, prog->insnsi + off + cnt,
 509                sizeof(struct bpf_insn) * (prog->len - off - cnt));
 510        prog->len -= cnt;
 511
 512        return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
 513}
 514
 515static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 516{
 517        int i;
 518
 519        for (i = 0; i < fp->aux->func_cnt; i++)
 520                bpf_prog_kallsyms_del(fp->aux->func[i]);
 521}
 522
 523void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
 524{
 525        bpf_prog_kallsyms_del_subprogs(fp);
 526        bpf_prog_kallsyms_del(fp);
 527}
 528
 529#ifdef CONFIG_BPF_JIT
 530/* All BPF JIT sysctl knobs here. */
 531int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 532int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 533int bpf_jit_harden   __read_mostly;
 534long bpf_jit_limit   __read_mostly;
 535long bpf_jit_limit_max __read_mostly;
 536
 537static void
 538bpf_prog_ksym_set_addr(struct bpf_prog *prog)
 539{
 540        const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
 541        unsigned long addr = (unsigned long)hdr;
 542
 543        WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
 544
 545        prog->aux->ksym.start = (unsigned long) prog->bpf_func;
 546        prog->aux->ksym.end   = addr + hdr->pages * PAGE_SIZE;
 547}
 548
 549static void
 550bpf_prog_ksym_set_name(struct bpf_prog *prog)
 551{
 552        char *sym = prog->aux->ksym.name;
 553        const char *end = sym + KSYM_NAME_LEN;
 554        const struct btf_type *type;
 555        const char *func_name;
 556
 557        BUILD_BUG_ON(sizeof("bpf_prog_") +
 558                     sizeof(prog->tag) * 2 +
 559                     /* name is null terminated.
 560                      * We would normally need +1 for the '_' preceding
 561                      * the name.  However, the null character
 562                      * is double counted between the name and the
 563                      * sizeof("bpf_prog_") above, so we omit
 564                      * the +1 here.
 565                      */
 566                     sizeof(prog->aux->name) > KSYM_NAME_LEN);
 567
 568        sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
 569        sym  = bin2hex(sym, prog->tag, sizeof(prog->tag));
 570
 571        /* prog->aux->name will be ignored if full btf name is available */
 572        if (prog->aux->func_info_cnt) {
 573                type = btf_type_by_id(prog->aux->btf,
 574                                      prog->aux->func_info[prog->aux->func_idx].type_id);
 575                func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
 576                snprintf(sym, (size_t)(end - sym), "_%s", func_name);
 577                return;
 578        }
 579
 580        if (prog->aux->name[0])
 581                snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
 582        else
 583                *sym = 0;
 584}
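/* Editorial note: the resulting symbol has the form
 * "bpf_prog_<16 hex chars of tag>[_<BTF func name or aux->name>]", for
 * example something like bpf_prog_6deef7357e7b4530_sys_enter (name part
 * hypothetical).  This is the name that later shows up via kallsyms in
 * tools such as perf and bpftool.
 */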
 585
 586static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
 587{
 588        return container_of(n, struct bpf_ksym, tnode)->start;
 589}
 590
 591static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
 592                                          struct latch_tree_node *b)
 593{
 594        return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
 595}
 596
 597static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
 598{
 599        unsigned long val = (unsigned long)key;
 600        const struct bpf_ksym *ksym;
 601
 602        ksym = container_of(n, struct bpf_ksym, tnode);
 603
 604        if (val < ksym->start)
 605                return -1;
 606        if (val >= ksym->end)
 607                return  1;
 608
 609        return 0;
 610}
 611
 612static const struct latch_tree_ops bpf_tree_ops = {
 613        .less   = bpf_tree_less,
 614        .comp   = bpf_tree_comp,
 615};
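/* Editorial note: bpf_tree_comp() treats each ksym as the half-open
 * interval [start, end), so latch_tree_find() can resolve an arbitrary
 * text address to the JIT image containing it.  The latch tree permits
 * such lookups from lockless RCU readers (e.g. address lookups during
 * stack unwinding), while writers serialize on bpf_lock below.
 */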
 616
 617static DEFINE_SPINLOCK(bpf_lock);
 618static LIST_HEAD(bpf_kallsyms);
 619static struct latch_tree_root bpf_tree __cacheline_aligned;
 620
 621void bpf_ksym_add(struct bpf_ksym *ksym)
 622{
 623        spin_lock_bh(&bpf_lock);
 624        WARN_ON_ONCE(!list_empty(&ksym->lnode));
 625        list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
 626        latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
 627        spin_unlock_bh(&bpf_lock);
 628}
 629
 630static void __bpf_ksym_del(struct bpf_ksym *ksym)
 631{
 632        if (list_empty(&ksym->lnode))
 633                return;
 634
 635        latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
 636        list_del_rcu(&ksym->lnode);
 637}
 638
 639void bpf_ksym_del(struct bpf_ksym *ksym)
 640{
 641        spin_lock_bh(&bpf_lock);
 642        __bpf_ksym_del(ksym);
 643        spin_unlock_bh(&bpf_lock);
 644}
 645
 646static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
 647{
 648        return fp->jited && !bpf_prog_was_classic(fp);
 649}
 650
 651static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
 652{
 653        return list_empty(&fp->aux->ksym.lnode) ||
 654               fp->aux->ksym.lnode.prev == LIST_POISON2;
 655}
 656
 657void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 658{
 659        if (!bpf_prog_kallsyms_candidate(fp) ||
 660            !bpf_capable())
 661                return;
 662
 663        bpf_prog_ksym_set_addr(fp);
 664        bpf_prog_ksym_set_name(fp);
 665        fp->aux->ksym.prog = true;
 666
 667        bpf_ksym_add(&fp->aux->ksym);
 668}
 669
 670void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 671{
 672        if (!bpf_prog_kallsyms_candidate(fp))
 673                return;
 674
 675        bpf_ksym_del(&fp->aux->ksym);
 676}
 677
 678static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
 679{
 680        struct latch_tree_node *n;
 681
 682        n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
 683        return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
 684}
 685
 686const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 687                                 unsigned long *off, char *sym)
 688{
 689        struct bpf_ksym *ksym;
 690        char *ret = NULL;
 691
 692        rcu_read_lock();
 693        ksym = bpf_ksym_find(addr);
 694        if (ksym) {
 695                unsigned long symbol_start = ksym->start;
 696                unsigned long symbol_end = ksym->end;
 697
 698                strncpy(sym, ksym->name, KSYM_NAME_LEN);
 699
 700                ret = sym;
 701                if (size)
 702                        *size = symbol_end - symbol_start;
 703                if (off)
 704                        *off  = addr - symbol_start;
 705        }
 706        rcu_read_unlock();
 707
 708        return ret;
 709}
 710
 711bool is_bpf_text_address(unsigned long addr)
 712{
 713        bool ret;
 714
 715        rcu_read_lock();
 716        ret = bpf_ksym_find(addr) != NULL;
 717        rcu_read_unlock();
 718
 719        return ret;
 720}
 721
 722static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
 723{
 724        struct bpf_ksym *ksym = bpf_ksym_find(addr);
 725
 726        return ksym && ksym->prog ?
 727               container_of(ksym, struct bpf_prog_aux, ksym)->prog :
 728               NULL;
 729}
 730
 731const struct exception_table_entry *search_bpf_extables(unsigned long addr)
 732{
 733        const struct exception_table_entry *e = NULL;
 734        struct bpf_prog *prog;
 735
 736        rcu_read_lock();
 737        prog = bpf_prog_ksym_find(addr);
 738        if (!prog)
 739                goto out;
 740        if (!prog->aux->num_exentries)
 741                goto out;
 742
 743        e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
 744out:
 745        rcu_read_unlock();
 746        return e;
 747}
 748
 749int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 750                    char *sym)
 751{
 752        struct bpf_ksym *ksym;
 753        unsigned int it = 0;
 754        int ret = -ERANGE;
 755
 756        if (!bpf_jit_kallsyms_enabled())
 757                return ret;
 758
 759        rcu_read_lock();
 760        list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
 761                if (it++ != symnum)
 762                        continue;
 763
 764                strncpy(sym, ksym->name, KSYM_NAME_LEN);
 765
 766                *value = ksym->start;
 767                *type  = BPF_SYM_ELF_TYPE;
 768
 769                ret = 0;
 770                break;
 771        }
 772        rcu_read_unlock();
 773
 774        return ret;
 775}
 776
 777int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
 778                                struct bpf_jit_poke_descriptor *poke)
 779{
 780        struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
 781        static const u32 poke_tab_max = 1024;
 782        u32 slot = prog->aux->size_poke_tab;
 783        u32 size = slot + 1;
 784
 785        if (size > poke_tab_max)
 786                return -ENOSPC;
 787        if (poke->tailcall_target || poke->tailcall_target_stable ||
 788            poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
 789                return -EINVAL;
 790
 791        switch (poke->reason) {
 792        case BPF_POKE_REASON_TAIL_CALL:
 793                if (!poke->tail_call.map)
 794                        return -EINVAL;
 795                break;
 796        default:
 797                return -EINVAL;
 798        }
 799
 800        tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL);
 801        if (!tab)
 802                return -ENOMEM;
 803
 804        memcpy(&tab[slot], poke, sizeof(*poke));
 805        prog->aux->size_poke_tab = size;
 806        prog->aux->poke_tab = tab;
 807
 808        return slot;
 809}
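/* Editorial note: a poke descriptor records a location in the JIT image
 * (here: a tail call site) that may be live-patched later, e.g. when the
 * referenced prog_array slot changes, on architectures implementing
 * bpf_arch_text_poke().  The returned slot index lets the caller refer
 * back to this descriptor afterwards.
 */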
 810
 811static atomic_long_t bpf_jit_current;
 812
 813/* Can be overridden by an arch's JIT compiler if it has a custom,
 814 * dedicated BPF backend memory area, or if neither of the two
 815 * below apply.
 816 */
 817u64 __weak bpf_jit_alloc_exec_limit(void)
 818{
 819#if defined(MODULES_VADDR)
 820        return MODULES_END - MODULES_VADDR;
 821#else
 822        return VMALLOC_END - VMALLOC_START;
 823#endif
 824}
 825
 826static int __init bpf_jit_charge_init(void)
 827{
 828        /* Only used as heuristic here to derive limit. */
 829        bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
 830        bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
 831                                            PAGE_SIZE), LONG_MAX);
 832        return 0;
 833}
 834pure_initcall(bpf_jit_charge_init);
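/* Worked example (architecture-dependent, numbers purely illustrative):
 * on an arch whose module area spans 1 GiB, bpf_jit_limit_max is 1 GiB
 * and the default bpf_jit_limit becomes round_up(1 GiB >> 2, PAGE_SIZE),
 * i.e. 256 MiB of JIT image memory in total; bpf_jit_charge_modmem()
 * below only lets bpf_capable() callers exceed that limit.
 */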
 835
 836int bpf_jit_charge_modmem(u32 pages)
 837{
 838        if (atomic_long_add_return(pages, &bpf_jit_current) >
 839            (bpf_jit_limit >> PAGE_SHIFT)) {
 840                if (!bpf_capable()) {
 841                        atomic_long_sub(pages, &bpf_jit_current);
 842                        return -EPERM;
 843                }
 844        }
 845
 846        return 0;
 847}
 848
 849void bpf_jit_uncharge_modmem(u32 pages)
 850{
 851        atomic_long_sub(pages, &bpf_jit_current);
 852}
 853
 854void *__weak bpf_jit_alloc_exec(unsigned long size)
 855{
 856        return module_alloc(size);
 857}
 858
 859void __weak bpf_jit_free_exec(void *addr)
 860{
 861        module_memfree(addr);
 862}
 863
 864struct bpf_binary_header *
 865bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 866                     unsigned int alignment,
 867                     bpf_jit_fill_hole_t bpf_fill_ill_insns)
 868{
 869        struct bpf_binary_header *hdr;
 870        u32 size, hole, start, pages;
 871
 872        WARN_ON_ONCE(!is_power_of_2(alignment) ||
 873                     alignment > BPF_IMAGE_ALIGNMENT);
 874
 875        /* Most BPF filters are really small, but if some of them
 876         * fill a whole page, allow at least 128 extra bytes to insert a
 877         * random section of illegal instructions.
 878         */
 879        size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
 880        pages = size / PAGE_SIZE;
 881
 882        if (bpf_jit_charge_modmem(pages))
 883                return NULL;
 884        hdr = bpf_jit_alloc_exec(size);
 885        if (!hdr) {
 886                bpf_jit_uncharge_modmem(pages);
 887                return NULL;
 888        }
 889
 890        /* Fill space with illegal/arch-dep instructions. */
 891        bpf_fill_ill_insns(hdr, size);
 892
 893        hdr->pages = pages;
 894        hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 895                     PAGE_SIZE - sizeof(*hdr));
 896        start = (get_random_int() % hole) & ~(alignment - 1);
 897
 898        /* Leave a random amount of space before the BPF code. */
 899        *image_ptr = &hdr->image[start];
 900
 901        return hdr;
 902}
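/* Worked example (hypothetical sizes): for proglen = 3000 on a 4 KiB page
 * architecture, size rounds up to 4096 and pages = 1.  The start offset
 * within the remaining hole is randomized and rounded down to the
 * requested alignment, so the placement of the JITed code inside the
 * allocation is not predictable from user space.
 */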
 903
 904void bpf_jit_binary_free(struct bpf_binary_header *hdr)
 905{
 906        u32 pages = hdr->pages;
 907
 908        bpf_jit_free_exec(hdr);
 909        bpf_jit_uncharge_modmem(pages);
 910}
 911
 912/* This symbol is only overridden by archs that have different
 913 * requirements from the usual eBPF JITs, e.g. when they only
 914 * implement a cBPF JIT, do not set images read-only, etc.
 915 */
 916void __weak bpf_jit_free(struct bpf_prog *fp)
 917{
 918        if (fp->jited) {
 919                struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
 920
 921                bpf_jit_binary_free(hdr);
 922
 923                WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
 924        }
 925
 926        bpf_prog_unlock_free(fp);
 927}
 928
 929int bpf_jit_get_func_addr(const struct bpf_prog *prog,
 930                          const struct bpf_insn *insn, bool extra_pass,
 931                          u64 *func_addr, bool *func_addr_fixed)
 932{
 933        s16 off = insn->off;
 934        s32 imm = insn->imm;
 935        u8 *addr;
 936
 937        *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
 938        if (!*func_addr_fixed) {
 939                /* Place-holder address until the last pass has collected
 940                 * all addresses of JITed subprograms, at which point we
 941                 * can pick them up from prog->aux.
 942                 */
 943                if (!extra_pass)
 944                        addr = NULL;
 945                else if (prog->aux->func &&
 946                         off >= 0 && off < prog->aux->func_cnt)
 947                        addr = (u8 *)prog->aux->func[off]->bpf_func;
 948                else
 949                        return -EINVAL;
 950        } else {
 951                /* Address of a BPF helper call. Since part of the core
 952                 * kernel, it's always at a fixed location. __bpf_call_base
 953                 * and the helper with imm relative to it are both in core
 954                 * kernel.
 955                 */
 956                addr = (u8 *)__bpf_call_base + imm;
 957        }
 958
 959        *func_addr = (unsigned long)addr;
 960        return 0;
 961}
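/* Editorial note: two kinds of calls are resolved above.  For a subprog
 * call (src_reg == BPF_PSEUDO_CALL), insn->off indexes prog->aux->func[],
 * so the final address is only known once all subprogs have been JITed
 * and the extra pass runs.  For a helper call, the target is simply
 * __bpf_call_base + insn->imm, fixed within the core kernel image.
 */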
 962
 963static int bpf_jit_blind_insn(const struct bpf_insn *from,
 964                              const struct bpf_insn *aux,
 965                              struct bpf_insn *to_buff,
 966                              bool emit_zext)
 967{
 968        struct bpf_insn *to = to_buff;
 969        u32 imm_rnd = get_random_int();
 970        s16 off;
 971
 972        BUILD_BUG_ON(BPF_REG_AX  + 1 != MAX_BPF_JIT_REG);
 973        BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
 974
 975        /* Constraints on AX register:
 976         *
 977         * AX register is inaccessible from user space. It is mapped in
 978         * all JITs, and used here for constant blinding rewrites. It is
 979         * typically "stateless" meaning its contents are only valid within
 980         * the executed instruction, but not across several instructions.
 981         * There are a few exceptions however which are further detailed
 982         * below.
 983         *
 984         * Constant blinding is only used by JITs, not in the interpreter.
 985         * The interpreter uses AX on some occasions as a local temporary
 986         * register e.g. in DIV or MOD instructions.
 987         *
 988         * In restricted circumstances, the verifier can also use the AX
 989         * register for rewrites as long as they do not interfere with
 990         * the above cases!
 991         */
 992        if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
 993                goto out;
 994
 995        if (from->imm == 0 &&
 996            (from->code == (BPF_ALU   | BPF_MOV | BPF_K) ||
 997             from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
 998                *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
 999                goto out;
1000        }
1001
1002        switch (from->code) {
1003        case BPF_ALU | BPF_ADD | BPF_K:
1004        case BPF_ALU | BPF_SUB | BPF_K:
1005        case BPF_ALU | BPF_AND | BPF_K:
1006        case BPF_ALU | BPF_OR  | BPF_K:
1007        case BPF_ALU | BPF_XOR | BPF_K:
1008        case BPF_ALU | BPF_MUL | BPF_K:
1009        case BPF_ALU | BPF_MOV | BPF_K:
1010        case BPF_ALU | BPF_DIV | BPF_K:
1011        case BPF_ALU | BPF_MOD | BPF_K:
1012                *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1013                *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1014                *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
1015                break;
1016
1017        case BPF_ALU64 | BPF_ADD | BPF_K:
1018        case BPF_ALU64 | BPF_SUB | BPF_K:
1019        case BPF_ALU64 | BPF_AND | BPF_K:
1020        case BPF_ALU64 | BPF_OR  | BPF_K:
1021        case BPF_ALU64 | BPF_XOR | BPF_K:
1022        case BPF_ALU64 | BPF_MUL | BPF_K:
1023        case BPF_ALU64 | BPF_MOV | BPF_K:
1024        case BPF_ALU64 | BPF_DIV | BPF_K:
1025        case BPF_ALU64 | BPF_MOD | BPF_K:
1026                *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1027                *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1028                *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
1029                break;
1030
1031        case BPF_JMP | BPF_JEQ  | BPF_K:
1032        case BPF_JMP | BPF_JNE  | BPF_K:
1033        case BPF_JMP | BPF_JGT  | BPF_K:
1034        case BPF_JMP | BPF_JLT  | BPF_K:
1035        case BPF_JMP | BPF_JGE  | BPF_K:
1036        case BPF_JMP | BPF_JLE  | BPF_K:
1037        case BPF_JMP | BPF_JSGT | BPF_K:
1038        case BPF_JMP | BPF_JSLT | BPF_K:
1039        case BPF_JMP | BPF_JSGE | BPF_K:
1040        case BPF_JMP | BPF_JSLE | BPF_K:
1041        case BPF_JMP | BPF_JSET | BPF_K:
1042                /* Accommodate for extra offset in case of a backjump. */
1043                off = from->off;
1044                if (off < 0)
1045                        off -= 2;
1046                *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1047                *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1048                *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
1049                break;
1050
1051        case BPF_JMP32 | BPF_JEQ  | BPF_K:
1052        case BPF_JMP32 | BPF_JNE  | BPF_K:
1053        case BPF_JMP32 | BPF_JGT  | BPF_K:
1054        case BPF_JMP32 | BPF_JLT  | BPF_K:
1055        case BPF_JMP32 | BPF_JGE  | BPF_K:
1056        case BPF_JMP32 | BPF_JLE  | BPF_K:
1057        case BPF_JMP32 | BPF_JSGT | BPF_K:
1058        case BPF_JMP32 | BPF_JSLT | BPF_K:
1059        case BPF_JMP32 | BPF_JSGE | BPF_K:
1060        case BPF_JMP32 | BPF_JSLE | BPF_K:
1061        case BPF_JMP32 | BPF_JSET | BPF_K:
1062                /* Accommodate for extra offset in case of a backjump. */
1063                off = from->off;
1064                if (off < 0)
1065                        off -= 2;
1066                *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1067                *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1068                *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
1069                                      off);
1070                break;
1071
1072        case BPF_LD | BPF_IMM | BPF_DW:
1073                *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
1074                *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1075                *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
1076                *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
1077                break;
1078        case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
1079                *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
1080                *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1081                if (emit_zext)
1082                        *to++ = BPF_ZEXT_REG(BPF_REG_AX);
1083                *to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
1084                break;
1085
1086        case BPF_ST | BPF_MEM | BPF_DW:
1087        case BPF_ST | BPF_MEM | BPF_W:
1088        case BPF_ST | BPF_MEM | BPF_H:
1089        case BPF_ST | BPF_MEM | BPF_B:
1090                *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1091                *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1092                *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
1093                break;
1094        }
1095out:
1096        return to - to_buff;
1097}
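/* Worked example (hypothetical constant): blinding
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1234)
 * with a random imm_rnd produces the three instructions
 *    BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ 0x1234)
 *    BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd)
 *    BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_AX)
 * so the attacker-chosen constant 0x1234 never appears verbatim in the
 * JITed image, defeating JIT spraying of crafted immediates.
 */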
1098
1099static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
1100                                              gfp_t gfp_extra_flags)
1101{
1102        gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
1103        struct bpf_prog *fp;
1104
1105        fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
1106        if (fp != NULL) {
1107                /* aux->prog still points to the fp_other one, so
1108                 * when promoting the clone to the real program,
1109                 * this still needs to be adapted.
1110                 */
1111                memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
1112        }
1113
1114        return fp;
1115}
1116
1117static void bpf_prog_clone_free(struct bpf_prog *fp)
1118{
1119        /* aux was stolen by the other clone, so we cannot free
1120         * it from this path! It will be freed eventually by the
1121         * other program on release.
1122         *
1123         * At this point, we don't need a deferred release since
1124         * clone is guaranteed to not be locked.
1125         */
1126        fp->aux = NULL;
1127        fp->stats = NULL;
1128        fp->active = NULL;
1129        __bpf_prog_free(fp);
1130}
1131
1132void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
1133{
1134        /* We have to repoint aux->prog to self, as we don't
1135         * know whether fp here is the clone or the original.
1136         */
1137        fp->aux->prog = fp;
1138        bpf_prog_clone_free(fp_other);
1139}
1140
1141struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
1142{
1143        struct bpf_insn insn_buff[16], aux[2];
1144        struct bpf_prog *clone, *tmp;
1145        int insn_delta, insn_cnt;
1146        struct bpf_insn *insn;
1147        int i, rewritten;
1148
1149        if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
1150                return prog;
1151
1152        clone = bpf_prog_clone_create(prog, GFP_USER);
1153        if (!clone)
1154                return ERR_PTR(-ENOMEM);
1155
1156        insn_cnt = clone->len;
1157        insn = clone->insnsi;
1158
1159        for (i = 0; i < insn_cnt; i++, insn++) {
1160                /* We temporarily need to hold the original ld64 insn
1161                 * so that we can still access the first part in the
1162                 * second blinding run.
1163                 */
1164                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
1165                    insn[1].code == 0)
1166                        memcpy(aux, insn, sizeof(aux));
1167
1168                rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
1169                                                clone->aux->verifier_zext);
1170                if (!rewritten)
1171                        continue;
1172
1173                tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
1174                if (IS_ERR(tmp)) {
1175                        /* Patching may have repointed aux->prog during
1176                         * realloc from the original one, so we need to
1177                         * fix it up here on error.
1178                         */
1179                        bpf_jit_prog_release_other(prog, clone);
1180                        return tmp;
1181                }
1182
1183                clone = tmp;
1184                insn_delta = rewritten - 1;
1185
1186                /* Walk new program and skip insns we just inserted. */
1187                insn = clone->insnsi + i + insn_delta;
1188                insn_cnt += insn_delta;
1189                i        += insn_delta;
1190        }
1191
1192        clone->blinded = 1;
1193        return clone;
1194}
1195#endif /* CONFIG_BPF_JIT */
1196
1197/* Base function for offset calculation. Needs to go into the .text section,
1198 * and is therefore kept non-static; it will also be used by JITs
1199 * later on, so do not let the compiler omit it. This also needs
1200 * to go into kallsyms for correlation from e.g. bpftool, so the naming
1201 * must not change.
1202 */
1203noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1204{
1205        return 0;
1206}
1207EXPORT_SYMBOL_GPL(__bpf_call_base);
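/* Editorial note: helper calls are encoded relative to this function,
 * i.e. insn->imm = helper_address - __bpf_call_base, so a 32-bit
 * immediate suffices to reach any helper in the core kernel.  Both the
 * interpreter (JMP_CALL below) and bpf_jit_get_func_addr() above recover
 * the target as __bpf_call_base + insn->imm.
 */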
1208
1209/* All UAPI available opcodes. */
1210#define BPF_INSN_MAP(INSN_2, INSN_3)            \
1211        /* 32 bit ALU operations. */            \
1212        /*   Register based. */                 \
1213        INSN_3(ALU, ADD,  X),                   \
1214        INSN_3(ALU, SUB,  X),                   \
1215        INSN_3(ALU, AND,  X),                   \
1216        INSN_3(ALU, OR,   X),                   \
1217        INSN_3(ALU, LSH,  X),                   \
1218        INSN_3(ALU, RSH,  X),                   \
1219        INSN_3(ALU, XOR,  X),                   \
1220        INSN_3(ALU, MUL,  X),                   \
1221        INSN_3(ALU, MOV,  X),                   \
1222        INSN_3(ALU, ARSH, X),                   \
1223        INSN_3(ALU, DIV,  X),                   \
1224        INSN_3(ALU, MOD,  X),                   \
1225        INSN_2(ALU, NEG),                       \
1226        INSN_3(ALU, END, TO_BE),                \
1227        INSN_3(ALU, END, TO_LE),                \
1228        /*   Immediate based. */                \
1229        INSN_3(ALU, ADD,  K),                   \
1230        INSN_3(ALU, SUB,  K),                   \
1231        INSN_3(ALU, AND,  K),                   \
1232        INSN_3(ALU, OR,   K),                   \
1233        INSN_3(ALU, LSH,  K),                   \
1234        INSN_3(ALU, RSH,  K),                   \
1235        INSN_3(ALU, XOR,  K),                   \
1236        INSN_3(ALU, MUL,  K),                   \
1237        INSN_3(ALU, MOV,  K),                   \
1238        INSN_3(ALU, ARSH, K),                   \
1239        INSN_3(ALU, DIV,  K),                   \
1240        INSN_3(ALU, MOD,  K),                   \
1241        /* 64 bit ALU operations. */            \
1242        /*   Register based. */                 \
1243        INSN_3(ALU64, ADD,  X),                 \
1244        INSN_3(ALU64, SUB,  X),                 \
1245        INSN_3(ALU64, AND,  X),                 \
1246        INSN_3(ALU64, OR,   X),                 \
1247        INSN_3(ALU64, LSH,  X),                 \
1248        INSN_3(ALU64, RSH,  X),                 \
1249        INSN_3(ALU64, XOR,  X),                 \
1250        INSN_3(ALU64, MUL,  X),                 \
1251        INSN_3(ALU64, MOV,  X),                 \
1252        INSN_3(ALU64, ARSH, X),                 \
1253        INSN_3(ALU64, DIV,  X),                 \
1254        INSN_3(ALU64, MOD,  X),                 \
1255        INSN_2(ALU64, NEG),                     \
1256        /*   Immediate based. */                \
1257        INSN_3(ALU64, ADD,  K),                 \
1258        INSN_3(ALU64, SUB,  K),                 \
1259        INSN_3(ALU64, AND,  K),                 \
1260        INSN_3(ALU64, OR,   K),                 \
1261        INSN_3(ALU64, LSH,  K),                 \
1262        INSN_3(ALU64, RSH,  K),                 \
1263        INSN_3(ALU64, XOR,  K),                 \
1264        INSN_3(ALU64, MUL,  K),                 \
1265        INSN_3(ALU64, MOV,  K),                 \
1266        INSN_3(ALU64, ARSH, K),                 \
1267        INSN_3(ALU64, DIV,  K),                 \
1268        INSN_3(ALU64, MOD,  K),                 \
1269        /* Call instruction. */                 \
1270        INSN_2(JMP, CALL),                      \
1271        /* Exit instruction. */                 \
1272        INSN_2(JMP, EXIT),                      \
1273        /* 32-bit Jump instructions. */         \
1274        /*   Register based. */                 \
1275        INSN_3(JMP32, JEQ,  X),                 \
1276        INSN_3(JMP32, JNE,  X),                 \
1277        INSN_3(JMP32, JGT,  X),                 \
1278        INSN_3(JMP32, JLT,  X),                 \
1279        INSN_3(JMP32, JGE,  X),                 \
1280        INSN_3(JMP32, JLE,  X),                 \
1281        INSN_3(JMP32, JSGT, X),                 \
1282        INSN_3(JMP32, JSLT, X),                 \
1283        INSN_3(JMP32, JSGE, X),                 \
1284        INSN_3(JMP32, JSLE, X),                 \
1285        INSN_3(JMP32, JSET, X),                 \
1286        /*   Immediate based. */                \
1287        INSN_3(JMP32, JEQ,  K),                 \
1288        INSN_3(JMP32, JNE,  K),                 \
1289        INSN_3(JMP32, JGT,  K),                 \
1290        INSN_3(JMP32, JLT,  K),                 \
1291        INSN_3(JMP32, JGE,  K),                 \
1292        INSN_3(JMP32, JLE,  K),                 \
1293        INSN_3(JMP32, JSGT, K),                 \
1294        INSN_3(JMP32, JSLT, K),                 \
1295        INSN_3(JMP32, JSGE, K),                 \
1296        INSN_3(JMP32, JSLE, K),                 \
1297        INSN_3(JMP32, JSET, K),                 \
1298        /* Jump instructions. */                \
1299        /*   Register based. */                 \
1300        INSN_3(JMP, JEQ,  X),                   \
1301        INSN_3(JMP, JNE,  X),                   \
1302        INSN_3(JMP, JGT,  X),                   \
1303        INSN_3(JMP, JLT,  X),                   \
1304        INSN_3(JMP, JGE,  X),                   \
1305        INSN_3(JMP, JLE,  X),                   \
1306        INSN_3(JMP, JSGT, X),                   \
1307        INSN_3(JMP, JSLT, X),                   \
1308        INSN_3(JMP, JSGE, X),                   \
1309        INSN_3(JMP, JSLE, X),                   \
1310        INSN_3(JMP, JSET, X),                   \
1311        /*   Immediate based. */                \
1312        INSN_3(JMP, JEQ,  K),                   \
1313        INSN_3(JMP, JNE,  K),                   \
1314        INSN_3(JMP, JGT,  K),                   \
1315        INSN_3(JMP, JLT,  K),                   \
1316        INSN_3(JMP, JGE,  K),                   \
1317        INSN_3(JMP, JLE,  K),                   \
1318        INSN_3(JMP, JSGT, K),                   \
1319        INSN_3(JMP, JSLT, K),                   \
1320        INSN_3(JMP, JSGE, K),                   \
1321        INSN_3(JMP, JSLE, K),                   \
1322        INSN_3(JMP, JSET, K),                   \
1323        INSN_2(JMP, JA),                        \
1324        /* Store instructions. */               \
1325        /*   Register based. */                 \
1326        INSN_3(STX, MEM,  B),                   \
1327        INSN_3(STX, MEM,  H),                   \
1328        INSN_3(STX, MEM,  W),                   \
1329        INSN_3(STX, MEM,  DW),                  \
1330        INSN_3(STX, ATOMIC, W),                 \
1331        INSN_3(STX, ATOMIC, DW),                \
1332        /*   Immediate based. */                \
1333        INSN_3(ST, MEM, B),                     \
1334        INSN_3(ST, MEM, H),                     \
1335        INSN_3(ST, MEM, W),                     \
1336        INSN_3(ST, MEM, DW),                    \
1337        /* Load instructions. */                \
1338        /*   Register based. */                 \
1339        INSN_3(LDX, MEM, B),                    \
1340        INSN_3(LDX, MEM, H),                    \
1341        INSN_3(LDX, MEM, W),                    \
1342        INSN_3(LDX, MEM, DW),                   \
1343        /*   Immediate based. */                \
1344        INSN_3(LD, IMM, DW)
1345
1346bool bpf_opcode_in_insntable(u8 code)
1347{
1348#define BPF_INSN_2_TBL(x, y)    [BPF_##x | BPF_##y] = true
1349#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
1350        static const bool public_insntable[256] = {
1351                [0 ... 255] = false,
1352                /* Now overwrite non-defaults ... */
1353                BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
1354                /* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
1355                [BPF_LD | BPF_ABS | BPF_B] = true,
1356                [BPF_LD | BPF_ABS | BPF_H] = true,
1357                [BPF_LD | BPF_ABS | BPF_W] = true,
1358                [BPF_LD | BPF_IND | BPF_B] = true,
1359                [BPF_LD | BPF_IND | BPF_H] = true,
1360                [BPF_LD | BPF_IND | BPF_W] = true,
1361        };
1362#undef BPF_INSN_3_TBL
1363#undef BPF_INSN_2_TBL
1364        return public_insntable[code];
1365}
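/* Editorial note: the verifier uses this table to reject any opcode not
 * listed here before a program ever reaches the interpreter or a JIT.
 * The cBPF LD_ABS/LD_IND entries are accepted at load time but are
 * rewritten by the verifier into equivalent internal sequences, which is
 * why they have no handler in the interpreter's jump table below.
 */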
1366
1367#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1368u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
1369{
1370        memset(dst, 0, size);
1371        return -EFAULT;
1372}
1373
1374/**
1375 *      ___bpf_prog_run - run eBPF program on a given context
1376 *      @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
1377 *      @insn: is the array of eBPF instructions
1378 *
1379 * Decode and execute eBPF instructions.
1380 *
1381 * Return: whatever value is in %BPF_R0 at program exit
1382 */
1383static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
1384{
1385#define BPF_INSN_2_LBL(x, y)    [BPF_##x | BPF_##y] = &&x##_##y
1386#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
1387        static const void * const jumptable[256] __annotate_jump_table = {
1388                [0 ... 255] = &&default_label,
1389                /* Now overwrite non-defaults ... */
1390                BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
1391                /* Non-UAPI available opcodes. */
1392                [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
1393                [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
1394                [BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
1395                [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
1396                [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
1397                [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
1398                [BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
1399        };
1400#undef BPF_INSN_3_LBL
1401#undef BPF_INSN_2_LBL
1402        u32 tail_call_cnt = 0;
1403
1404#define CONT     ({ insn++; goto select_insn; })
1405#define CONT_JMP ({ insn++; goto select_insn; })
1406
1407select_insn:
1408        goto *jumptable[insn->code];
1409
1410        /* Explicitly mask the register-based shift amounts with 63 or 31
1411         * to avoid undefined behavior. Normally this won't affect the
1412         * generated code, for example, in case of native 64 bit archs such
1413         * as x86-64 or arm64, the compiler is optimizing the AND away for
1414         * the interpreter. In case of JITs, each of the JIT backends compiles
1415         * the BPF shift operations to machine instructions which produce
1416         * implementation-defined results in such a case; the resulting
1417         * contents of the register may be arbitrary, but program behaviour
1418         * as a whole remains defined. In other words, in case of JIT backends,
1419         * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
1420         */
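        /* For instance (illustration only): a 32-bit BPF_LSH whose SRC
         * register holds 40 at runtime is executed below as a shift by
         * 40 & 31 == 8, whereas a JIT may emit the bare machine shift
         * and let the CPU define the out-of-range result.
         */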
1421        /* ALU (shifts) */
1422#define SHT(OPCODE, OP)                                 \
1423        ALU64_##OPCODE##_X:                             \
1424                DST = DST OP (SRC & 63);                \
1425                CONT;                                   \
1426        ALU_##OPCODE##_X:                               \
1427                DST = (u32) DST OP ((u32) SRC & 31);    \
1428                CONT;                                   \
1429        ALU64_##OPCODE##_K:                             \
1430                DST = DST OP IMM;                       \
1431                CONT;                                   \
1432        ALU_##OPCODE##_K:                               \
1433                DST = (u32) DST OP (u32) IMM;           \
1434                CONT;
1435        /* ALU (rest) */
1436#define ALU(OPCODE, OP)                                 \
1437        ALU64_##OPCODE##_X:                             \
1438                DST = DST OP SRC;                       \
1439                CONT;                                   \
1440        ALU_##OPCODE##_X:                               \
1441                DST = (u32) DST OP (u32) SRC;           \
1442                CONT;                                   \
1443        ALU64_##OPCODE##_K:                             \
1444                DST = DST OP IMM;                       \
1445                CONT;                                   \
1446        ALU_##OPCODE##_K:                               \
1447                DST = (u32) DST OP (u32) IMM;           \
1448                CONT;
1449        ALU(ADD,  +)
1450        ALU(SUB,  -)
1451        ALU(AND,  &)
1452        ALU(OR,   |)
1453        ALU(XOR,  ^)
1454        ALU(MUL,  *)
1455        SHT(LSH, <<)
1456        SHT(RSH, >>)
1457#undef SHT
1458#undef ALU
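            /* Illustrative note (not in the original source): the 32-bit
             * ALU_* variants above store a zero-extended 32-bit result,
             * e.g. with r0 == 0xffffffff, "w0 += 1" leaves r0 == 0 rather
             * than 0x100000000, since DST is assigned a (u32) value.
             */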
1459        ALU_NEG:
1460                DST = (u32) -DST;
1461                CONT;
1462        ALU64_NEG:
1463                DST = -DST;
1464                CONT;
1465        ALU_MOV_X:
1466                DST = (u32) SRC;
1467                CONT;
1468        ALU_MOV_K:
1469                DST = (u32) IMM;
1470                CONT;
1471        ALU64_MOV_X:
1472                DST = SRC;
1473                CONT;
1474        ALU64_MOV_K:
1475                DST = IMM;
1476                CONT;
1477        LD_IMM_DW:
1478                DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
1479                insn++;
1480                CONT;
1481        ALU_ARSH_X:
1482                DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
1483                CONT;
1484        ALU_ARSH_K:
1485                DST = (u64) (u32) (((s32) DST) >> IMM);
1486                CONT;
1487        ALU64_ARSH_X:
1488                (*(s64 *) &DST) >>= (SRC & 63);
1489                CONT;
1490        ALU64_ARSH_K:
1491                (*(s64 *) &DST) >>= IMM;
1492                CONT;
1493        ALU64_MOD_X:
1494                div64_u64_rem(DST, SRC, &AX);
1495                DST = AX;
1496                CONT;
1497        ALU_MOD_X:
1498                AX = (u32) DST;
1499                DST = do_div(AX, (u32) SRC);
1500                CONT;
1501        ALU64_MOD_K:
1502                div64_u64_rem(DST, IMM, &AX);
1503                DST = AX;
1504                CONT;
1505        ALU_MOD_K:
1506                AX = (u32) DST;
1507                DST = do_div(AX, (u32) IMM);
1508                CONT;
1509        ALU64_DIV_X:
1510                DST = div64_u64(DST, SRC);
1511                CONT;
1512        ALU_DIV_X:
1513                AX = (u32) DST;
1514                do_div(AX, (u32) SRC);
1515                DST = (u32) AX;
1516                CONT;
1517        ALU64_DIV_K:
1518                DST = div64_u64(DST, IMM);
1519                CONT;
1520        ALU_DIV_K:
1521                AX = (u32) DST;
1522                do_div(AX, (u32) IMM);
1523                DST = (u32) AX;
1524                CONT;
1525        ALU_END_TO_BE:
1526                switch (IMM) {
1527                case 16:
1528                        DST = (__force u16) cpu_to_be16(DST);
1529                        break;
1530                case 32:
1531                        DST = (__force u32) cpu_to_be32(DST);
1532                        break;
1533                case 64:
1534                        DST = (__force u64) cpu_to_be64(DST);
1535                        break;
1536                }
1537                CONT;
1538        ALU_END_TO_LE:
1539                switch (IMM) {
1540                case 16:
1541                        DST = (__force u16) cpu_to_le16(DST);
1542                        break;
1543                case 32:
1544                        DST = (__force u32) cpu_to_le32(DST);
1545                        break;
1546                case 64:
1547                        DST = (__force u64) cpu_to_le64(DST);
1548                        break;
1549                }
1550                CONT;
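            /* Illustrative sketch (not in the original source): on a
             * little-endian host, BPF_END | BPF_TO_BE with imm == 16
             * byte-swaps the low 16 bits and zero-extends:
             *
             *      r0 = 0x1234
             *      r0 = be16 r0    // r0 == 0x3412, upper bits cleared
             *
             * while BPF_TO_LE with imm == 16 merely truncates to 16 bits,
             * as cpu_to_le16() is a no-op on little-endian hosts.
             */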
1551
1552        /* CALL */
1553        JMP_CALL:
1554                /* Function call scratches BPF_R1-BPF_R5 registers,
1555                 * preserves BPF_R6-BPF_R9, and stores return value
1556                 * into BPF_R0.
1557                 */
1558                BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
1559                                                       BPF_R4, BPF_R5);
1560                CONT;
1561
1562        JMP_CALL_ARGS:
1563                BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
1564                                                            BPF_R3, BPF_R4,
1565                                                            BPF_R5,
1566                                                            insn + insn->off + 1);
1567                CONT;
1568
1569        JMP_TAIL_CALL: {
1570                struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
1571                struct bpf_array *array = container_of(map, struct bpf_array, map);
1572                struct bpf_prog *prog;
1573                u32 index = BPF_R3;
1574
1575                if (unlikely(index >= array->map.max_entries))
1576                        goto out;
1577                if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
1578                        goto out;
1579
1580                tail_call_cnt++;
1581
1582                prog = READ_ONCE(array->ptrs[index]);
1583                if (!prog)
1584                        goto out;
1585
1586                /* ARG1 at this point is guaranteed to point to CTX from
1587                 * the verifier side due to the fact that the tail call is
1588                 * handled like a helper, that is, bpf_tail_call_proto,
1589                 * where arg1_type is ARG_PTR_TO_CTX.
1590                 */
1591                insn = prog->insnsi;
1592                goto select_insn;
1593out:
1594                CONT;
1595        }
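            /* Illustrative note (not in the original source): from the BPF
             * program's point of view a successful tail call never returns;
             * e.g. after
             *
             *      bpf_tail_call(ctx, &prog_array, idx);
             *
             * the next instruction only runs if the tail call failed (bad
             * index, empty slot or tail call limit exceeded), which is why
             * the failure path above simply does CONT.
             */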
1596        JMP_JA:
1597                insn += insn->off;
1598                CONT;
1599        JMP_EXIT:
1600                return BPF_R0;
1601        /* JMP */
1602#define COND_JMP(SIGN, OPCODE, CMP_OP)                          \
1603        JMP_##OPCODE##_X:                                       \
1604                if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) {     \
1605                        insn += insn->off;                      \
1606                        CONT_JMP;                               \
1607                }                                               \
1608                CONT;                                           \
1609        JMP32_##OPCODE##_X:                                     \
1610                if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) {     \
1611                        insn += insn->off;                      \
1612                        CONT_JMP;                               \
1613                }                                               \
1614                CONT;                                           \
1615        JMP_##OPCODE##_K:                                       \
1616                if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) {     \
1617                        insn += insn->off;                      \
1618                        CONT_JMP;                               \
1619                }                                               \
1620                CONT;                                           \
1621        JMP32_##OPCODE##_K:                                     \
1622                if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) {     \
1623                        insn += insn->off;                      \
1624                        CONT_JMP;                               \
1625                }                                               \
1626                CONT;
1627        COND_JMP(u, JEQ, ==)
1628        COND_JMP(u, JNE, !=)
1629        COND_JMP(u, JGT, >)
1630        COND_JMP(u, JLT, <)
1631        COND_JMP(u, JGE, >=)
1632        COND_JMP(u, JLE, <=)
1633        COND_JMP(u, JSET, &)
1634        COND_JMP(s, JSGT, >)
1635        COND_JMP(s, JSLT, <)
1636        COND_JMP(s, JSGE, >=)
1637        COND_JMP(s, JSLE, <=)
1638#undef COND_JMP
1639        /* ST, STX and LDX */
1640        ST_NOSPEC:
1641                /* Speculation barrier for mitigating Speculative Store Bypass.
1642                 * In case of arm64, we rely on the firmware mitigation as
1643                 * controlled via the ssbd kernel parameter. Whenever the
1644                 * mitigation is enabled, it works for all of the kernel code
1645                 * with no need to provide any additional instructions here.
1646                 * In case of x86, we use 'lfence' insn for mitigation. We
1647                 * reuse preexisting logic from Spectre v1 mitigation that
1648                 * happens to produce the required code on x86 for v4 as well.
1649                 */
1650#ifdef CONFIG_X86
1651                barrier_nospec();
1652#endif
1653                CONT;
1654#define LDST(SIZEOP, SIZE)                                              \
1655        STX_MEM_##SIZEOP:                                               \
1656                *(SIZE *)(unsigned long) (DST + insn->off) = SRC;       \
1657                CONT;                                                   \
1658        ST_MEM_##SIZEOP:                                                \
1659                *(SIZE *)(unsigned long) (DST + insn->off) = IMM;       \
1660                CONT;                                                   \
1661        LDX_MEM_##SIZEOP:                                               \
1662                DST = *(SIZE *)(unsigned long) (SRC + insn->off);       \
1663                CONT;
1664
1665        LDST(B,   u8)
1666        LDST(H,  u16)
1667        LDST(W,  u32)
1668        LDST(DW, u64)
1669#undef LDST
1670#define LDX_PROBE(SIZEOP, SIZE)                                                 \
1671        LDX_PROBE_MEM_##SIZEOP:                                                 \
1672                bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off));      \
1673                CONT;
1674        LDX_PROBE(B,  1)
1675        LDX_PROBE(H,  2)
1676        LDX_PROBE(W,  4)
1677        LDX_PROBE(DW, 8)
1678#undef LDX_PROBE
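            /* Illustrative note (not in the original source): the verifier
             * rewrites loads through possibly-faulting kernel pointers
             * (e.g. PTR_TO_BTF_ID in tracing programs) to BPF_PROBE_MEM,
             * so a faulting access ends up as a zero-filled DST via
             * bpf_probe_read_kernel() instead of an oops, unlike the plain
             * LDX_MEM variants above which assume the access cannot fault.
             */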
1679
1680#define ATOMIC_ALU_OP(BOP, KOP)                                         \
1681                case BOP:                                               \
1682                        if (BPF_SIZE(insn->code) == BPF_W)              \
1683                                atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \
1684                                             (DST + insn->off));        \
1685                        else                                            \
1686                                atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \
1687                                               (DST + insn->off));      \
1688                        break;                                          \
1689                case BOP | BPF_FETCH:                                   \
1690                        if (BPF_SIZE(insn->code) == BPF_W)              \
1691                                SRC = (u32) atomic_fetch_##KOP(         \
1692                                        (u32) SRC,                      \
1693                                        (atomic_t *)(unsigned long) (DST + insn->off)); \
1694                        else                                            \
1695                                SRC = (u64) atomic64_fetch_##KOP(       \
1696                                        (u64) SRC,                      \
1697                                        (atomic64_t *)(unsigned long) (DST + insn->off)); \
1698                        break;
1699
1700        STX_ATOMIC_DW:
1701        STX_ATOMIC_W:
1702                switch (IMM) {
1703                ATOMIC_ALU_OP(BPF_ADD, add)
1704                ATOMIC_ALU_OP(BPF_AND, and)
1705                ATOMIC_ALU_OP(BPF_OR, or)
1706                ATOMIC_ALU_OP(BPF_XOR, xor)
1707#undef ATOMIC_ALU_OP
1708
1709                case BPF_XCHG:
1710                        if (BPF_SIZE(insn->code) == BPF_W)
1711                                SRC = (u32) atomic_xchg(
1712                                        (atomic_t *)(unsigned long) (DST + insn->off),
1713                                        (u32) SRC);
1714                        else
1715                                SRC = (u64) atomic64_xchg(
1716                                        (atomic64_t *)(unsigned long) (DST + insn->off),
1717                                        (u64) SRC);
1718                        break;
1719                case BPF_CMPXCHG:
1720                        if (BPF_SIZE(insn->code) == BPF_W)
1721                                BPF_R0 = (u32) atomic_cmpxchg(
1722                                        (atomic_t *)(unsigned long) (DST + insn->off),
1723                                        (u32) BPF_R0, (u32) SRC);
1724                        else
1725                                BPF_R0 = (u64) atomic64_cmpxchg(
1726                                        (atomic64_t *)(unsigned long) (DST + insn->off),
1727                                        (u64) BPF_R0, (u64) SRC);
1728                        break;
1729
1730                default:
1731                        goto default_label;
1732                }
1733                CONT;
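            /* Illustrative sketch (not in the original source): a 64-bit
             * atomic fetch-and-add is encoded as
             *
             *      code = BPF_STX | BPF_DW | BPF_ATOMIC
             *      imm  = BPF_ADD | BPF_FETCH
             *
             * i.e. *(u64 *)(DST + off) += SRC with the old value returned
             * in SRC, handled by the BPF_FETCH arm of ATOMIC_ALU_OP above.
             */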
1734
1735        default_label:
1736                /* If we ever reach this, we have a bug somewhere. Die hard here
1737                 * instead of just returning 0; we could be somewhere in a subprog,
1738                 * so execution could otherwise continue, which we do /not/ want.
1739                 *
1740                 * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable().
1741                 */
1742                pr_warn("BPF interpreter: unknown opcode %02x (imm: 0x%x)\n",
1743                        insn->code, insn->imm);
1744                BUG_ON(1);
1745                return 0;
1746}
1747
1748#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
1749#define DEFINE_BPF_PROG_RUN(stack_size) \
1750static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
1751{ \
1752        u64 stack[stack_size / sizeof(u64)]; \
1753        u64 regs[MAX_BPF_EXT_REG]; \
1754\
1755        FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
1756        ARG1 = (u64) (unsigned long) ctx; \
1757        return ___bpf_prog_run(regs, insn); \
1758}
1759
1760#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
1761#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
1762static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
1763                                      const struct bpf_insn *insn) \
1764{ \
1765        u64 stack[stack_size / sizeof(u64)]; \
1766        u64 regs[MAX_BPF_EXT_REG]; \
1767\
1768        FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
1769        BPF_R1 = r1; \
1770        BPF_R2 = r2; \
1771        BPF_R3 = r3; \
1772        BPF_R4 = r4; \
1773        BPF_R5 = r5; \
1774        return ___bpf_prog_run(regs, insn); \
1775}
1776
1777#define EVAL1(FN, X) FN(X)
1778#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
1779#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
1780#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
1781#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
1782#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
1783
1784EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
1785EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
1786EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
1787
1788EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
1789EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
1790EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);
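    /* Illustrative note (not in the original source): the EVAL* lines above
     * expand to 16 interpreter entry points per flavour, e.g.
     *
     *      static unsigned int __bpf_prog_run32(const void *ctx,
     *                                           const struct bpf_insn *insn);
     *
     * differing only in the size of the on-stack BPF stack frame
     * (32..512 bytes in steps of 32).
     */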
1791
1792#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
1793
1794static unsigned int (*interpreters[])(const void *ctx,
1795                                      const struct bpf_insn *insn) = {
1796EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
1797EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
1798EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
1799};
1800#undef PROG_NAME_LIST
1801#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
1802static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
1803                                  const struct bpf_insn *insn) = {
1804EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
1805EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
1806EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
1807};
1808#undef PROG_NAME_LIST
1809
1810void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
1811{
1812        stack_depth = max_t(u32, stack_depth, 1);
1813        insn->off = (s16) insn->imm;
1814        insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
1815                __bpf_call_base_args;
1816        insn->code = BPF_JMP | BPF_CALL_ARGS;
1817}
1818
1819#else
1820static unsigned int __bpf_prog_ret0_warn(const void *ctx,
1821                                         const struct bpf_insn *insn)
1822{
1823        /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
1824         * is not working properly, so warn about it!
1825         */
1826        WARN_ON_ONCE(1);
1827        return 0;
1828}
1829#endif
1830
1831bool bpf_prog_array_compatible(struct bpf_array *array,
1832                               const struct bpf_prog *fp)
1833{
1834        bool ret;
1835
1836        if (fp->kprobe_override)
1837                return false;
1838
1839        spin_lock(&array->aux->owner.lock);
1840
1841        if (!array->aux->owner.type) {
1842                /* There's no owner yet where we could check for
1843                 * compatibility.
1844                 */
1845                array->aux->owner.type  = fp->type;
1846                array->aux->owner.jited = fp->jited;
1847                ret = true;
1848        } else {
1849                ret = array->aux->owner.type  == fp->type &&
1850                      array->aux->owner.jited == fp->jited;
1851        }
1852        spin_unlock(&array->aux->owner.lock);
1853        return ret;
1854}
1855
1856static int bpf_check_tail_call(const struct bpf_prog *fp)
1857{
1858        struct bpf_prog_aux *aux = fp->aux;
1859        int i, ret = 0;
1860
1861        mutex_lock(&aux->used_maps_mutex);
1862        for (i = 0; i < aux->used_map_cnt; i++) {
1863                struct bpf_map *map = aux->used_maps[i];
1864                struct bpf_array *array;
1865
1866                if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
1867                        continue;
1868
1869                array = container_of(map, struct bpf_array, map);
1870                if (!bpf_prog_array_compatible(array, fp)) {
1871                        ret = -EINVAL;
1872                        goto out;
1873                }
1874        }
1875
1876out:
1877        mutex_unlock(&aux->used_maps_mutex);
1878        return ret;
1879}
1880
1881static void bpf_prog_select_func(struct bpf_prog *fp)
1882{
1883#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1884        u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
1885
1886        fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
1887#else
1888        fp->bpf_func = __bpf_prog_ret0_warn;
1889#endif
1890}
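    /* Illustrative example (not in the original source): a program with
     * aux->stack_depth == 40 rounds up to 64, so index (64 / 32) - 1 == 1
     * selects __bpf_prog_run64() from the interpreters[] array above.
     */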
1891
1892/**
1893 *      bpf_prog_select_runtime - select exec runtime for BPF program
1894 *      @fp: bpf_prog populated with internal BPF program
1895 *      @err: pointer to error variable
1896 *
1897 * Try to JIT the eBPF program; if the JIT is not available, use the interpreter.
1898 * The BPF program will be executed via bpf_prog_run() function.
1899 *
1900 * Return: the &fp argument along with &err set to 0 for success or
1901 * a negative errno code on failure
1902 */
1903struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
1904{
1905        /* In case of BPF-to-BPF calls, the verifier has already done
1906         * all the prep work with regard to JITing, etc.
1907         */
1908        bool jit_needed = false;
1909
1910        if (fp->bpf_func)
1911                goto finalize;
1912
1913        if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) ||
1914            bpf_prog_has_kfunc_call(fp))
1915                jit_needed = true;
1916
1917        bpf_prog_select_func(fp);
1918
1919        /* eBPF JITs can rewrite the program in case constant
1920         * blinding is active. However, in case of error during
1921         * blinding, bpf_int_jit_compile() must always return a
1922         * valid program, which in this case would simply not
1923         * be JITed and would fall back to the interpreter.
1924         */
1925        if (!bpf_prog_is_dev_bound(fp->aux)) {
1926                *err = bpf_prog_alloc_jited_linfo(fp);
1927                if (*err)
1928                        return fp;
1929
1930                fp = bpf_int_jit_compile(fp);
1931                bpf_prog_jit_attempt_done(fp);
1932                if (!fp->jited && jit_needed) {
1933                        *err = -ENOTSUPP;
1934                        return fp;
1935                }
1936        } else {
1937                *err = bpf_prog_offload_compile(fp);
1938                if (*err)
1939                        return fp;
1940        }
1941
1942finalize:
1943        bpf_prog_lock_ro(fp);
1944
1945        /* The tail call compatibility check can only be done at
1946         * this late stage, as we need to determine whether we deal
1947         * with JITed or non-JITed program concatenations, and not
1948         * all eBPF JITs might immediately support all features.
1949         */
1950        *err = bpf_check_tail_call(fp);
1951
1952        return fp;
1953}
1954EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
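    /* Minimal usage sketch (not in the original source), roughly what the
     * program load path does after verification; "free_prog" is a
     * hypothetical error label:
     *
     *      int err = 0;
     *
     *      prog = bpf_prog_select_runtime(prog, &err);
     *      if (err < 0)
     *              goto free_prog;
     *
     * The returned pointer must be used from here on, since JITing (e.g.
     * constant blinding) may have reallocated the program.
     */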
1955
1956static unsigned int __bpf_prog_ret1(const void *ctx,
1957                                    const struct bpf_insn *insn)
1958{
1959        return 1;
1960}
1961
1962static struct bpf_prog_dummy {
1963        struct bpf_prog prog;
1964} dummy_bpf_prog = {
1965        .prog = {
1966                .bpf_func = __bpf_prog_ret1,
1967        },
1968};
1969
1970/* To avoid allocating an empty bpf_prog_array for cgroups that don't
1971 * have a bpf program attached, use one global 'empty_prog_array'.
1972 * It will not be modified by the caller of bpf_prog_array_alloc()
1973 * (since the caller requested prog_cnt == 0), and that pointer
1974 * should be 'freed' via bpf_prog_array_free().
1975 */
1976static struct {
1977        struct bpf_prog_array hdr;
1978        struct bpf_prog *null_prog;
1979} empty_prog_array = {
1980        .null_prog = NULL,
1981};
1982
1983struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
1984{
1985        if (prog_cnt)
1986                return kzalloc(sizeof(struct bpf_prog_array) +
1987                               sizeof(struct bpf_prog_array_item) *
1988                               (prog_cnt + 1),
1989                               flags);
1990
1991        return &empty_prog_array.hdr;
1992}
1993
1994void bpf_prog_array_free(struct bpf_prog_array *progs)
1995{
1996        if (!progs || progs == &empty_prog_array.hdr)
1997                return;
1998        kfree_rcu(progs, rcu);
1999}
2000
2001int bpf_prog_array_length(struct bpf_prog_array *array)
2002{
2003        struct bpf_prog_array_item *item;
2004        u32 cnt = 0;
2005
2006        for (item = array->items; item->prog; item++)
2007                if (item->prog != &dummy_bpf_prog.prog)
2008                        cnt++;
2009        return cnt;
2010}
2011
2012bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
2013{
2014        struct bpf_prog_array_item *item;
2015
2016        for (item = array->items; item->prog; item++)
2017                if (item->prog != &dummy_bpf_prog.prog)
2018                        return false;
2019        return true;
2020}
2021
2022static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
2023                                     u32 *prog_ids,
2024                                     u32 request_cnt)
2025{
2026        struct bpf_prog_array_item *item;
2027        int i = 0;
2028
2029        for (item = array->items; item->prog; item++) {
2030                if (item->prog == &dummy_bpf_prog.prog)
2031                        continue;
2032                prog_ids[i] = item->prog->aux->id;
2033                if (++i == request_cnt) {
2034                        item++;
2035                        break;
2036                }
2037        }
2038
2039        return !!(item->prog);
2040}
2041
2042int bpf_prog_array_copy_to_user(struct bpf_prog_array *array,
2043                                __u32 __user *prog_ids, u32 cnt)
2044{
2045        unsigned long err = 0;
2046        bool nospc;
2047        u32 *ids;
2048
2049        /* users of this function are doing:
2050         * cnt = bpf_prog_array_length();
2051         * if (cnt > 0)
2052         *     bpf_prog_array_copy_to_user(..., cnt);
2053         * so below kcalloc doesn't need extra cnt > 0 check.
2054         */
2055        ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
2056        if (!ids)
2057                return -ENOMEM;
2058        nospc = bpf_prog_array_copy_core(array, ids, cnt);
2059        err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
2060        kfree(ids);
2061        if (err)
2062                return -EFAULT;
2063        if (nospc)
2064                return -ENOSPC;
2065        return 0;
2066}
2067
2068void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
2069                                struct bpf_prog *old_prog)
2070{
2071        struct bpf_prog_array_item *item;
2072
2073        for (item = array->items; item->prog; item++)
2074                if (item->prog == old_prog) {
2075                        WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
2076                        break;
2077                }
2078}
2079
2080/**
2081 * bpf_prog_array_delete_safe_at() - Replaces the program at the given
2082 *                                   index into the program array with
2083 *                                   a dummy no-op program.
2084 * @array: a bpf_prog_array
2085 * @index: the index of the program to replace
2086 *
2087 * Skips over dummy programs, by not counting them, when calculating
2088 * the position of the program to replace.
2089 *
2090 * Return:
2091 * * 0          - Success
2092 * * -EINVAL    - Invalid index value. Must be a non-negative integer.
2093 * * -ENOENT    - Index out of range
2094 */
2095int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
2096{
2097        return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
2098}
2099
2100/**
2101 * bpf_prog_array_update_at() - Updates the program at the given index
2102 *                              into the program array.
2103 * @array: a bpf_prog_array
2104 * @index: the index of the program to update
2105 * @prog: the program to insert into the array
2106 *
2107 * Skips over dummy programs, by not counting them, when calculating
2108 * the position of the program to update.
2109 *
2110 * Return:
2111 * * 0          - Success
2112 * * -EINVAL    - Invalid index value. Must be a non-negative integer.
2113 * * -ENOENT    - Index out of range
2114 */
2115int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
2116                             struct bpf_prog *prog)
2117{
2118        struct bpf_prog_array_item *item;
2119
2120        if (unlikely(index < 0))
2121                return -EINVAL;
2122
2123        for (item = array->items; item->prog; item++) {
2124                if (item->prog == &dummy_bpf_prog.prog)
2125                        continue;
2126                if (!index) {
2127                        WRITE_ONCE(item->prog, prog);
2128                        return 0;
2129                }
2130                index--;
2131        }
2132        return -ENOENT;
2133}
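    /* Illustrative example (not in the original source): for an array whose
     * items are [progA, dummy, progB, NULL], index 1 refers to progB,
     * because the dummy entry is skipped when counting.
     */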
2134
2135int bpf_prog_array_copy(struct bpf_prog_array *old_array,
2136                        struct bpf_prog *exclude_prog,
2137                        struct bpf_prog *include_prog,
2138                        u64 bpf_cookie,
2139                        struct bpf_prog_array **new_array)
2140{
2141        int new_prog_cnt, carry_prog_cnt = 0;
2142        struct bpf_prog_array_item *existing, *new;
2143        struct bpf_prog_array *array;
2144        bool found_exclude = false;
2145
2146        /* Figure out how many existing progs we need to carry over to
2147         * the new array.
2148         */
2149        if (old_array) {
2150                existing = old_array->items;
2151                for (; existing->prog; existing++) {
2152                        if (existing->prog == exclude_prog) {
2153                                found_exclude = true;
2154                                continue;
2155                        }
2156                        if (existing->prog != &dummy_bpf_prog.prog)
2157                                carry_prog_cnt++;
2158                        if (existing->prog == include_prog)
2159                                return -EEXIST;
2160                }
2161        }
2162
2163        if (exclude_prog && !found_exclude)
2164                return -ENOENT;
2165
2166        /* How many progs (not NULL) will be in the new array? */
2167        new_prog_cnt = carry_prog_cnt;
2168        if (include_prog)
2169                new_prog_cnt += 1;
2170
2171        /* Do we have any prog (not NULL) in the new array? */
2172        if (!new_prog_cnt) {
2173                *new_array = NULL;
2174                return 0;
2175        }
2176
2177        /* +1 as the end of prog_array is marked with NULL */
2178        array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
2179        if (!array)
2180                return -ENOMEM;
2181        new = array->items;
2182
2183        /* Fill in the new prog array */
2184        if (carry_prog_cnt) {
2185                existing = old_array->items;
2186                for (; existing->prog; existing++) {
2187                        if (existing->prog == exclude_prog ||
2188                            existing->prog == &dummy_bpf_prog.prog)
2189                                continue;
2190
2191                        new->prog = existing->prog;
2192                        new->bpf_cookie = existing->bpf_cookie;
2193                        new++;
2194                }
2195        }
2196        if (include_prog) {
2197                new->prog = include_prog;
2198                new->bpf_cookie = bpf_cookie;
2199                new++;
2200        }
2201        new->prog = NULL;
2202        *new_array = array;
2203        return 0;
2204}
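    /* Illustrative example (not in the original source): copying
     * [A, dummy, B, NULL] with exclude_prog == A and include_prog == C
     * yields a freshly allocated array [B, C, NULL]; the dummy entry is
     * dropped and C inherits the passed-in bpf_cookie.
     */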
2205
2206int bpf_prog_array_copy_info(struct bpf_prog_array *array,
2207                             u32 *prog_ids, u32 request_cnt,
2208                             u32 *prog_cnt)
2209{
2210        u32 cnt = 0;
2211
2212        if (array)
2213                cnt = bpf_prog_array_length(array);
2214
2215        *prog_cnt = cnt;
2216
2217        /* return early if user requested only program count or nothing to copy */
2218        if (!request_cnt || !cnt)
2219                return 0;
2220
2221        /* this function is called under trace/bpf_trace.c: bpf_event_mutex */
2222        return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
2223                                                                     : 0;
2224}
2225
2226void __bpf_free_used_maps(struct bpf_prog_aux *aux,
2227                          struct bpf_map **used_maps, u32 len)
2228{
2229        struct bpf_map *map;
2230        u32 i;
2231
2232        for (i = 0; i < len; i++) {
2233                map = used_maps[i];
2234                if (map->ops->map_poke_untrack)
2235                        map->ops->map_poke_untrack(map, aux);
2236                bpf_map_put(map);
2237        }
2238}
2239
2240static void bpf_free_used_maps(struct bpf_prog_aux *aux)
2241{
2242        __bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
2243        kfree(aux->used_maps);
2244}
2245
2246void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
2247                          struct btf_mod_pair *used_btfs, u32 len)
2248{
2249#ifdef CONFIG_BPF_SYSCALL
2250        struct btf_mod_pair *btf_mod;
2251        u32 i;
2252
2253        for (i = 0; i < len; i++) {
2254                btf_mod = &used_btfs[i];
2255                if (btf_mod->module)
2256                        module_put(btf_mod->module);
2257                btf_put(btf_mod->btf);
2258        }
2259#endif
2260}
2261
2262static void bpf_free_used_btfs(struct bpf_prog_aux *aux)
2263{
2264        __bpf_free_used_btfs(aux, aux->used_btfs, aux->used_btf_cnt);
2265        kfree(aux->used_btfs);
2266}
2267
2268static void bpf_prog_free_deferred(struct work_struct *work)
2269{
2270        struct bpf_prog_aux *aux;
2271        int i;
2272
2273        aux = container_of(work, struct bpf_prog_aux, work);
2274#ifdef CONFIG_BPF_SYSCALL
2275        bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
2276#endif
2277        bpf_free_used_maps(aux);
2278        bpf_free_used_btfs(aux);
2279        if (bpf_prog_is_dev_bound(aux))
2280                bpf_prog_offload_destroy(aux->prog);
2281#ifdef CONFIG_PERF_EVENTS
2282        if (aux->prog->has_callchain_buf)
2283                put_callchain_buffers();
2284#endif
2285        if (aux->dst_trampoline)
2286                bpf_trampoline_put(aux->dst_trampoline);
2287        for (i = 0; i < aux->func_cnt; i++) {
2288                /* We can just unlink the subprog poke descriptor table as
2289                 * it was originally linked to the main program and is also
2290                 * released along with it.
2291                 */
2292                aux->func[i]->aux->poke_tab = NULL;
2293                bpf_jit_free(aux->func[i]);
2294        }
2295        if (aux->func_cnt) {
2296                kfree(aux->func);
2297                bpf_prog_unlock_free(aux->prog);
2298        } else {
2299                bpf_jit_free(aux->prog);
2300        }
2301}
2302
2303/* Free internal BPF program */
2304void bpf_prog_free(struct bpf_prog *fp)
2305{
2306        struct bpf_prog_aux *aux = fp->aux;
2307
2308        if (aux->dst_prog)
2309                bpf_prog_put(aux->dst_prog);
2310        INIT_WORK(&aux->work, bpf_prog_free_deferred);
2311        schedule_work(&aux->work);
2312}
2313EXPORT_SYMBOL_GPL(bpf_prog_free);
2314
2315/* RNG for unprivileged user space, with state separate from prandom_u32(). */
2316static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);
2317
2318void bpf_user_rnd_init_once(void)
2319{
2320        prandom_init_once(&bpf_user_rnd_state);
2321}
2322
2323BPF_CALL_0(bpf_user_rnd_u32)
2324{
2325        /* Should someone ever have the rather unwise idea to use some
2326         * of the registers passed into this function, then note that
2327         * this function is called from native eBPF and classic-to-eBPF
2328         * transformations. Register assignments from both sides are
2329         * different, f.e. classic always sets fn(ctx, A, X) here.
2330         */
2331        struct rnd_state *state;
2332        u32 res;
2333
2334        state = &get_cpu_var(bpf_user_rnd_state);
2335        res = prandom_u32_state(state);
2336        put_cpu_var(bpf_user_rnd_state);
2337
2338        return res;
2339}
2340
2341BPF_CALL_0(bpf_get_raw_cpu_id)
2342{
2343        return raw_smp_processor_id();
2344}
2345
2346/* Weak definitions of helper functions in case we don't have bpf syscall. */
2347const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
2348const struct bpf_func_proto bpf_map_update_elem_proto __weak;
2349const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
2350const struct bpf_func_proto bpf_map_push_elem_proto __weak;
2351const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
2352const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
2353const struct bpf_func_proto bpf_spin_lock_proto __weak;
2354const struct bpf_func_proto bpf_spin_unlock_proto __weak;
2355const struct bpf_func_proto bpf_jiffies64_proto __weak;
2356
2357const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
2358const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
2359const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
2360const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
2361const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
2362const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
2363
2364const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
2365const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
2366const struct bpf_func_proto bpf_get_current_comm_proto __weak;
2367const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
2368const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
2369const struct bpf_func_proto bpf_get_local_storage_proto __weak;
2370const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
2371const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
2372const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
2373
2374const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
2375{
2376        return NULL;
2377}
2378
2379const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
2380{
2381        return NULL;
2382}
2383
2384u64 __weak
2385bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
2386                 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
2387{
2388        return -ENOTSUPP;
2389}
2390EXPORT_SYMBOL_GPL(bpf_event_output);
2391
2392/* Always built-in helper functions. */
2393const struct bpf_func_proto bpf_tail_call_proto = {
2394        .func           = NULL,
2395        .gpl_only       = false,
2396        .ret_type       = RET_VOID,
2397        .arg1_type      = ARG_PTR_TO_CTX,
2398        .arg2_type      = ARG_CONST_MAP_PTR,
2399        .arg3_type      = ARG_ANYTHING,
2400};
2401
2402/* Stub for JITs that only support cBPF. eBPF programs are interpreted.
2403 * It is encouraged to implement bpf_int_jit_compile() instead, so that
2404 * eBPF and implicitly also cBPF can get JITed!
2405 */
2406struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
2407{
2408        return prog;
2409}
2410
2411/* Stub for JITs that support eBPF. All cBPF code gets transformed into
2412 * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
2413 */
2414void __weak bpf_jit_compile(struct bpf_prog *prog)
2415{
2416}
2417
2418bool __weak bpf_helper_changes_pkt_data(void *func)
2419{
2420        return false;
2421}
2422
2423/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
2424 * analysis code and wants explicit zero extension inserted by verifier.
2425 * Otherwise, return FALSE.
2426 *
2427 * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if
2428 * you don't override this. JITs that don't want these extra insns can detect
2429 * them using insn_is_zext.
2430 */
2431bool __weak bpf_jit_needs_zext(void)
2432{
2433        return false;
2434}
2435
2436bool __weak bpf_jit_supports_kfunc_call(void)
2437{
2438        return false;
2439}
2440
2441/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
2442 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
2443 */
2444int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
2445                         int len)
2446{
2447        return -EFAULT;
2448}
2449
2450int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
2451                              void *addr1, void *addr2)
2452{
2453        return -ENOTSUPP;
2454}
2455
2456DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
2457EXPORT_SYMBOL(bpf_stats_enabled_key);
2458
2459/* All definitions of tracepoints related to BPF. */
2460#define CREATE_TRACE_POINTS
2461#include <linux/bpf_trace.h>
2462
2463EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
2464EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);
2465