linux/arch/x86/net/bpf_jit_comp.c
/* bpf_jit_comp.c : BPF JIT compiler
 *
 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <linux/bpf.h>

int bpf_jit_enable __read_mostly;

/*
 * assembly code in arch/x86/net/bpf_jit.S
 */
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
extern u8 sk_load_byte_positive_offset[];
extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
extern u8 sk_load_byte_negative_offset[];

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
        if (len == 1)
                *ptr = bytes;
        else if (len == 2)
                *(u16 *)ptr = bytes;
        else {
                *(u32 *)ptr = bytes;
                barrier();
        }
        return ptr + len;
}

#define EMIT(bytes, len) \
        do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)               EMIT(b1, 1)
#define EMIT2(b1, b2)           EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)       EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
#define EMIT1_off32(b1, off) \
        do {EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
        do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
        do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
        do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
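
/* Example: EMIT3(0x48, 0x89, 0xE5) appends the bytes 48 89 e5
 * ('mov rbp, rsp') to the image and advances 'prog' by three.
 */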

static bool is_imm8(int value)
{
        return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
        return value == (s64) (s32) value;
}

/* mov dst, src */
#define EMIT_mov(DST, SRC) \
        do {if (DST != SRC) \
                EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
        } while (0)

static int bpf_size_to_x86_bytes(int bpf_size)
{
        if (bpf_size == BPF_W)
                return 4;
        else if (bpf_size == BPF_H)
                return 2;
        else if (bpf_size == BPF_B)
                return 1;
        else if (bpf_size == BPF_DW)
                return 4; /* imm32: with REX.W, mov sign-extends the imm32 */
        else
                return 0;
}

/* list of x86 conditional jump opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f prefix byte) to generate far jumps (. + s32)
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77
#define X86_JL  0x7C
#define X86_JGE 0x7D
#define X86_JLE 0x7E
#define X86_JG  0x7F
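
/* e.g. X86_JE is the short form 74 <s8>; the far form is
 * 0f 84 <s32>, i.e. opcode X86_JE + 0x10 after a 0x0f prefix.
 */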

static void bpf_flush_icache(void *start, void *end)
{
        mm_segment_t old_fs = get_fs();

        set_fs(KERNEL_DS);
        smp_wmb();
        flush_icache_range((unsigned long)start, (unsigned long)end);
        set_fs(old_fs);
}

#define CHOOSE_LOAD_FUNC(K, func) \
        ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
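/* A positive constant offset K uses the fast positive-offset helper; a
 * negative K down to SKF_LL_OFF uses the negative-offset helper (ancillary
 * and link-layer loads); anything lower falls back to the generic helper,
 * which dispatches on the offset's sign at run time (see bpf_jit.S).
 */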

/* pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)

/* The following table maps BPF registers to x64 registers.
 *
 * x64 register r12 is unused: when used as the base address register in
 * load/store instructions it always needs an extra byte of encoding,
 * and it is callee-saved.
 *
 *  r9 caches skb->len - skb->data_len
 * r10 caches skb->data, and is also used for blinding (if enabled)
 */
static const int reg2hex[] = {
        [BPF_REG_0] = 0,  /* rax */
        [BPF_REG_1] = 7,  /* rdi */
        [BPF_REG_2] = 6,  /* rsi */
        [BPF_REG_3] = 2,  /* rdx */
        [BPF_REG_4] = 1,  /* rcx */
        [BPF_REG_5] = 0,  /* r8 */
        [BPF_REG_6] = 3,  /* rbx callee saved */
        [BPF_REG_7] = 5,  /* r13 callee saved */
        [BPF_REG_8] = 6,  /* r14 callee saved */
        [BPF_REG_9] = 7,  /* r15 callee saved */
        [BPF_REG_FP] = 5, /* rbp readonly */
        [BPF_REG_AX] = 2, /* r10 temp register */
        [AUX_REG] = 3,    /* r11 temp register */
};

/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15,
 * which need an extra byte of encoding.
 * rax,rcx,...,rbp have simpler encoding
 */
static bool is_ereg(u32 reg)
{
        return (1 << reg) & (BIT(BPF_REG_5) |
                             BIT(AUX_REG) |
                             BIT(BPF_REG_7) |
                             BIT(BPF_REG_8) |
                             BIT(BPF_REG_9) |
                             BIT(BPF_REG_AX));
}

/* add modifiers if 'reg' maps to x64 registers r8..r15 */
static u8 add_1mod(u8 byte, u32 reg)
{
        if (is_ereg(reg))
                byte |= 1;
        return byte;
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)
{
        if (is_ereg(r1))
                byte |= 1;
        if (is_ereg(r2))
                byte |= 4;
        return byte;
}

/* encode 'dst_reg' register into x64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
        return byte + reg2hex[dst_reg];
}

/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
        return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
}
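
/* Worked example: EMIT_mov(BPF_REG_8, BPF_REG_7) is 'mov r14, r13'.
 * add_2mod(0x48, ...) sets REX.B and REX.R, giving the prefix 0x4d;
 * add_2reg(0xC0, ...) builds the ModRM byte 0xee (mod=11, reg=r13,
 * rm=r14), so the emitted bytes are 4d 89 ee.
 */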

static void jit_fill_hole(void *area, unsigned int size)
{
        /* fill whole space with int3 instructions */
        memset(area, 0xcc, size);
}

struct jit_context {
        int cleanup_addr; /* epilogue code offset */
        bool seen_ld_abs;
        bool seen_ax_reg;
};

/* maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE       128
#define BPF_INSN_SAFETY         64
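
/* Each insn is emitted into the on-stack temp[] buffer first and only
 * copied into the image after the size check in do_jit(), so temp[] is
 * oversized by BPF_INSN_SAFETY bytes to keep a one-insn overrun from
 * corrupting the stack before it is caught.
 */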

#define AUX_STACK_SPACE \
        (32 /* space for rbx, r13, r14, r15 */ + \
         8 /* space for skb_copy_bits() buffer */)

#define PROLOGUE_SIZE 37

/* emit x64 prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program
 */
static void emit_prologue(u8 **pprog, u32 stack_depth)
{
        u8 *prog = *pprog;
        int cnt = 0;

        EMIT1(0x55); /* push rbp */
        EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */

        /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
        EMIT3_off32(0x48, 0x81, 0xEC,
                    round_up(stack_depth, 8) + AUX_STACK_SPACE);

        /* sub rbp, AUX_STACK_SPACE */
        EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);

        /* all classic BPF filters use R6(rbx), so save it */

        /* mov qword ptr [rbp+0],rbx */
        EMIT4(0x48, 0x89, 0x5D, 0);

        /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
         * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
         * R8(r14). R9(r15) spill could be made conditional, but there is only
         * one 'bpf_error' return path out of helper functions inside bpf_jit.S
         * The overhead of extra spill is negligible for any filter other
         * than synthetic ones. Therefore not worth adding complexity.
         */

        /* mov qword ptr [rbp+8],r13 */
        EMIT4(0x4C, 0x89, 0x6D, 8);
        /* mov qword ptr [rbp+16],r14 */
        EMIT4(0x4C, 0x89, 0x75, 16);
        /* mov qword ptr [rbp+24],r15 */
        EMIT4(0x4C, 0x89, 0x7D, 24);

        /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
         * the counter must start at 0. It's done in two instructions:
         * rax is reset to 0 (xor on eax gets zero extended) and then
         * moved to the counter location.
         */

        /* xor eax, eax */
        EMIT2(0x31, 0xc0);
        /* mov qword ptr [rbp+32], rax */
        EMIT4(0x48, 0x89, 0x45, 32);

        BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
        *pprog = prog;
}
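
/* Resulting frame layout, relative to the adjusted rbp:
 *   [rbp+0]  saved rbx     [rbp+8]  saved r13
 *   [rbp+16] saved r14     [rbp+24] saved r15
 *   [rbp+32] 8-byte aux slot; emit_bpf_tail_call() keeps the tail call
 *   counter in the dword at [rbp+36], and the qword store above zeroes
 *   the whole slot, counter included (see also bpf_jit.S).
 * The BPF program's own stack lives below the adjusted rbp.
 */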

/* generate the following code:
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call(u8 **pprog)
{
        u8 *prog = *pprog;
        int label1, label2, label3;
        int cnt = 0;

        /* rdi - pointer to ctx
         * rsi - pointer to bpf_array
         * rdx - index in bpf_array
         */

        /* if (index >= array->map.max_entries)
         *   goto out;
         */
        EMIT2(0x89, 0xD2);                        /* mov edx, edx */
        EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
              offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 43 /* number of bytes to jump */
        EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
        label1 = cnt;

        /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
         *   goto out;
         */
        EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
        EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 32
        EMIT2(X86_JA, OFFSET2);                   /* ja out */
        label2 = cnt;
        EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
        EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */

        /* prog = array->ptrs[index]; */
        EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
                    offsetof(struct bpf_array, ptrs));

        /* if (prog == NULL)
         *   goto out;
         */
        EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
#define OFFSET3 10
        EMIT2(X86_JE, OFFSET3);                   /* je out */
        label3 = cnt;

        /* goto *(prog->bpf_func + prologue_size); */
        EMIT4(0x48, 0x8B, 0x40,                   /* mov rax, qword ptr [rax + 32] */
              offsetof(struct bpf_prog, bpf_func));
        EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */

        /* now we're ready to jump into next BPF program
         * rdi == ctx (1st arg)
         * rax == prog->bpf_func + prologue_size
         */
        EMIT2(0xFF, 0xE0);                        /* jmp rax */

        /* out: */
        BUILD_BUG_ON(cnt - label1 != OFFSET1);
        BUILD_BUG_ON(cnt - label2 != OFFSET2);
        BUILD_BUG_ON(cnt - label3 != OFFSET3);
        *pprog = prog;
}
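
/* OFFSET1/2/3 are hand-counted byte distances from the insn following
 * each conditional jump to the 'out' label; the BUILD_BUG_ON() checks
 * above catch any drift when the emitted sequence changes.
 */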

static void emit_load_skb_data_hlen(u8 **pprog)
{
        u8 *prog = *pprog;
        int cnt = 0;

        /* r9d = skb->len - skb->data_len (headlen)
         * r10 = skb->data
         */
        /* mov %r9d, off32(%rdi) */
        EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));

        /* sub %r9d, off32(%rdi) */
        EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));

        /* mov %r10, off32(%rdi) */
        EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
        *pprog = prog;
}

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
{
        struct bpf_insn *insn = bpf_prog->insnsi;
        int insn_cnt = bpf_prog->len;
        bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
        bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
        bool seen_exit = false;
        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
        int i, cnt = 0;
        int proglen = 0;
        u8 *prog = temp;

        emit_prologue(&prog, bpf_prog->aux->stack_depth);

        if (seen_ld_abs)
                emit_load_skb_data_hlen(&prog);

        for (i = 0; i < insn_cnt; i++, insn++) {
                const s32 imm32 = insn->imm;
                u32 dst_reg = insn->dst_reg;
                u32 src_reg = insn->src_reg;
                u8 b1 = 0, b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
                bool reload_skb_data;
                int ilen;
                u8 *func;

                if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
                        ctx->seen_ax_reg = seen_ax_reg = true;

                switch (insn->code) {
                        /* ALU */
                case BPF_ALU | BPF_ADD | BPF_X:
                case BPF_ALU | BPF_SUB | BPF_X:
                case BPF_ALU | BPF_AND | BPF_X:
                case BPF_ALU | BPF_OR | BPF_X:
                case BPF_ALU | BPF_XOR | BPF_X:
                case BPF_ALU64 | BPF_ADD | BPF_X:
                case BPF_ALU64 | BPF_SUB | BPF_X:
                case BPF_ALU64 | BPF_AND | BPF_X:
                case BPF_ALU64 | BPF_OR | BPF_X:
                case BPF_ALU64 | BPF_XOR | BPF_X:
                        switch (BPF_OP(insn->code)) {
                        case BPF_ADD: b2 = 0x01; break;
                        case BPF_SUB: b2 = 0x29; break;
                        case BPF_AND: b2 = 0x21; break;
                        case BPF_OR: b2 = 0x09; break;
                        case BPF_XOR: b2 = 0x31; break;
                        }
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_2mod(0x48, dst_reg, src_reg));
                        else if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT1(add_2mod(0x40, dst_reg, src_reg));
                        EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
                        break;

                        /* mov dst, src */
                case BPF_ALU64 | BPF_MOV | BPF_X:
                        EMIT_mov(dst_reg, src_reg);
                        break;

                        /* mov32 dst, src */
                case BPF_ALU | BPF_MOV | BPF_X:
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT1(add_2mod(0x40, dst_reg, src_reg));
                        EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
                        break;

                        /* neg dst */
                case BPF_ALU | BPF_NEG:
                case BPF_ALU64 | BPF_NEG:
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));
                        EMIT2(0xF7, add_1reg(0xD8, dst_reg));
                        break;

                case BPF_ALU | BPF_ADD | BPF_K:
                case BPF_ALU | BPF_SUB | BPF_K:
                case BPF_ALU | BPF_AND | BPF_K:
                case BPF_ALU | BPF_OR | BPF_K:
                case BPF_ALU | BPF_XOR | BPF_K:
                case BPF_ALU64 | BPF_ADD | BPF_K:
                case BPF_ALU64 | BPF_SUB | BPF_K:
                case BPF_ALU64 | BPF_AND | BPF_K:
                case BPF_ALU64 | BPF_OR | BPF_K:
                case BPF_ALU64 | BPF_XOR | BPF_K:
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));

                        switch (BPF_OP(insn->code)) {
                        case BPF_ADD: b3 = 0xC0; break;
                        case BPF_SUB: b3 = 0xE8; break;
                        case BPF_AND: b3 = 0xE0; break;
                        case BPF_OR: b3 = 0xC8; break;
                        case BPF_XOR: b3 = 0xF0; break;
                        }

                        if (is_imm8(imm32))
                                EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
                        else
                                EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
                        break;

                case BPF_ALU64 | BPF_MOV | BPF_K:
                        /* optimization: if imm32 is positive,
                         * use 'mov eax, imm32' (which zero-extends imm32)
                         * to save 2 bytes
                         */
                        if (imm32 < 0) {
                                /* 'mov rax, imm32' sign extends imm32 */
                                b1 = add_1mod(0x48, dst_reg);
                                b2 = 0xC7;
                                b3 = 0xC0;
                                EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
                                break;
                        }

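                        /* fallthrough: for imm32 >= 0 the mov32 path below
                         * zero-extends, which yields the same 64-bit value
                         */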
                case BPF_ALU | BPF_MOV | BPF_K:
                        /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
                         * to save 3 bytes.
                         */
                        if (imm32 == 0) {
                                if (is_ereg(dst_reg))
                                        EMIT1(add_2mod(0x40, dst_reg, dst_reg));
                                b2 = 0x31; /* xor */
                                b3 = 0xC0;
                                EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
                                break;
                        }

                        /* mov %eax, imm32 */
                        if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));
                        EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
                        break;

                case BPF_LD | BPF_IMM | BPF_DW:
                        /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
                         * to save 7 bytes.
                         */
                        if (insn[0].imm == 0 && insn[1].imm == 0) {
                                b1 = add_2mod(0x48, dst_reg, dst_reg);
                                b2 = 0x31; /* xor */
                                b3 = 0xC0;
                                EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));

                                insn++;
                                i++;
                                break;
                        }

                        /* movabsq %rax, imm64 */
                        EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
                        EMIT(insn[0].imm, 4);
                        EMIT(insn[1].imm, 4);

                        insn++;
                        i++;
                        break;

                        /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
                case BPF_ALU | BPF_MOD | BPF_X:
                case BPF_ALU | BPF_DIV | BPF_X:
                case BPF_ALU | BPF_MOD | BPF_K:
                case BPF_ALU | BPF_DIV | BPF_K:
                case BPF_ALU64 | BPF_MOD | BPF_X:
                case BPF_ALU64 | BPF_DIV | BPF_X:
                case BPF_ALU64 | BPF_MOD | BPF_K:
                case BPF_ALU64 | BPF_DIV | BPF_K:
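                        /* x86 div implicitly uses rdx:rax, leaving the
                         * quotient in rax and the remainder in rdx, so
                         * both registers are saved around the operation
                         */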
                        EMIT1(0x50); /* push rax */
                        EMIT1(0x52); /* push rdx */

                        if (BPF_SRC(insn->code) == BPF_X)
                                /* mov r11, src_reg */
                                EMIT_mov(AUX_REG, src_reg);
                        else
                                /* mov r11, imm32 */
                                EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

                        /* mov rax, dst_reg */
                        EMIT_mov(BPF_REG_0, dst_reg);

                        /* xor edx, edx
                         * equivalent to 'xor rdx, rdx', but one byte less
                         */
                        EMIT2(0x31, 0xd2);

                        if (BPF_SRC(insn->code) == BPF_X) {
                                /* if (src_reg == 0) return 0 */

                                /* cmp r11, 0 */
                                EMIT4(0x49, 0x83, 0xFB, 0x00);

                                /* jne .+9 (skip over pop, pop, xor and jmp) */
                                EMIT2(X86_JNE, 1 + 1 + 2 + 5);
                                EMIT1(0x5A); /* pop rdx */
                                EMIT1(0x58); /* pop rax */
                                EMIT2(0x31, 0xc0); /* xor eax, eax */

                                /* jmp cleanup_addr
                                 * addrs[i] - 11, because there are 11 bytes
                                 * after this insn: div, mov, pop, pop, mov
                                 */
                                jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
                                EMIT1_off32(0xE9, jmp_offset);
                        }

                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                /* div r11 */
                                EMIT3(0x49, 0xF7, 0xF3);
                        else
                                /* div r11d */
                                EMIT3(0x41, 0xF7, 0xF3);

                        if (BPF_OP(insn->code) == BPF_MOD)
                                /* mov r11, rdx */
                                EMIT3(0x49, 0x89, 0xD3);
                        else
                                /* mov r11, rax */
                                EMIT3(0x49, 0x89, 0xC3);

                        EMIT1(0x5A); /* pop rdx */
                        EMIT1(0x58); /* pop rax */

                        /* mov dst_reg, r11 */
                        EMIT_mov(dst_reg, AUX_REG);
                        break;

                case BPF_ALU | BPF_MUL | BPF_K:
                case BPF_ALU | BPF_MUL | BPF_X:
                case BPF_ALU64 | BPF_MUL | BPF_K:
                case BPF_ALU64 | BPF_MUL | BPF_X:
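                        /* x86 mul implicitly multiplies by rax and writes
                         * the wide result to rdx:rax; only the low half in
                         * rax is kept as the BPF result
                         */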
                        EMIT1(0x50); /* push rax */
                        EMIT1(0x52); /* push rdx */

                        /* mov r11, dst_reg */
                        EMIT_mov(AUX_REG, dst_reg);

                        if (BPF_SRC(insn->code) == BPF_X)
                                /* mov rax, src_reg */
                                EMIT_mov(BPF_REG_0, src_reg);
                        else
                                /* mov rax, imm32 */
                                EMIT3_off32(0x48, 0xC7, 0xC0, imm32);

                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, AUX_REG));
                        else if (is_ereg(AUX_REG))
                                EMIT1(add_1mod(0x40, AUX_REG));
                        /* mul(q) r11 */
                        EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

                        /* mov r11, rax */
                        EMIT_mov(AUX_REG, BPF_REG_0);

                        EMIT1(0x5A); /* pop rdx */
                        EMIT1(0x58); /* pop rax */

                        /* mov dst_reg, r11 */
                        EMIT_mov(dst_reg, AUX_REG);
                        break;

                        /* shifts */
                case BPF_ALU | BPF_LSH | BPF_K:
                case BPF_ALU | BPF_RSH | BPF_K:
                case BPF_ALU | BPF_ARSH | BPF_K:
                case BPF_ALU64 | BPF_LSH | BPF_K:
                case BPF_ALU64 | BPF_RSH | BPF_K:
                case BPF_ALU64 | BPF_ARSH | BPF_K:
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));

                        switch (BPF_OP(insn->code)) {
                        case BPF_LSH: b3 = 0xE0; break;
                        case BPF_RSH: b3 = 0xE8; break;
                        case BPF_ARSH: b3 = 0xF8; break;
                        }
                        EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
                        break;

                case BPF_ALU | BPF_LSH | BPF_X:
                case BPF_ALU | BPF_RSH | BPF_X:
                case BPF_ALU | BPF_ARSH | BPF_X:
                case BPF_ALU64 | BPF_LSH | BPF_X:
                case BPF_ALU64 | BPF_RSH | BPF_X:
                case BPF_ALU64 | BPF_ARSH | BPF_X:

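                        /* x86 variable-count shifts take the count in %cl
                         * only, hence the rcx/r11 juggling below
                         */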
                        /* check for bad case when dst_reg == rcx */
                        if (dst_reg == BPF_REG_4) {
                                /* mov r11, dst_reg */
                                EMIT_mov(AUX_REG, dst_reg);
                                dst_reg = AUX_REG;
                        }

                        if (src_reg != BPF_REG_4) { /* common case */
                                EMIT1(0x51); /* push rcx */

                                /* mov rcx, src_reg */
                                EMIT_mov(BPF_REG_4, src_reg);
                        }

                        /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));

                        switch (BPF_OP(insn->code)) {
                        case BPF_LSH: b3 = 0xE0; break;
                        case BPF_RSH: b3 = 0xE8; break;
                        case BPF_ARSH: b3 = 0xF8; break;
                        }
                        EMIT2(0xD3, add_1reg(b3, dst_reg));

                        if (src_reg != BPF_REG_4)
                                EMIT1(0x59); /* pop rcx */

                        if (insn->dst_reg == BPF_REG_4)
                                /* mov dst_reg, r11 */
                                EMIT_mov(insn->dst_reg, AUX_REG);
                        break;

                case BPF_ALU | BPF_END | BPF_FROM_BE:
                        switch (imm32) {
                        case 16:
                                /* emit 'ror %ax, 8' to swap lower 2 bytes */
                                EMIT1(0x66);
                                if (is_ereg(dst_reg))
                                        EMIT1(0x41);
                                EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);

                                /* emit 'movzwl eax, ax' */
                                if (is_ereg(dst_reg))
                                        EMIT3(0x45, 0x0F, 0xB7);
                                else
                                        EMIT2(0x0F, 0xB7);
                                EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
                                break;
                        case 32:
                                /* emit 'bswap eax' to swap lower 4 bytes */
                                if (is_ereg(dst_reg))
                                        EMIT2(0x41, 0x0F);
                                else
                                        EMIT1(0x0F);
                                EMIT1(add_1reg(0xC8, dst_reg));
                                break;
                        case 64:
                                /* emit 'bswap rax' to swap 8 bytes */
                                EMIT3(add_1mod(0x48, dst_reg), 0x0F,
                                      add_1reg(0xC8, dst_reg));
                                break;
                        }
                        break;

                case BPF_ALU | BPF_END | BPF_FROM_LE:
                        switch (imm32) {
                        case 16:
                                /* emit 'movzwl eax, ax' to zero extend 16-bit
                                 * into 64 bit
                                 */
                                if (is_ereg(dst_reg))
                                        EMIT3(0x45, 0x0F, 0xB7);
                                else
                                        EMIT2(0x0F, 0xB7);
                                EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
                                break;
                        case 32:
                                /* emit 'mov eax, eax' to clear upper 32-bits */
                                if (is_ereg(dst_reg))
                                        EMIT1(0x45);
                                EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
                                break;
                        case 64:
                                /* nop */
                                break;
                        }
                        break;

                        /* ST: *(u8*)(dst_reg + off) = imm */
                case BPF_ST | BPF_MEM | BPF_B:
                        if (is_ereg(dst_reg))
                                EMIT2(0x41, 0xC6);
                        else
                                EMIT1(0xC6);
                        goto st;
                case BPF_ST | BPF_MEM | BPF_H:
                        if (is_ereg(dst_reg))
                                EMIT3(0x66, 0x41, 0xC7);
                        else
                                EMIT2(0x66, 0xC7);
                        goto st;
                case BPF_ST | BPF_MEM | BPF_W:
                        if (is_ereg(dst_reg))
                                EMIT2(0x41, 0xC7);
                        else
                                EMIT1(0xC7);
                        goto st;
                case BPF_ST | BPF_MEM | BPF_DW:
                        EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:                     if (is_imm8(insn->off))
                                EMIT2(add_1reg(0x40, dst_reg), insn->off);
                        else
                                EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

                        EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
                        break;

                        /* STX: *(u8*)(dst_reg + off) = src_reg */
                case BPF_STX | BPF_MEM | BPF_B:
                        /* emit 'mov byte ptr [rax + off], al' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg) ||
                            /* have to add extra byte for x86 SIL, DIL regs */
                            src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
                                EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
                        else
                                EMIT1(0x88);
                        goto stx;
                case BPF_STX | BPF_MEM | BPF_H:
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
                        else
                                EMIT2(0x66, 0x89);
                        goto stx;
                case BPF_STX | BPF_MEM | BPF_W:
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
                        else
                                EMIT1(0x89);
                        goto stx;
                case BPF_STX | BPF_MEM | BPF_DW:
                        EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
stx:                    if (is_imm8(insn->off))
                                EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
                        else
                                EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
                                            insn->off);
                        break;

                        /* LDX: dst_reg = *(u8*)(src_reg + off) */
                case BPF_LDX | BPF_MEM | BPF_B:
                        /* emit 'movzx rax, byte ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_H:
                        /* emit 'movzx rax, word ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_W:
                        /* emit 'mov eax, dword ptr [rax+0x14]' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
                        else
                                EMIT1(0x8B);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_DW:
                        /* emit 'mov rax, qword ptr [rax+0x14]' */
                        EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx:                    /* if insn->off == 0 we could save one byte, but the
                         * special case of x86 r13, which always needs an
                         * offset, is not worth the hassle
                         */
                        if (is_imm8(insn->off))
                                EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
                        else
                                EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
                                            insn->off);
                        break;

                        /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
                case BPF_STX | BPF_XADD | BPF_W:
                        /* emit 'lock add dword ptr [rax + off], eax' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
                        else
                                EMIT2(0xF0, 0x01);
                        goto xadd;
                case BPF_STX | BPF_XADD | BPF_DW:
                        EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
xadd:                   if (is_imm8(insn->off))
                                EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
                        else
                                EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
                                            insn->off);
                        break;

                        /* call */
                case BPF_JMP | BPF_CALL:
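                        /* imm32 is the helper's offset from __bpf_call_base.
                         * addrs[i] is the end of this BPF insn; when nothing
                         * follows the call, that is exactly what the call's
                         * rel32 is relative to, and the seen_ld_abs paths
                         * below adjust jmp_offset for the bytes they emit
                         * after the call.
                         */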
                        func = (u8 *) __bpf_call_base + imm32;
                        jmp_offset = func - (image + addrs[i]);
                        if (seen_ld_abs) {
                                reload_skb_data = bpf_helper_changes_pkt_data(func);
                                if (reload_skb_data) {
                                        EMIT1(0x57); /* push %rdi */
                                        jmp_offset += 22; /* pop, mov, sub, mov */
                                } else {
                                        EMIT2(0x41, 0x52); /* push %r10 */
                                        EMIT2(0x41, 0x51); /* push %r9 */
                                        /* need to adjust jmp offset, since
                                         * pop %r9, pop %r10 take 4 bytes after call insn
                                         */
                                        jmp_offset += 4;
                                }
                        }
                        if (!imm32 || !is_simm32(jmp_offset)) {
                                pr_err("unsupported bpf func %d addr %p image %p\n",
                                       imm32, func, image);
                                return -EINVAL;
                        }
                        EMIT1_off32(0xE8, jmp_offset);
                        if (seen_ld_abs) {
                                if (reload_skb_data) {
                                        EMIT1(0x5F); /* pop %rdi */
                                        emit_load_skb_data_hlen(&prog);
                                } else {
                                        EMIT2(0x41, 0x59); /* pop %r9 */
                                        EMIT2(0x41, 0x5A); /* pop %r10 */
                                }
                        }
                        break;

                case BPF_JMP | BPF_TAIL_CALL:
                        emit_bpf_tail_call(&prog);
                        break;

                        /* cond jump */
                case BPF_JMP | BPF_JEQ | BPF_X:
                case BPF_JMP | BPF_JNE | BPF_X:
                case BPF_JMP | BPF_JGT | BPF_X:
                case BPF_JMP | BPF_JLT | BPF_X:
                case BPF_JMP | BPF_JGE | BPF_X:
                case BPF_JMP | BPF_JLE | BPF_X:
                case BPF_JMP | BPF_JSGT | BPF_X:
                case BPF_JMP | BPF_JSLT | BPF_X:
                case BPF_JMP | BPF_JSGE | BPF_X:
                case BPF_JMP | BPF_JSLE | BPF_X:
                        /* cmp dst_reg, src_reg */
                        EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
                              add_2reg(0xC0, dst_reg, src_reg));
                        goto emit_cond_jmp;

                case BPF_JMP | BPF_JSET | BPF_X:
                        /* test dst_reg, src_reg */
                        EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
                              add_2reg(0xC0, dst_reg, src_reg));
                        goto emit_cond_jmp;

                case BPF_JMP | BPF_JSET | BPF_K:
                        /* test dst_reg, imm32 */
                        EMIT1(add_1mod(0x48, dst_reg));
                        EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
                        goto emit_cond_jmp;

                case BPF_JMP | BPF_JEQ | BPF_K:
                case BPF_JMP | BPF_JNE | BPF_K:
                case BPF_JMP | BPF_JGT | BPF_K:
                case BPF_JMP | BPF_JLT | BPF_K:
                case BPF_JMP | BPF_JGE | BPF_K:
                case BPF_JMP | BPF_JLE | BPF_K:
                case BPF_JMP | BPF_JSGT | BPF_K:
                case BPF_JMP | BPF_JSLT | BPF_K:
                case BPF_JMP | BPF_JSGE | BPF_K:
                case BPF_JMP | BPF_JSLE | BPF_K:
                        /* cmp dst_reg, imm8/32 */
                        EMIT1(add_1mod(0x48, dst_reg));

                        if (is_imm8(imm32))
                                EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
                        else
                                EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);

emit_cond_jmp:          /* convert BPF opcode to x86 */
                        switch (BPF_OP(insn->code)) {
                        case BPF_JEQ:
                                jmp_cond = X86_JE;
                                break;
                        case BPF_JSET:
                        case BPF_JNE:
                                jmp_cond = X86_JNE;
                                break;
                        case BPF_JGT:
                                /* GT is unsigned '>', JA in x86 */
                                jmp_cond = X86_JA;
                                break;
                        case BPF_JLT:
                                /* LT is unsigned '<', JB in x86 */
                                jmp_cond = X86_JB;
                                break;
                        case BPF_JGE:
                                /* GE is unsigned '>=', JAE in x86 */
                                jmp_cond = X86_JAE;
                                break;
                        case BPF_JLE:
                                /* LE is unsigned '<=', JBE in x86 */
                                jmp_cond = X86_JBE;
                                break;
                        case BPF_JSGT:
                                /* signed '>', GT in x86 */
                                jmp_cond = X86_JG;
                                break;
                        case BPF_JSLT:
                                /* signed '<', LT in x86 */
                                jmp_cond = X86_JL;
                                break;
                        case BPF_JSGE:
                                /* signed '>=', GE in x86 */
                                jmp_cond = X86_JGE;
                                break;
                        case BPF_JSLE:
                                /* signed '<=', LE in x86 */
                                jmp_cond = X86_JLE;
                                break;
                        default: /* to silence gcc warning */
                                return -EFAULT;
                        }
                        jmp_offset = addrs[i + insn->off] - addrs[i];
                        if (is_imm8(jmp_offset)) {
                                EMIT2(jmp_cond, jmp_offset);
                        } else if (is_simm32(jmp_offset)) {
                                EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
                        } else {
                                pr_err("cond_jmp gen bug %llx\n", jmp_offset);
                                return -EFAULT;
                        }

                        break;

                case BPF_JMP | BPF_JA:
                        jmp_offset = addrs[i + insn->off] - addrs[i];
                        if (!jmp_offset)
                                /* optimize out nop jumps */
                                break;
emit_jmp:
                        if (is_imm8(jmp_offset)) {
                                EMIT2(0xEB, jmp_offset);
                        } else if (is_simm32(jmp_offset)) {
                                EMIT1_off32(0xE9, jmp_offset);
                        } else {
                                pr_err("jmp gen bug %llx\n", jmp_offset);
                                return -EFAULT;
                        }
                        break;

                case BPF_LD | BPF_IND | BPF_W:
                        func = sk_load_word;
                        goto common_load;
                case BPF_LD | BPF_ABS | BPF_W:
                        func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
common_load:
                        ctx->seen_ld_abs = seen_ld_abs = true;
                        jmp_offset = func - (image + addrs[i]);
                        if (!func || !is_simm32(jmp_offset)) {
                                pr_err("unsupported bpf func %d addr %p image %p\n",
                                       imm32, func, image);
                                return -EINVAL;
                        }
                        if (BPF_MODE(insn->code) == BPF_ABS) {
                                /* mov %esi, imm32 */
                                EMIT1_off32(0xBE, imm32);
                        } else {
                                /* mov %rsi, src_reg */
                                EMIT_mov(BPF_REG_2, src_reg);
                                if (imm32) {
                                        if (is_imm8(imm32))
                                                /* add %esi, imm8 */
                                                EMIT3(0x83, 0xC6, imm32);
                                        else
                                                /* add %esi, imm32 */
                                                EMIT2_off32(0x81, 0xC6, imm32);
                                }
                        }
                        /* skb pointer is in R6 (%rbx), it will be copied into
                         * %rdi if skb_copy_bits() call is necessary.
                         * sk_load_* helpers also use %r10 and %r9d.
                         * See bpf_jit.S
                         */
                        if (seen_ax_reg)
                                /* r10 = skb->data, mov %r10, off32(%rbx) */
                                EMIT3_off32(0x4c, 0x8b, 0x93,
                                            offsetof(struct sk_buff, data));
                        EMIT1_off32(0xE8, jmp_offset); /* call */
                        break;

                case BPF_LD | BPF_IND | BPF_H:
                        func = sk_load_half;
                        goto common_load;
                case BPF_LD | BPF_ABS | BPF_H:
                        func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
                        goto common_load;
                case BPF_LD | BPF_IND | BPF_B:
                        func = sk_load_byte;
                        goto common_load;
                case BPF_LD | BPF_ABS | BPF_B:
                        func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
                        goto common_load;

                case BPF_JMP | BPF_EXIT:
                        if (seen_exit) {
                                jmp_offset = ctx->cleanup_addr - addrs[i];
                                goto emit_jmp;
                        }
                        seen_exit = true;
                        /* update cleanup_addr */
                        ctx->cleanup_addr = proglen;
                        /* mov rbx, qword ptr [rbp+0] */
                        EMIT4(0x48, 0x8B, 0x5D, 0);
                        /* mov r13, qword ptr [rbp+8] */
                        EMIT4(0x4C, 0x8B, 0x6D, 8);
                        /* mov r14, qword ptr [rbp+16] */
                        EMIT4(0x4C, 0x8B, 0x75, 16);
                        /* mov r15, qword ptr [rbp+24] */
                        EMIT4(0x4C, 0x8B, 0x7D, 24);

                        /* add rbp, AUX_STACK_SPACE */
                        EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
                        EMIT1(0xC9); /* leave */
                        EMIT1(0xC3); /* ret */
                        break;

                default:
                        /* By design the x64 JIT supports all BPF instructions.
                         * This error is only seen if a new instruction was
                         * added to the interpreter but not to the JIT, or if
                         * there is junk in bpf_prog.
                         */
                        pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
                        return -EINVAL;
                }

                ilen = prog - temp;
                if (ilen > BPF_MAX_INSN_SIZE) {
                        pr_err("bpf_jit: fatal insn size error\n");
                        return -EFAULT;
                }

                if (image) {
                        if (unlikely(proglen + ilen > oldproglen)) {
                                pr_err("bpf_jit: fatal error\n");
                                return -EFAULT;
                        }
                        memcpy(image + proglen, temp, ilen);
                }
                proglen += ilen;
                addrs[i] = proglen;
                prog = temp;
        }
        return proglen;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
        struct bpf_binary_header *header = NULL;
        struct bpf_prog *tmp, *orig_prog = prog;
        int proglen, oldproglen = 0;
        struct jit_context ctx = {};
        bool tmp_blinded = false;
        u8 *image = NULL;
        int *addrs;
        int pass;
        int i;

        if (!bpf_jit_enable)
                return orig_prog;

        tmp = bpf_jit_blind_constants(prog);
        /* If blinding was requested and we failed during blinding,
         * we must fall back to the interpreter.
         */
        if (IS_ERR(tmp))
                return orig_prog;
        if (tmp != prog) {
                tmp_blinded = true;
                prog = tmp;
        }

        addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
        if (!addrs) {
                prog = orig_prog;
                goto out;
        }

        /* Before first pass, make a rough estimation of addrs[]:
         * each BPF instruction is translated to less than 64 bytes
         */
        for (proglen = 0, i = 0; i < prog->len; i++) {
                proglen += 64;
                addrs[i] = proglen;
        }
        ctx.cleanup_addr = proglen;

        /* JITed image shrinks with every pass and the loop iterates
         * until the image stops shrinking. Very large BPF programs
         * may converge only on the last pass. In such a case, do one
         * more pass to emit the final image.
         */
        for (pass = 0; pass < 10 || image; pass++) {
                proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
                if (proglen <= 0) {
                        image = NULL;
                        if (header)
                                bpf_jit_binary_free(header);
                        prog = orig_prog;
                        goto out_addrs;
                }
                if (image) {
                        if (proglen != oldproglen) {
                                pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
                                       proglen, oldproglen);
                                prog = orig_prog;
                                goto out_addrs;
                        }
                        break;
                }
                if (proglen == oldproglen) {
                        header = bpf_jit_binary_alloc(proglen, &image,
                                                      1, jit_fill_hole);
                        if (!header) {
                                prog = orig_prog;
                                goto out_addrs;
                        }
                }
                oldproglen = proglen;
        }

        if (bpf_jit_enable > 1)
                bpf_jit_dump(prog->len, proglen, pass + 1, image);

        if (image) {
                bpf_flush_icache(header, image + proglen);
                bpf_jit_binary_lock_ro(header);
                prog->bpf_func = (void *)image;
                prog->jited = 1;
                prog->jited_len = proglen;
        } else {
                prog = orig_prog;
        }

out_addrs:
        kfree(addrs);
out:
        if (tmp_blinded)
                bpf_jit_prog_release_other(prog, prog == orig_prog ?
                                           tmp : orig_prog);
        return prog;
}