linux/arch/x86/net/bpf_jit_comp.c
/* bpf_jit_comp.c : BPF JIT compiler
 *
 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>

int bpf_jit_enable __read_mostly;

/*
 * assembly code in arch/x86/net/bpf_jit.S
 */
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
extern u8 sk_load_byte_positive_offset[];
extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
extern u8 sk_load_byte_negative_offset[];

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
        if (len == 1)
                *ptr = bytes;
        else if (len == 2)
                *(u16 *)ptr = bytes;
        else {
                *(u32 *)ptr = bytes;
                barrier();
        }
        return ptr + len;
}

#define EMIT(bytes, len)        do { prog = emit_code(prog, bytes, len); } while (0)

#define EMIT1(b1)               EMIT(b1, 1)
#define EMIT2(b1, b2)           EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)       EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
#define EMIT1_off32(b1, off) \
        do {EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
        do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
        do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
        do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
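
/* The EMITx() helpers pack up to four opcode bytes into one u32 and rely on
 * x86 being little-endian, so the lowest byte lands first in the instruction
 * stream; e.g. EMIT3(0x48, 0x89, 0xE5) writes the byte sequence 48 89 E5,
 * i.e. 'mov rbp, rsp'.  Note that emit_code() stores a full u32 even for
 * len == 3; the stray byte is simply overwritten by the next emit.
 */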

static bool is_imm8(int value)
{
        return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
        return value == (s64) (s32) value;
}
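
/* Most x86 ALU instructions exist in two immediate forms: opcode 0x83 with a
 * sign-extended 8-bit immediate and opcode 0x81 with a full 32-bit immediate.
 * is_imm8() picks the short form when the constant fits; is_simm32() checks
 * that 64-bit jump/call targets still fit the rel32 encodings used below.
 */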

/* mov dst, src */
#define EMIT_mov(DST, SRC) \
        do {if (DST != SRC) \
                EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
        } while (0)

static int bpf_size_to_x86_bytes(int bpf_size)
{
        if (bpf_size == BPF_W)
                return 4;
        else if (bpf_size == BPF_H)
                return 2;
        else if (bpf_size == BPF_B)
                return 1;
        else if (bpf_size == BPF_DW)
                return 4; /* imm32 */
        else
                return 0;
}
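
/* BPF_DW intentionally maps to 4 bytes here: x86 has no store of a 64-bit
 * immediate, so the ST | MEM | DW case below emits 'mov qword ptr [reg+off],
 * imm32', which sign-extends a 4-byte immediate.
 */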

/* list of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77
#define X86_JGE 0x7D
#define X86_JG  0x7F

static void bpf_flush_icache(void *start, void *end)
{
        mm_segment_t old_fs = get_fs();

        set_fs(KERNEL_DS);
        smp_wmb();
        flush_icache_range((unsigned long)start, (unsigned long)end);
        set_fs(old_fs);
}

#define CHOOSE_LOAD_FUNC(K, func) \
        ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
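
/* For classic BPF packet loads with a compile-time constant offset K:
 * a non-negative K takes the fast *_positive_offset helper, K in the
 * negative SKF_*_OFF range (loads relative to the link-layer/network
 * header) takes *_negative_offset, and anything else falls back to the
 * generic helper.  BPF_IND loads, whose offset is only known at run time,
 * always use the generic sk_load_* entry points (see common_load below).
 */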

/* pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_REG + 1)

/* the following table maps BPF registers to x64 registers.
 * x64 register r12 is unused, since if used as base address register
 * in load/store instructions, it always needs an extra byte of encoding
 */
static const int reg2hex[] = {
        [BPF_REG_0] = 0,  /* rax */
        [BPF_REG_1] = 7,  /* rdi */
        [BPF_REG_2] = 6,  /* rsi */
        [BPF_REG_3] = 2,  /* rdx */
        [BPF_REG_4] = 1,  /* rcx */
        [BPF_REG_5] = 0,  /* r8 */
        [BPF_REG_6] = 3,  /* rbx callee saved */
        [BPF_REG_7] = 5,  /* r13 callee saved */
        [BPF_REG_8] = 6,  /* r14 callee saved */
        [BPF_REG_9] = 7,  /* r15 callee saved */
        [BPF_REG_FP] = 5, /* rbp readonly */
        [AUX_REG] = 3,    /* r11 temp register */
};

/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
 * which need extra byte of encoding.
 * rax,rcx,...,rbp have simpler encoding
 */
static bool is_ereg(u32 reg)
{
        return (1 << reg) & (BIT(BPF_REG_5) |
                             BIT(AUX_REG) |
                             BIT(BPF_REG_7) |
                             BIT(BPF_REG_8) |
                             BIT(BPF_REG_9));
}

/* add modifiers if 'reg' maps to x64 registers r8..r15 */
static u8 add_1mod(u8 byte, u32 reg)
{
        if (is_ereg(reg))
                byte |= 1;
        return byte;
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)
{
        if (is_ereg(r1))
                byte |= 1;
        if (is_ereg(r2))
                byte |= 4;
        return byte;
}
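
/* In a REX prefix (0100WRXB) bit 0 (B) extends the ModRM r/m field and
 * bit 2 (R) extends the ModRM reg field, hence the '|= 1' for the r/m
 * operand and '|= 4' for the reg operand whenever a BPF register maps to
 * one of r8..r15.
 */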

/* encode 'dst_reg' register into x64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
        return byte + reg2hex[dst_reg];
}

/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
        return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
}
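
/* Worked example (purely illustrative): BPF_ALU64 | BPF_ADD | BPF_X with
 * dst_reg = R1 (rdi, hex 7) and src_reg = R2 (rsi, hex 6) emits
 * add_2mod(0x48, ...) = 0x48, opcode 0x01 and add_2reg(0xC0, ...) =
 * 0xC0 + 7 + (6 << 3) = 0xF7, i.e. the three bytes 48 01 F7 = 'add rdi, rsi'.
 */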

static void jit_fill_hole(void *area, unsigned int size)
{
        /* fill whole space with int3 instructions */
        memset(area, 0xcc, size);
}

struct jit_context {
        int cleanup_addr; /* epilogue code offset */
        bool seen_ld_abs;
};

/* maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE       128
#define BPF_INSN_SAFETY         64
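
/* Each instruction is JITed into the on-stack temp[] buffer and only copied
 * into the final image at the end of the do_jit() loop, so one over-long
 * instruction cannot overflow the image.  BPF_INSN_SAFETY appears to be
 * extra headroom so the size check after the switch can still catch an
 * oversized instruction before temp[] itself is overrun.
 */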

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
{
        struct bpf_insn *insn = bpf_prog->insnsi;
        int insn_cnt = bpf_prog->len;
        bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
        bool seen_exit = false;
        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
        int i;
        int proglen = 0;
        u8 *prog = temp;
        int stacksize = MAX_BPF_STACK +
                32 /* space for rbx, r13, r14, r15 */ +
                8 /* space for skb_copy_bits() buffer */;
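
        /* With MAX_BPF_STACK == 512 this is a 552 byte frame: the BPF
         * program's own stack sits directly below rbp (BPF_REG_FP), and the
         * four saved callee-saved registers plus the 8 byte skb_copy_bits()
         * scratch slot live below that, at the bottom of the frame.
         */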

        EMIT1(0x55); /* push rbp */
        EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */

        /* sub rsp, stacksize */
        EMIT3_off32(0x48, 0x81, 0xEC, stacksize);

        /* all classic BPF filters use R6(rbx) save it */

        /* mov qword ptr [rbp-X],rbx */
        EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);

        /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
         * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
         * R8(r14). R9(r15) spill could be made conditional, but there is only
         * one 'bpf_error' return path out of helper functions inside bpf_jit.S
         * The overhead of extra spill is negligible for any filter other
         * than synthetic ones. Therefore not worth adding complexity.
         */

        /* mov qword ptr [rbp-X],r13 */
        EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8);
        /* mov qword ptr [rbp-X],r14 */
        EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16);
        /* mov qword ptr [rbp-X],r15 */
        EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24);

        /* clear A and X registers */
        EMIT2(0x31, 0xc0); /* xor eax, eax */
        EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */

        if (seen_ld_abs) {
                /* r9d : skb->len - skb->data_len (headlen)
                 * r10 : skb->data
                 */
                if (is_imm8(offsetof(struct sk_buff, len)))
                        /* mov %r9d, off8(%rdi) */
                        EMIT4(0x44, 0x8b, 0x4f,
                              offsetof(struct sk_buff, len));
                else
                        /* mov %r9d, off32(%rdi) */
                        EMIT3_off32(0x44, 0x8b, 0x8f,
                                    offsetof(struct sk_buff, len));

                if (is_imm8(offsetof(struct sk_buff, data_len)))
                        /* sub %r9d, off8(%rdi) */
                        EMIT4(0x44, 0x2b, 0x4f,
                              offsetof(struct sk_buff, data_len));
                else
                        EMIT3_off32(0x44, 0x2b, 0x8f,
                                    offsetof(struct sk_buff, data_len));

                if (is_imm8(offsetof(struct sk_buff, data)))
                        /* mov %r10, off8(%rdi) */
                        EMIT4(0x4c, 0x8b, 0x57,
                              offsetof(struct sk_buff, data));
                else
                        /* mov %r10, off32(%rdi) */
                        EMIT3_off32(0x4c, 0x8b, 0x97,
                                    offsetof(struct sk_buff, data));
        }

        for (i = 0; i < insn_cnt; i++, insn++) {
                const s32 imm32 = insn->imm;
                u32 dst_reg = insn->dst_reg;
                u32 src_reg = insn->src_reg;
                u8 b1 = 0, b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
                int ilen;
                u8 *func;

                switch (insn->code) {
                        /* ALU */
                case BPF_ALU | BPF_ADD | BPF_X:
                case BPF_ALU | BPF_SUB | BPF_X:
                case BPF_ALU | BPF_AND | BPF_X:
                case BPF_ALU | BPF_OR | BPF_X:
                case BPF_ALU | BPF_XOR | BPF_X:
                case BPF_ALU64 | BPF_ADD | BPF_X:
                case BPF_ALU64 | BPF_SUB | BPF_X:
                case BPF_ALU64 | BPF_AND | BPF_X:
                case BPF_ALU64 | BPF_OR | BPF_X:
                case BPF_ALU64 | BPF_XOR | BPF_X:
                        switch (BPF_OP(insn->code)) {
                        case BPF_ADD: b2 = 0x01; break;
                        case BPF_SUB: b2 = 0x29; break;
                        case BPF_AND: b2 = 0x21; break;
                        case BPF_OR: b2 = 0x09; break;
                        case BPF_XOR: b2 = 0x31; break;
                        }
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_2mod(0x48, dst_reg, src_reg));
                        else if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT1(add_2mod(0x40, dst_reg, src_reg));
                        EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
                        break;

                        /* mov dst, src */
                case BPF_ALU64 | BPF_MOV | BPF_X:
                        EMIT_mov(dst_reg, src_reg);
                        break;

                        /* mov32 dst, src */
                case BPF_ALU | BPF_MOV | BPF_X:
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT1(add_2mod(0x40, dst_reg, src_reg));
                        EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
                        break;

                        /* neg dst */
                case BPF_ALU | BPF_NEG:
                case BPF_ALU64 | BPF_NEG:
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));
                        EMIT2(0xF7, add_1reg(0xD8, dst_reg));
                        break;

                case BPF_ALU | BPF_ADD | BPF_K:
                case BPF_ALU | BPF_SUB | BPF_K:
                case BPF_ALU | BPF_AND | BPF_K:
                case BPF_ALU | BPF_OR | BPF_K:
                case BPF_ALU | BPF_XOR | BPF_K:
                case BPF_ALU64 | BPF_ADD | BPF_K:
                case BPF_ALU64 | BPF_SUB | BPF_K:
                case BPF_ALU64 | BPF_AND | BPF_K:
                case BPF_ALU64 | BPF_OR | BPF_K:
                case BPF_ALU64 | BPF_XOR | BPF_K:
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));

                        switch (BPF_OP(insn->code)) {
                        case BPF_ADD: b3 = 0xC0; break;
                        case BPF_SUB: b3 = 0xE8; break;
                        case BPF_AND: b3 = 0xE0; break;
                        case BPF_OR: b3 = 0xC8; break;
                        case BPF_XOR: b3 = 0xF0; break;
                        }

                        if (is_imm8(imm32))
                                EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
                        else
                                EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
                        break;

                case BPF_ALU64 | BPF_MOV | BPF_K:
                        /* optimization: if imm32 is positive,
                         * use 'mov eax, imm32' (which zero-extends imm32)
                         * to save 2 bytes
                         */
                        if (imm32 < 0) {
                                /* 'mov rax, imm32' sign extends imm32 */
                                b1 = add_1mod(0x48, dst_reg);
                                b2 = 0xC7;
                                b3 = 0xC0;
                                EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
                                break;
                        }

                case BPF_ALU | BPF_MOV | BPF_K:
                        /* mov %eax, imm32 */
                        if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));
                        EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
                        break;

                case BPF_LD | BPF_IMM | BPF_DW:
                        if (insn[1].code != 0 || insn[1].src_reg != 0 ||
                            insn[1].dst_reg != 0 || insn[1].off != 0) {
                                /* verifier must catch invalid insns */
                                pr_err("invalid BPF_LD_IMM64 insn\n");
                                return -EINVAL;
                        }

                        /* movabsq %rax, imm64 */
                        EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
                        EMIT(insn[0].imm, 4);
                        EMIT(insn[1].imm, 4);

                        insn++;
                        i++;
                        break;

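                        /* x86 div implicitly uses rdx:rax as the dividend and
                         * leaves the quotient in rax and the remainder in rdx,
                         * and neither register is free here (rax is R0, rdx is
                         * R3).  The cases below therefore save both around the
                         * operation, route the divisor through r11 (AUX_REG)
                         * and copy the wanted half of the result back into
                         * dst_reg at the end.
                         */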
                        /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
                case BPF_ALU | BPF_MOD | BPF_X:
                case BPF_ALU | BPF_DIV | BPF_X:
                case BPF_ALU | BPF_MOD | BPF_K:
                case BPF_ALU | BPF_DIV | BPF_K:
                case BPF_ALU64 | BPF_MOD | BPF_X:
                case BPF_ALU64 | BPF_DIV | BPF_X:
                case BPF_ALU64 | BPF_MOD | BPF_K:
                case BPF_ALU64 | BPF_DIV | BPF_K:
                        EMIT1(0x50); /* push rax */
                        EMIT1(0x52); /* push rdx */

                        if (BPF_SRC(insn->code) == BPF_X)
                                /* mov r11, src_reg */
                                EMIT_mov(AUX_REG, src_reg);
                        else
                                /* mov r11, imm32 */
                                EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

                        /* mov rax, dst_reg */
                        EMIT_mov(BPF_REG_0, dst_reg);

                        /* xor edx, edx
                         * equivalent to 'xor rdx, rdx', but one byte less
                         */
                        EMIT2(0x31, 0xd2);

                        if (BPF_SRC(insn->code) == BPF_X) {
                                /* if (src_reg == 0) return 0 */

                                /* cmp r11, 0 */
                                EMIT4(0x49, 0x83, 0xFB, 0x00);

                                /* jne .+9 (skip over pop, pop, xor and jmp) */
                                EMIT2(X86_JNE, 1 + 1 + 2 + 5);
                                EMIT1(0x5A); /* pop rdx */
                                EMIT1(0x58); /* pop rax */
                                EMIT2(0x31, 0xc0); /* xor eax, eax */

                                /* jmp cleanup_addr
                                 * addrs[i] - 11, because there are 11 bytes
                                 * after this insn: div, mov, pop, pop, mov
                                 */
                                jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
                                EMIT1_off32(0xE9, jmp_offset);
                        }

                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                /* div r11 */
                                EMIT3(0x49, 0xF7, 0xF3);
                        else
                                /* div r11d */
                                EMIT3(0x41, 0xF7, 0xF3);

                        if (BPF_OP(insn->code) == BPF_MOD)
                                /* mov r11, rdx */
                                EMIT3(0x49, 0x89, 0xD3);
                        else
                                /* mov r11, rax */
                                EMIT3(0x49, 0x89, 0xC3);

                        EMIT1(0x5A); /* pop rdx */
                        EMIT1(0x58); /* pop rax */

                        /* mov dst_reg, r11 */
                        EMIT_mov(dst_reg, AUX_REG);
                        break;

                case BPF_ALU | BPF_MUL | BPF_K:
                case BPF_ALU | BPF_MUL | BPF_X:
                case BPF_ALU64 | BPF_MUL | BPF_K:
                case BPF_ALU64 | BPF_MUL | BPF_X:
                        EMIT1(0x50); /* push rax */
                        EMIT1(0x52); /* push rdx */

                        /* mov r11, dst_reg */
                        EMIT_mov(AUX_REG, dst_reg);

                        if (BPF_SRC(insn->code) == BPF_X)
                                /* mov rax, src_reg */
                                EMIT_mov(BPF_REG_0, src_reg);
                        else
                                /* mov rax, imm32 */
                                EMIT3_off32(0x48, 0xC7, 0xC0, imm32);

                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, AUX_REG));
                        else if (is_ereg(AUX_REG))
                                EMIT1(add_1mod(0x40, AUX_REG));
                        /* mul(q) r11 */
                        EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

                        /* mov r11, rax */
                        EMIT_mov(AUX_REG, BPF_REG_0);

                        EMIT1(0x5A); /* pop rdx */
                        EMIT1(0x58); /* pop rax */

                        /* mov dst_reg, r11 */
                        EMIT_mov(dst_reg, AUX_REG);
                        break;

                        /* shifts */
                case BPF_ALU | BPF_LSH | BPF_K:
                case BPF_ALU | BPF_RSH | BPF_K:
                case BPF_ALU | BPF_ARSH | BPF_K:
                case BPF_ALU64 | BPF_LSH | BPF_K:
                case BPF_ALU64 | BPF_RSH | BPF_K:
                case BPF_ALU64 | BPF_ARSH | BPF_K:
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));

                        switch (BPF_OP(insn->code)) {
                        case BPF_LSH: b3 = 0xE0; break;
                        case BPF_RSH: b3 = 0xE8; break;
                        case BPF_ARSH: b3 = 0xF8; break;
                        }
                        EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
                        break;

                case BPF_ALU | BPF_LSH | BPF_X:
                case BPF_ALU | BPF_RSH | BPF_X:
                case BPF_ALU | BPF_ARSH | BPF_X:
                case BPF_ALU64 | BPF_LSH | BPF_X:
                case BPF_ALU64 | BPF_RSH | BPF_X:
                case BPF_ALU64 | BPF_ARSH | BPF_X:

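                        /* x86 variable-count shifts take their count only in
                         * %cl, so rcx (R4) must hold src_reg for the duration
                         * of the shift; the juggling below preserves the
                         * original rcx and dst_reg values.
                         */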
                        /* check for bad case when dst_reg == rcx */
                        if (dst_reg == BPF_REG_4) {
                                /* mov r11, dst_reg */
                                EMIT_mov(AUX_REG, dst_reg);
                                dst_reg = AUX_REG;
                        }

                        if (src_reg != BPF_REG_4) { /* common case */
                                EMIT1(0x51); /* push rcx */

                                /* mov rcx, src_reg */
                                EMIT_mov(BPF_REG_4, src_reg);
                        }

                        /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
                        if (BPF_CLASS(insn->code) == BPF_ALU64)
                                EMIT1(add_1mod(0x48, dst_reg));
                        else if (is_ereg(dst_reg))
                                EMIT1(add_1mod(0x40, dst_reg));

                        switch (BPF_OP(insn->code)) {
                        case BPF_LSH: b3 = 0xE0; break;
                        case BPF_RSH: b3 = 0xE8; break;
                        case BPF_ARSH: b3 = 0xF8; break;
                        }
                        EMIT2(0xD3, add_1reg(b3, dst_reg));

                        if (src_reg != BPF_REG_4)
                                EMIT1(0x59); /* pop rcx */

                        if (insn->dst_reg == BPF_REG_4)
                                /* mov dst_reg, r11 */
                                EMIT_mov(insn->dst_reg, AUX_REG);
                        break;

                case BPF_ALU | BPF_END | BPF_FROM_BE:
                        switch (imm32) {
                        case 16:
                                /* emit 'ror %ax, 8' to swap lower 2 bytes */
                                EMIT1(0x66);
                                if (is_ereg(dst_reg))
                                        EMIT1(0x41);
                                EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
                                break;
                        case 32:
                                /* emit 'bswap eax' to swap lower 4 bytes */
                                if (is_ereg(dst_reg))
                                        EMIT2(0x41, 0x0F);
                                else
                                        EMIT1(0x0F);
                                EMIT1(add_1reg(0xC8, dst_reg));
                                break;
                        case 64:
                                /* emit 'bswap rax' to swap 8 bytes */
                                EMIT3(add_1mod(0x48, dst_reg), 0x0F,
                                      add_1reg(0xC8, dst_reg));
                                break;
                        }
                        break;

                case BPF_ALU | BPF_END | BPF_FROM_LE:
                        break;

                        /* ST: *(u8*)(dst_reg + off) = imm */
                case BPF_ST | BPF_MEM | BPF_B:
                        if (is_ereg(dst_reg))
                                EMIT2(0x41, 0xC6);
                        else
                                EMIT1(0xC6);
                        goto st;
                case BPF_ST | BPF_MEM | BPF_H:
                        if (is_ereg(dst_reg))
                                EMIT3(0x66, 0x41, 0xC7);
                        else
                                EMIT2(0x66, 0xC7);
                        goto st;
                case BPF_ST | BPF_MEM | BPF_W:
                        if (is_ereg(dst_reg))
                                EMIT2(0x41, 0xC7);
                        else
                                EMIT1(0xC7);
                        goto st;
                case BPF_ST | BPF_MEM | BPF_DW:
                        EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:                     if (is_imm8(insn->off))
                                EMIT2(add_1reg(0x40, dst_reg), insn->off);
                        else
                                EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

                        EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
                        break;

                        /* STX: *(u8*)(dst_reg + off) = src_reg */
                case BPF_STX | BPF_MEM | BPF_B:
                        /* emit 'mov byte ptr [rax + off], al' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg) ||
                            /* have to add extra byte for x86 SIL, DIL regs */
                            src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
                                EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
                        else
                                EMIT1(0x88);
                        goto stx;
                case BPF_STX | BPF_MEM | BPF_H:
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
                        else
                                EMIT2(0x66, 0x89);
                        goto stx;
                case BPF_STX | BPF_MEM | BPF_W:
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
                        else
                                EMIT1(0x89);
                        goto stx;
                case BPF_STX | BPF_MEM | BPF_DW:
                        EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
stx:                    if (is_imm8(insn->off))
                                EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
                        else
                                EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
                                            insn->off);
                        break;

                        /* LDX: dst_reg = *(u8*)(src_reg + off) */
                case BPF_LDX | BPF_MEM | BPF_B:
                        /* emit 'movzx rax, byte ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_H:
                        /* emit 'movzx rax, word ptr [rax + off]' */
                        EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_W:
                        /* emit 'mov eax, dword ptr [rax+0x14]' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
                        else
                                EMIT1(0x8B);
                        goto ldx;
                case BPF_LDX | BPF_MEM | BPF_DW:
                        /* emit 'mov rax, qword ptr [rax+0x14]' */
                        EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx:                    /* if insn->off == 0 we can save one extra byte, but
                         * special case of x86 r13 which always needs an offset
                         * is not worth the hassle
                         */
                        if (is_imm8(insn->off))
                                EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
                        else
                                EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
                                            insn->off);
                        break;

                        /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
                case BPF_STX | BPF_XADD | BPF_W:
                        /* emit 'lock add dword ptr [rax + off], eax' */
                        if (is_ereg(dst_reg) || is_ereg(src_reg))
                                EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
                        else
                                EMIT2(0xF0, 0x01);
                        goto xadd;
                case BPF_STX | BPF_XADD | BPF_DW:
                        EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
xadd:                   if (is_imm8(insn->off))
                                EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
                        else
                                EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
                                            insn->off);
                        break;

                        /* call */
                case BPF_JMP | BPF_CALL:
                        func = (u8 *) __bpf_call_base + imm32;
                        jmp_offset = func - (image + addrs[i]);
                        if (seen_ld_abs) {
                                EMIT2(0x41, 0x52); /* push %r10 */
                                EMIT2(0x41, 0x51); /* push %r9 */
                                /* need to adjust jmp offset, since
                                 * pop %r9, pop %r10 take 4 bytes after call insn
                                 */
                                jmp_offset += 4;
                        }
                        if (!imm32 || !is_simm32(jmp_offset)) {
                                pr_err("unsupported bpf func %d addr %p image %p\n",
                                       imm32, func, image);
                                return -EINVAL;
                        }
                        EMIT1_off32(0xE8, jmp_offset);
                        if (seen_ld_abs) {
                                EMIT2(0x41, 0x59); /* pop %r9 */
                                EMIT2(0x41, 0x5A); /* pop %r10 */
                        }
                        break;

                        /* cond jump */
                case BPF_JMP | BPF_JEQ | BPF_X:
                case BPF_JMP | BPF_JNE | BPF_X:
                case BPF_JMP | BPF_JGT | BPF_X:
                case BPF_JMP | BPF_JGE | BPF_X:
                case BPF_JMP | BPF_JSGT | BPF_X:
                case BPF_JMP | BPF_JSGE | BPF_X:
                        /* cmp dst_reg, src_reg */
                        EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
                              add_2reg(0xC0, dst_reg, src_reg));
                        goto emit_cond_jmp;

                case BPF_JMP | BPF_JSET | BPF_X:
                        /* test dst_reg, src_reg */
                        EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
                              add_2reg(0xC0, dst_reg, src_reg));
                        goto emit_cond_jmp;

                case BPF_JMP | BPF_JSET | BPF_K:
                        /* test dst_reg, imm32 */
                        EMIT1(add_1mod(0x48, dst_reg));
                        EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
                        goto emit_cond_jmp;

                case BPF_JMP | BPF_JEQ | BPF_K:
                case BPF_JMP | BPF_JNE | BPF_K:
                case BPF_JMP | BPF_JGT | BPF_K:
                case BPF_JMP | BPF_JGE | BPF_K:
                case BPF_JMP | BPF_JSGT | BPF_K:
                case BPF_JMP | BPF_JSGE | BPF_K:
                        /* cmp dst_reg, imm8/32 */
                        EMIT1(add_1mod(0x48, dst_reg));

                        if (is_imm8(imm32))
                                EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
                        else
                                EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);

emit_cond_jmp:          /* convert BPF opcode to x86 */
                        switch (BPF_OP(insn->code)) {
                        case BPF_JEQ:
                                jmp_cond = X86_JE;
                                break;
                        case BPF_JSET:
                        case BPF_JNE:
                                jmp_cond = X86_JNE;
                                break;
                        case BPF_JGT:
                                /* GT is unsigned '>', JA in x86 */
                                jmp_cond = X86_JA;
                                break;
                        case BPF_JGE:
                                /* GE is unsigned '>=', JAE in x86 */
                                jmp_cond = X86_JAE;
                                break;
                        case BPF_JSGT:
                                /* signed '>', GT in x86 */
                                jmp_cond = X86_JG;
                                break;
                        case BPF_JSGE:
                                /* signed '>=', GE in x86 */
                                jmp_cond = X86_JGE;
                                break;
                        default: /* to silence gcc warning */
                                return -EFAULT;
                        }
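                        /* addrs[i] holds the offset of the end of BPF insn i
                         * in the x86 image, so addrs[i + insn->off] - addrs[i]
                         * is 'start of the target insn' minus 'end of this
                         * jump', exactly what the rel8/rel32 encodings expect.
                         */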
                        jmp_offset = addrs[i + insn->off] - addrs[i];
                        if (is_imm8(jmp_offset)) {
                                EMIT2(jmp_cond, jmp_offset);
                        } else if (is_simm32(jmp_offset)) {
                                EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
                        } else {
                                pr_err("cond_jmp gen bug %llx\n", jmp_offset);
                                return -EFAULT;
                        }

                        break;

                case BPF_JMP | BPF_JA:
                        jmp_offset = addrs[i + insn->off] - addrs[i];
                        if (!jmp_offset)
                                /* optimize out nop jumps */
                                break;
emit_jmp:
                        if (is_imm8(jmp_offset)) {
                                EMIT2(0xEB, jmp_offset);
                        } else if (is_simm32(jmp_offset)) {
                                EMIT1_off32(0xE9, jmp_offset);
                        } else {
                                pr_err("jmp gen bug %llx\n", jmp_offset);
                                return -EFAULT;
                        }
                        break;

                case BPF_LD | BPF_IND | BPF_W:
                        func = sk_load_word;
                        goto common_load;
                case BPF_LD | BPF_ABS | BPF_W:
                        func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
common_load:
                        ctx->seen_ld_abs = seen_ld_abs = true;
                        jmp_offset = func - (image + addrs[i]);
                        if (!func || !is_simm32(jmp_offset)) {
                                pr_err("unsupported bpf func %d addr %p image %p\n",
                                       imm32, func, image);
                                return -EINVAL;
                        }
                        if (BPF_MODE(insn->code) == BPF_ABS) {
                                /* mov %esi, imm32 */
                                EMIT1_off32(0xBE, imm32);
                        } else {
                                /* mov %rsi, src_reg */
                                EMIT_mov(BPF_REG_2, src_reg);
                                if (imm32) {
                                        if (is_imm8(imm32))
                                                /* add %esi, imm8 */
                                                EMIT3(0x83, 0xC6, imm32);
                                        else
                                                /* add %esi, imm32 */
                                                EMIT2_off32(0x81, 0xC6, imm32);
                                }
                        }
                        /* skb pointer is in R6 (%rbx), it will be copied into
                         * %rdi if skb_copy_bits() call is necessary.
                         * sk_load_* helpers also use %r10 and %r9d.
                         * See bpf_jit.S
                         */
                        EMIT1_off32(0xE8, jmp_offset); /* call */
                        break;

                case BPF_LD | BPF_IND | BPF_H:
                        func = sk_load_half;
                        goto common_load;
                case BPF_LD | BPF_ABS | BPF_H:
                        func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
                        goto common_load;
                case BPF_LD | BPF_IND | BPF_B:
                        func = sk_load_byte;
                        goto common_load;
                case BPF_LD | BPF_ABS | BPF_B:
                        func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
                        goto common_load;

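                        /* exit: restore callee-saved registers and return.
                         * The epilogue is emitted only once per pass; its
                         * offset is recorded in ctx->cleanup_addr so later
                         * BPF_EXIT insns (and the divide-by-zero path above)
                         * can simply jump to it.  Until the first EXIT of a
                         * pass is reached, cleanup_addr still holds the
                         * previous pass's value, one more reason several
                         * passes are needed before the image converges.
                         */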
                case BPF_JMP | BPF_EXIT:
                        if (seen_exit) {
                                jmp_offset = ctx->cleanup_addr - addrs[i];
                                goto emit_jmp;
                        }
                        seen_exit = true;
                        /* update cleanup_addr */
                        ctx->cleanup_addr = proglen;
                        /* mov rbx, qword ptr [rbp-X] */
                        EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize);
                        /* mov r13, qword ptr [rbp-X] */
                        EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8);
                        /* mov r14, qword ptr [rbp-X] */
                        EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16);
                        /* mov r15, qword ptr [rbp-X] */
                        EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24);

                        EMIT1(0xC9); /* leave */
                        EMIT1(0xC3); /* ret */
                        break;

                default:
                        /* By design x64 JIT should support all BPF instructions
                         * This error will be seen if new instruction was added
                         * to interpreter, but not to JIT
                         * or if there is junk in bpf_prog
                         */
                        pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
                        return -EINVAL;
                }

                ilen = prog - temp;
                if (ilen > BPF_MAX_INSN_SIZE) {
                        pr_err("bpf_jit_compile fatal insn size error\n");
                        return -EFAULT;
                }

                if (image) {
                        if (unlikely(proglen + ilen > oldproglen)) {
                                pr_err("bpf_jit_compile fatal error\n");
                                return -EFAULT;
                        }
                        memcpy(image + proglen, temp, ilen);
                }
                proglen += ilen;
                addrs[i] = proglen;
                prog = temp;
        }
        return proglen;
}

void bpf_jit_compile(struct bpf_prog *prog)
{
}

void bpf_int_jit_compile(struct bpf_prog *prog)
{
        struct bpf_binary_header *header = NULL;
        int proglen, oldproglen = 0;
        struct jit_context ctx = {};
        u8 *image = NULL;
        int *addrs;
        int pass;
        int i;

        if (!bpf_jit_enable)
                return;

        if (!prog || !prog->len)
                return;

        addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
        if (!addrs)
                return;

        /* Before first pass, make a rough estimation of addrs[]
         * each bpf instruction is translated to less than 64 bytes
         */
        for (proglen = 0, i = 0; i < prog->len; i++) {
                proglen += 64;
                addrs[i] = proglen;
        }
        ctx.cleanup_addr = proglen;
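
        /* JIT the program repeatedly with image == NULL until its length
         * stops changing: jumps can shrink from rel32 to rel8 as the address
         * estimates tighten.  Once two passes agree, the image is allocated
         * and one final pass writes the instructions into it.
         */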

        for (pass = 0; pass < 10; pass++) {
                proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
                if (proglen <= 0) {
                        image = NULL;
                        if (header)
                                bpf_jit_binary_free(header);
                        goto out;
                }
                if (image) {
                        if (proglen != oldproglen) {
                                pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
                                       proglen, oldproglen);
                                goto out;
                        }
                        break;
                }
                if (proglen == oldproglen) {
                        header = bpf_jit_binary_alloc(proglen, &image,
                                                      1, jit_fill_hole);
                        if (!header)
                                goto out;
                }
                oldproglen = proglen;
        }

        if (bpf_jit_enable > 1)
                bpf_jit_dump(prog->len, proglen, 0, image);

        if (image) {
                bpf_flush_icache(header, image + proglen);
                set_memory_ro((unsigned long)header, header->pages);
                prog->bpf_func = (void *)image;
                prog->jited = true;
        }
out:
        kfree(addrs);
}

void bpf_jit_free(struct bpf_prog *fp)
{
        unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
        struct bpf_binary_header *header = (void *)addr;

        if (!fp->jited)
                goto free_filter;

        set_memory_rw(addr, header->pages);
        bpf_jit_binary_free(header);

free_filter:
        bpf_prog_unlock_free(fp);
}