/* linux/arch/x86/net/bpf_jit_comp32.c */
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
   4 *
   5 * Author: Wang YanQing (udknight@gmail.com)
   6 * The code based on code and ideas from:
   7 * Eric Dumazet (eric.dumazet@gmail.com)
   8 * and from:
   9 * Shubham Bansal <illusionist.neo@gmail.com>
  10 */
  11
  12#include <linux/netdevice.h>
  13#include <linux/filter.h>
  14#include <linux/if_vlan.h>
  15#include <asm/cacheflush.h>
  16#include <asm/set_memory.h>
  17#include <asm/nospec-branch.h>
  18#include <linux/bpf.h>
  19
  20/*
  21 * eBPF prog stack layout:
  22 *
  23 *                         high
  24 * original ESP =>        +-----+
  25 *                        |     | callee saved registers
  26 *                        +-----+
  27 *                        | ... | eBPF JIT scratch space
  28 * BPF_FP,IA32_EBP  =>    +-----+
  29 *                        | ... | eBPF prog stack
  30 *                        +-----+
  31 *                        |RSVD | JIT scratchpad
  32 * current ESP =>         +-----+
  33 *                        |     |
  34 *                        | ... | Function call stack
  35 *                        |     |
  36 *                        +-----+
  37 *                          low
  38 *
  39 * The callee saved registers:
  40 *
  41 *                                high
  42 * original ESP =>        +------------------+ \
  43 *                        |        ebp       | |
  44 * current EBP =>         +------------------+ } callee saved registers
  45 *                        |    ebx,esi,edi   | |
  46 *                        +------------------+ /
  47 *                                low
  48 */
  49
  50static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
  51{
  52        if (len == 1)
  53                *ptr = bytes;
  54        else if (len == 2)
  55                *(u16 *)ptr = bytes;
  56        else {
  57                *(u32 *)ptr = bytes;
  58                barrier();
  59        }
  60        return ptr + len;
  61}
  62
/*
 * Append 'len' encoded bytes to the JIT image.  Every use site must have
 * 'u8 *prog' (write cursor) and 'int cnt' (bytes emitted so far in the
 * current emitter) in scope.
 */
#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

/* Pack 1..4 opcode/operand bytes (first byte in the low bits) and emit. */
#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)	\
	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

/* Same as above, followed by a raw 32-bit immediate/displacement. */
#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

/*
 * Relative displacement from the end of a 'jmp_insn_len'-byte jump emitted
 * at the current offset 'cnt' to the target offset 'label'.
 */
#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
  82
  83static bool is_imm8(int value)
  84{
  85        return value <= 127 && value >= -128;
  86}
  87
  88static bool is_simm32(s64 value)
  89{
  90        return value == (s64) (s32) value;
  91}
  92
/*
 * Byte offset of one 32-bit half of a spilled eBPF register inside the
 * stack scratch area (addressed via EBP, see bpf2ia32[]).
 */
#define STACK_OFFSET(k) (k)
/* Extra pseudo register for the tail-call counter, spilled like the rest. */
#define TCALL_CNT       (MAX_BPF_JIT_REG + 0)   /* Tail Call Count */

/* IA32 register numbers, as encoded in ModR/M 'reg' and 'r/m' fields. */
#define IA32_EAX        (0x0)
#define IA32_EBX        (0x3)
#define IA32_ECX        (0x1)
#define IA32_EDX        (0x2)
#define IA32_ESI        (0x6)
#define IA32_EDI        (0x7)
#define IA32_EBP        (0x5)
#define IA32_ESP        (0x4)

/*
 * List of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define IA32_JB  0x72
#define IA32_JAE 0x73
#define IA32_JE  0x74
#define IA32_JNE 0x75
#define IA32_JBE 0x76
#define IA32_JA  0x77
#define IA32_JL  0x7C
#define IA32_JGE 0x7D
#define IA32_JLE 0x7E
#define IA32_JG  0x7F
 119
 120/*
 121 * Map eBPF registers to IA32 32bit registers or stack scratch space.
 122 *
 123 * 1. All the registers, R0-R10, are mapped to scratch space on stack.
 124 * 2. We need two 64 bit temp registers to do complex operations on eBPF
 125 *    registers.
 126 * 3. For performance reason, the BPF_REG_AX for blinding constant, is
 127 *    mapped to real hardware register pair, IA32_ESI and IA32_EDI.
 128 *
 129 * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
 130 * registers, we have to map each eBPF registers with two IA32 32 bit regs
 131 * or scratch memory space and we have to build eBPF 64 bit register from those.
 132 *
 133 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
 134 */
/*
 * Per eBPF register {lo, hi} pair: either two byte offsets into the stack
 * scratch area (all spilled registers) or two IA32 register numbers
 * (only BPF_REG_AX, which lives in ESI:EDI).
 */
static const u8 bpf2ia32[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},

	/* The arguments from eBPF program to in-kernel function */
	/* Stored on stack scratch space */
	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},

	/* Callee saved registers that in-kernel function will preserve */
	/* Stored on stack scratch space */
	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},

	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},

	/* Temporary register for blinding constants. */
	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},

	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
};
 163
/* Shorthands for the lo/hi 32-bit half of a mapped register pair. */
#define dst_lo  dst[0]
#define dst_hi  dst[1]
#define src_lo  src[0]
#define src_hi  src[1]

#define STACK_ALIGNMENT 8
/*
 * Stack scratch space for the spilled registers: BPF_REG_0..BPF_REG_9,
 * BPF_REG_FP and the tail call count — 12 pairs * 8 bytes = 96.
 * (BPF_REG_AX lives in ESI:EDI, not on the stack; see bpf2ia32[].)
 */
#define SCRATCH_SIZE 96

/* Total stack size used in JITed code */
#define _STACK_SIZE     (stack_depth + SCRATCH_SIZE)

#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (off)
 184
 185/* Encode 'dst_reg' register into IA32 opcode 'byte' */
 186static u8 add_1reg(u8 byte, u32 dst_reg)
 187{
 188        return byte + dst_reg;
 189}
 190
 191/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
 192static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
 193{
 194        return byte + dst_reg + (src_reg << 3);
 195}
 196
/* Fill unused JIT image space with int3 (0xcc) so stray execution traps. */
static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with int3 instructions */
	memset(area, 0xcc, size);
}
 202
/*
 * Emit: dst = imm32, for one 32-bit half of an eBPF register.
 * @dst:  IA32 register number, or (when @dstk) a byte offset into the
 *        stack scratch area addressed via EBP
 * @val:  immediate to store
 * Uses EAX as scratch for the zero-to-stack case.
 */
static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk) {
		if (val == 0) {
			/* xor eax,eax */
			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		} else {
			/* mov dword ptr [ebp+off],imm32 */
			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
				    STACK_VAR(dst), val);
		}
	} else {
		if (val == 0)
			/* xor dst,dst */
			EMIT2(0x33, add_2reg(0xC0, dst, dst));
		else
			/* mov dst,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
				    val);
	}
	*pprog = prog;
}
 229
/* dst = src (one 32-bit half; either side may live on the stack) */
static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* A spilled src is staged through EAX. */
	u8 sreg = sstk ? IA32_EAX : src;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
	if (dstk)
		/* mov dword ptr [ebp+off],sreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
	else
		/* mov dst,sreg */
		EMIT2(0x89, add_2reg(0xC0, dst, sreg));

	*pprog = prog;
}
 250
 251/* dst = src */
 252static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
 253                                     const u8 src[], bool dstk,
 254                                     bool sstk, u8 **pprog)
 255{
 256        emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
 257        if (is64)
 258                /* complete 8 byte move */
 259                emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
 260        else
 261                /* zero out high 4 bytes */
 262                emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
 263}
 264
 265/* Sign extended move */
 266static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
 267                                     const u32 val, bool dstk, u8 **pprog)
 268{
 269        u32 hi = 0;
 270
 271        if (is64 && (val & (1<<31)))
 272                hi = (u32)~0;
 273        emit_ia32_mov_i(dst_lo, val, dstk, pprog);
 274        emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
 275}
 276
/*
 * ALU operation (32 bit)
 * dst = dst * src
 *
 * Unsigned multiply via the one-operand MUL: EDX:EAX = EAX * sreg.  Only
 * the low 32 bits (EAX) are written back; EAX and EDX are clobbered, plus
 * ECX when @src is spilled.
 */
static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_ECX : src;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));


	/* mul sreg: EDX:EAX = EAX * sreg */
	EMIT2(0xF7, add_1reg(0xE0, sreg));

	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov dst,eax */
		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));

	*pprog = prog;
}
 312
/*
 * Emit code for BPF_END | BPF_TO_LE: truncate dst to @val (16/32/64) bits
 * and zero-extend the result.  IA32 is little-endian, so no byte swap is
 * needed; @val == 64 emits nothing.
 */
static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
					 bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk && val != 64) {
		/* Load the spilled pair into EAX:EDX. */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/*
		 * Emit 'movzwl eax,ax' to zero extend 16-bit
		 * into 64 bit
		 */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* nop */
		break;
	}

	if (dstk && val != 64) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
 357
/*
 * Emit code for BPF_END | BPF_TO_BE: byte-swap the low @val (16/32/64)
 * bits of dst; for 16/32 the result is also zero-extended to 64 bit.
 */
static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
				       bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* Load the spilled pair into EAX:EDX. */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
		EMIT1(0x66);
		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);

		/* movzwl dreg_lo,dreg_lo: zero-extend the swapped 16 bits */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));

		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* Swap each 32-bit half, then exchange the halves via ECX. */
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* Emit 'bswap edx' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_hi));

		/* mov ecx,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* mov dreg_lo,ecx */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));

		break;
	}
	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
 420
/*
 * ALU operation (32 bit)
 * dst = dst (div|mod) src
 *
 * Unsigned 32-bit divide: EDX is zeroed, then EDX:EAX is divided by ECX.
 * Quotient lands in EAX (BPF_DIV), remainder in EDX (BPF_MOD).  Clobbers
 * EAX, ECX and EDX.  NOTE(review): a zero divisor would fault (#DE) —
 * presumably ruled out by the verifier/earlier rewriting; confirm.
 */
static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
				       bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* xor edx,edx */
	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* div ecx */
	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));

	if (op == BPF_MOD) {
		if (dstk)
			/* mov dword ptr [ebp+off],edx */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(dst));
		else
			/* mov dst,edx */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
	} else {
		if (dstk)
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		else
			/* mov dst,eax */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
	}
	*pprog = prog;
}
 467
/*
 * ALU operation (32 bit)
 * dst = dst (shift) src
 *
 * Shift count is taken from CL.  EAX stages a spilled dst, ECX a spilled
 * src.  BPF_LSH/BPF_RSH/BPF_ARSH map to SHL/SHR/SAR.
 */
static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 b2;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	switch (op) {
	case BPF_LSH:
		b2 = 0xE0; break;	/* shl */
	case BPF_RSH:
		b2 = 0xE8; break;	/* shr */
	case BPF_ARSH:
		b2 = 0xF8; break;	/* sar */
	default:
		/* Unknown op: bail out without committing 'prog'. */
		return;
	}
	/* shift dreg,cl */
	EMIT2(0xD3, add_1reg(b2, dreg));

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
	*pprog = prog;
}
 508
/*
 * ALU operation (32 bit)
 * dst = dst (op) src
 *
 * Emits one 32-bit ALU op for one half of an eBPF register pair.  When
 * @hi && @is64 this is the high word of a 64-bit op, so BPF_ADD/BPF_SUB
 * use ADC/SBB to consume the carry/borrow left by the low-word op.
 * EAX stages a spilled src, EDX a spilled dst.
 */
static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;
	u8 dreg = dstk ? IA32_EDX : dst;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));

	if (dstk)
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));

	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (hi && is64)
			/* adc dreg,sreg */
			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		else
			/* add dreg,sreg */
			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst - src */
	case BPF_SUB:
		if (hi && is64)
			/* sbb dreg,sreg */
			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		else
			/* sub dreg,sreg */
			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst | src */
	case BPF_OR:
		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & src */
	case BPF_AND:
		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
 565
 566/* ALU operation (64 bit) */
 567static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
 568                                     const u8 dst[], const u8 src[],
 569                                     bool dstk,  bool sstk,
 570                                     u8 **pprog)
 571{
 572        u8 *prog = *pprog;
 573
 574        emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
 575        if (is64)
 576                emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
 577                                &prog);
 578        else
 579                emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
 580        *pprog = prog;
 581}
 582
/*
 * ALU operation (32 bit)
 * dst = dst (op) val
 *
 * One 32-bit ALU op with an immediate, for one half of a register pair.
 * Immediates fitting a signed byte use the 0x83 imm8 forms; larger ones
 * are first loaded into EDX and a register-register form is used.  When
 * @hi && @is64, BPF_ADD/BPF_SUB become ADC/SBB so the carry/borrow from
 * the low word propagates.  BPF_NEG ignores @val.
 */
static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const s32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 sreg = IA32_EDX;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (!is_imm8(val))
		/* mov edx,imm32*/
		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);

	switch (op) {
	/* dst = dst + val */
	case BPF_ADD:
		if (hi && is64) {
			if (is_imm8(val))
				/* adc dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD0, dreg), val);
			else
				/* adc dreg,edx */
				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* add dreg,imm8 */
				EMIT3(0x83, add_1reg(0xC0, dreg), val);
			else
				/* add dreg,edx */
				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst - val */
	case BPF_SUB:
		if (hi && is64) {
			if (is_imm8(val))
				/* sbb dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD8, dreg), val);
			else
				/* sbb dreg,edx */
				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* sub dreg,imm8 */
				EMIT3(0x83, add_1reg(0xE8, dreg), val);
			else
				/* sub dreg,edx */
				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst | val */
	case BPF_OR:
		if (is_imm8(val))
			/* or dreg,imm8 */
			EMIT3(0x83, add_1reg(0xC8, dreg), val);
		else
			/* or dreg,edx */
			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & val */
	case BPF_AND:
		if (is_imm8(val))
			/* and dreg,imm8 */
			EMIT3(0x83, add_1reg(0xE0, dreg), val);
		else
			/* and dreg,edx */
			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ val */
	case BPF_XOR:
		if (is_imm8(val))
			/* xor dreg,imm8 */
			EMIT3(0x83, add_1reg(0xF0, dreg), val);
		else
			/* xor dreg,edx */
			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	case BPF_NEG:
		/* neg dreg */
		EMIT2(0xF7, add_1reg(0xD8, dreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
 665
 666/* ALU operation (64 bit) */
 667static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
 668                                     const u8 dst[], const u32 val,
 669                                     bool dstk, u8 **pprog)
 670{
 671        u8 *prog = *pprog;
 672        u32 hi = 0;
 673
 674        if (is64 && (val & (1<<31)))
 675                hi = (u32)~0;
 676
 677        emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
 678        if (is64)
 679                emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
 680        else
 681                emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
 682
 683        *pprog = prog;
 684}
 685
 686/* dst = ~dst (64 bit) */
 687static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
 688{
 689        u8 *prog = *pprog;
 690        int cnt = 0;
 691        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 692        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 693
 694        if (dstk) {
 695                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 696                      STACK_VAR(dst_lo));
 697                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 698                      STACK_VAR(dst_hi));
 699        }
 700
 701        /* xor ecx,ecx */
 702        EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
 703        /* sub dreg_lo,ecx */
 704        EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX));
 705        /* mov dreg_lo,ecx */
 706        EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
 707
 708        /* xor ecx,ecx */
 709        EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
 710        /* sbb dreg_hi,ecx */
 711        EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX));
 712        /* mov dreg_hi,ecx */
 713        EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX));
 714
 715        if (dstk) {
 716                /* mov dword ptr [ebp+off],dreg_lo */
 717                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 718                      STACK_VAR(dst_lo));
 719                /* mov dword ptr [ebp+off],dreg_hi */
 720                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 721                      STACK_VAR(dst_hi));
 722        }
 723        *pprog = prog;
 724}
 725
 726/* dst = dst << src */
 727static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
 728                                     bool dstk, bool sstk, u8 **pprog)
 729{
 730        u8 *prog = *pprog;
 731        int cnt = 0;
 732        static int jmp_label1 = -1;
 733        static int jmp_label2 = -1;
 734        static int jmp_label3 = -1;
 735        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 736        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 737
 738        if (dstk) {
 739                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 740                      STACK_VAR(dst_lo));
 741                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 742                      STACK_VAR(dst_hi));
 743        }
 744
 745        if (sstk)
 746                /* mov ecx,dword ptr [ebp+off] */
 747                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
 748                      STACK_VAR(src_lo));
 749        else
 750                /* mov ecx,src_lo */
 751                EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
 752
 753        /* cmp ecx,32 */
 754        EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
 755        /* Jumps when >= 32 */
 756        if (is_imm8(jmp_label(jmp_label1, 2)))
 757                EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
 758        else
 759                EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
 760
 761        /* < 32 */
 762        /* shl dreg_hi,cl */
 763        EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
 764        /* mov ebx,dreg_lo */
 765        EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
 766        /* shl dreg_lo,cl */
 767        EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
 768
 769        /* IA32_ECX = -IA32_ECX + 32 */
 770        /* neg ecx */
 771        EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
 772        /* add ecx,32 */
 773        EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
 774
 775        /* shr ebx,cl */
 776        EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
 777        /* or dreg_hi,ebx */
 778        EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
 779
 780        /* goto out; */
 781        if (is_imm8(jmp_label(jmp_label3, 2)))
 782                EMIT2(0xEB, jmp_label(jmp_label3, 2));
 783        else
 784                EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
 785
 786        /* >= 32 */
 787        if (jmp_label1 == -1)
 788                jmp_label1 = cnt;
 789
 790        /* cmp ecx,64 */
 791        EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
 792        /* Jumps when >= 64 */
 793        if (is_imm8(jmp_label(jmp_label2, 2)))
 794                EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
 795        else
 796                EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
 797
 798        /* >= 32 && < 64 */
 799        /* sub ecx,32 */
 800        EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
 801        /* shl dreg_lo,cl */
 802        EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
 803        /* mov dreg_hi,dreg_lo */
 804        EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
 805
 806        /* xor dreg_lo,dreg_lo */
 807        EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
 808
 809        /* goto out; */
 810        if (is_imm8(jmp_label(jmp_label3, 2)))
 811                EMIT2(0xEB, jmp_label(jmp_label3, 2));
 812        else
 813                EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
 814
 815        /* >= 64 */
 816        if (jmp_label2 == -1)
 817                jmp_label2 = cnt;
 818        /* xor dreg_lo,dreg_lo */
 819        EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
 820        /* xor dreg_hi,dreg_hi */
 821        EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 822
 823        if (jmp_label3 == -1)
 824                jmp_label3 = cnt;
 825
 826        if (dstk) {
 827                /* mov dword ptr [ebp+off],dreg_lo */
 828                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 829                      STACK_VAR(dst_lo));
 830                /* mov dword ptr [ebp+off],dreg_hi */
 831                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 832                      STACK_VAR(dst_hi));
 833        }
 834        /* out: */
 835        *pprog = prog;
 836}
 837
/*
 * dst = dst >> src (signed)
 *
 * Emit IA32 code for a 64-bit arithmetic right shift of dst by the
 * count held in src_lo.  Three runtime cases are emitted: count < 32,
 * 32 <= count < 64, and count >= 64 (result collapses to the sign
 * extension).  Clobbers EBX and ECX (and EAX/EDX when dstk is set).
 *
 * The jmp_label* statics cache forward-branch targets: they start at
 * -1, are latched to the local byte offset (cnt) the first time this
 * helper is emitted, and are reused on subsequent emissions.  That is
 * only valid because the helper always emits a fixed-length sequence.
 */
static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
                                      bool dstk, bool sstk, u8 **pprog)
{
        u8 *prog = *pprog;
        int cnt = 0;
        static int jmp_label1 = -1;
        static int jmp_label2 = -1;
        static int jmp_label3 = -1;
        /* Work on EAX:EDX when dst lives on the scratch stack */
        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

        if (dstk) {
                /* mov eax,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
                      STACK_VAR(dst_lo));
                /* mov edx,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
                      STACK_VAR(dst_hi));
        }

        if (sstk)
                /* mov ecx,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
                      STACK_VAR(src_lo));
        else
                /* mov ecx,src_lo */
                EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

        /* cmp ecx,32 */
        EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
        /* Jumps when >= 32 */
        if (is_imm8(jmp_label(jmp_label1, 2)))
                EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
        else
                EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));

        /* < 32 */
        /* lshr dreg_lo,cl */
        EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
        /* mov ebx,dreg_hi */
        EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
        /* ashr dreg_hi,cl */
        EMIT2(0xD3, add_1reg(0xF8, dreg_hi));

        /* IA32_ECX = -IA32_ECX + 32 */
        /* neg ecx */
        EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
        /* add ecx,32 */
        EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);

        /*
         * Move the bits shifted out of the high word into the top of
         * the low word.
         *
         * NOTE(review): this fix-up is wrong for a shift count of 0:
         * ecx becomes 32, x86 masks 32-bit shift counts mod 32, so
         * "shl ebx,cl" is a no-op and the OR below merges the entire
         * high word into the low word.  A SHRD-based sequence avoids
         * this - confirm against upstream.
         */
        /* shl ebx,cl */
        EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
        /* or dreg_lo,ebx */
        EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));

        /* goto out; */
        if (is_imm8(jmp_label(jmp_label3, 2)))
                EMIT2(0xEB, jmp_label(jmp_label3, 2));
        else
                EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));

        /* >= 32 */
        if (jmp_label1 == -1)
                jmp_label1 = cnt;

        /* cmp ecx,64 */
        EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
        /* Jumps when >= 64 */
        if (is_imm8(jmp_label(jmp_label2, 2)))
                EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
        else
                EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));

        /* >= 32 && < 64 */
        /* sub ecx,32 */
        EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
        /* ashr dreg_hi,cl */
        EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
        /* mov dreg_lo,dreg_hi */
        EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));

        /* The high word is now pure sign extension */
        /* ashr dreg_hi,imm8 */
        EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);

        /* goto out; */
        if (is_imm8(jmp_label(jmp_label3, 2)))
                EMIT2(0xEB, jmp_label(jmp_label3, 2));
        else
                EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));

        /* >= 64: both words collapse to the sign bit */
        if (jmp_label2 == -1)
                jmp_label2 = cnt;
        /* ashr dreg_hi,imm8 */
        EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
        /* mov dreg_lo,dreg_hi */
        EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));

        if (jmp_label3 == -1)
                jmp_label3 = cnt;

        if (dstk) {
                /* mov dword ptr [ebp+off],dreg_lo */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
                      STACK_VAR(dst_lo));
                /* mov dword ptr [ebp+off],dreg_hi */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
                      STACK_VAR(dst_hi));
        }
        /* out: */
        *pprog = prog;
}
 949
/*
 * dst = dst >> src (logical)
 *
 * Emit IA32 code for a 64-bit logical right shift of dst by the count
 * held in src_lo.  Three runtime cases are emitted: count < 32,
 * 32 <= count < 64, and count >= 64 (result becomes zero).  Clobbers
 * EBX and ECX (and EAX/EDX when dstk is set).
 *
 * The jmp_label* statics cache forward-branch targets: they start at
 * -1, are latched to the local byte offset (cnt) the first time this
 * helper is emitted, and are reused on subsequent emissions.  That is
 * only valid because the helper always emits a fixed-length sequence.
 */
static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
                                     bool sstk, u8 **pprog)
{
        u8 *prog = *pprog;
        int cnt = 0;
        static int jmp_label1 = -1;
        static int jmp_label2 = -1;
        static int jmp_label3 = -1;
        /* Work on EAX:EDX when dst lives on the scratch stack */
        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

        if (dstk) {
                /* mov eax,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
                      STACK_VAR(dst_lo));
                /* mov edx,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
                      STACK_VAR(dst_hi));
        }

        if (sstk)
                /* mov ecx,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
                      STACK_VAR(src_lo));
        else
                /* mov ecx,src_lo */
                EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

        /* cmp ecx,32 */
        EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
        /* Jumps when >= 32 */
        if (is_imm8(jmp_label(jmp_label1, 2)))
                EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
        else
                EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));

        /* < 32 */
        /* lshr dreg_lo,cl */
        EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
        /* mov ebx,dreg_hi */
        EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
        /* shr dreg_hi,cl */
        EMIT2(0xD3, add_1reg(0xE8, dreg_hi));

        /* IA32_ECX = -IA32_ECX + 32 */
        /* neg ecx */
        EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
        /* add ecx,32 */
        EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);

        /*
         * Move the bits shifted out of the high word into the top of
         * the low word.
         *
         * NOTE(review): this fix-up is wrong for a shift count of 0:
         * ecx becomes 32, x86 masks 32-bit shift counts mod 32, so
         * "shl ebx,cl" is a no-op and the OR below merges the entire
         * high word into the low word.  A SHRD-based sequence avoids
         * this - confirm against upstream.
         */
        /* shl ebx,cl */
        EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
        /* or dreg_lo,ebx */
        EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));

        /* goto out; */
        if (is_imm8(jmp_label(jmp_label3, 2)))
                EMIT2(0xEB, jmp_label(jmp_label3, 2));
        else
                EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));

        /* >= 32 */
        if (jmp_label1 == -1)
                jmp_label1 = cnt;
        /* cmp ecx,64 */
        EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
        /* Jumps when >= 64 */
        if (is_imm8(jmp_label(jmp_label2, 2)))
                EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
        else
                EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));

        /* >= 32 && < 64 */
        /* sub ecx,32 */
        EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
        /* shr dreg_hi,cl */
        EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
        /* mov dreg_lo,dreg_hi */
        EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
        /* xor dreg_hi,dreg_hi */
        EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));

        /* goto out; */
        if (is_imm8(jmp_label(jmp_label3, 2)))
                EMIT2(0xEB, jmp_label(jmp_label3, 2));
        else
                EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));

        /* >= 64: the whole value is shifted out, result is zero */
        if (jmp_label2 == -1)
                jmp_label2 = cnt;
        /* xor dreg_lo,dreg_lo */
        EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
        /* xor dreg_hi,dreg_hi */
        EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));

        if (jmp_label3 == -1)
                jmp_label3 = cnt;

        if (dstk) {
                /* mov dword ptr [ebp+off],dreg_lo */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
                      STACK_VAR(dst_lo));
                /* mov dword ptr [ebp+off],dreg_hi */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
                      STACK_VAR(dst_hi));
        }
        /* out: */
        *pprog = prog;
}
1059
1060/* dst = dst << val */
1061static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
1062                                     bool dstk, u8 **pprog)
1063{
1064        u8 *prog = *pprog;
1065        int cnt = 0;
1066        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1067        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1068
1069        if (dstk) {
1070                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1071                      STACK_VAR(dst_lo));
1072                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1073                      STACK_VAR(dst_hi));
1074        }
1075        /* Do LSH operation */
1076        if (val < 32) {
1077                /* shl dreg_hi,imm8 */
1078                EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
1079                /* mov ebx,dreg_lo */
1080                EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
1081                /* shl dreg_lo,imm8 */
1082                EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
1083
1084                /* IA32_ECX = 32 - val */
1085                /* mov ecx,val */
1086                EMIT2(0xB1, val);
1087                /* movzx ecx,ecx */
1088                EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1089                /* neg ecx */
1090                EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1091                /* add ecx,32 */
1092                EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1093
1094                /* shr ebx,cl */
1095                EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
1096                /* or dreg_hi,ebx */
1097                EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
1098        } else if (val >= 32 && val < 64) {
1099                u32 value = val - 32;
1100
1101                /* shl dreg_lo,imm8 */
1102                EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
1103                /* mov dreg_hi,dreg_lo */
1104                EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
1105                /* xor dreg_lo,dreg_lo */
1106                EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1107        } else {
1108                /* xor dreg_lo,dreg_lo */
1109                EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1110                /* xor dreg_hi,dreg_hi */
1111                EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1112        }
1113
1114        if (dstk) {
1115                /* mov dword ptr [ebp+off],dreg_lo */
1116                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1117                      STACK_VAR(dst_lo));
1118                /* mov dword ptr [ebp+off],dreg_hi */
1119                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1120                      STACK_VAR(dst_hi));
1121        }
1122        *pprog = prog;
1123}
1124
1125/* dst = dst >> val */
1126static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
1127                                     bool dstk, u8 **pprog)
1128{
1129        u8 *prog = *pprog;
1130        int cnt = 0;
1131        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1132        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1133
1134        if (dstk) {
1135                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1136                      STACK_VAR(dst_lo));
1137                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1138                      STACK_VAR(dst_hi));
1139        }
1140
1141        /* Do RSH operation */
1142        if (val < 32) {
1143                /* shr dreg_lo,imm8 */
1144                EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1145                /* mov ebx,dreg_hi */
1146                EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1147                /* shr dreg_hi,imm8 */
1148                EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
1149
1150                /* IA32_ECX = 32 - val */
1151                /* mov ecx,val */
1152                EMIT2(0xB1, val);
1153                /* movzx ecx,ecx */
1154                EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1155                /* neg ecx */
1156                EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1157                /* add ecx,32 */
1158                EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1159
1160                /* shl ebx,cl */
1161                EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1162                /* or dreg_lo,ebx */
1163                EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1164        } else if (val >= 32 && val < 64) {
1165                u32 value = val - 32;
1166
1167                /* shr dreg_hi,imm8 */
1168                EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
1169                /* mov dreg_lo,dreg_hi */
1170                EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1171                /* xor dreg_hi,dreg_hi */
1172                EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1173        } else {
1174                /* xor dreg_lo,dreg_lo */
1175                EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1176                /* xor dreg_hi,dreg_hi */
1177                EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1178        }
1179
1180        if (dstk) {
1181                /* mov dword ptr [ebp+off],dreg_lo */
1182                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1183                      STACK_VAR(dst_lo));
1184                /* mov dword ptr [ebp+off],dreg_hi */
1185                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1186                      STACK_VAR(dst_hi));
1187        }
1188        *pprog = prog;
1189}
1190
1191/* dst = dst >> val (signed) */
1192static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
1193                                      bool dstk, u8 **pprog)
1194{
1195        u8 *prog = *pprog;
1196        int cnt = 0;
1197        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1198        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1199
1200        if (dstk) {
1201                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1202                      STACK_VAR(dst_lo));
1203                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1204                      STACK_VAR(dst_hi));
1205        }
1206        /* Do RSH operation */
1207        if (val < 32) {
1208                /* shr dreg_lo,imm8 */
1209                EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1210                /* mov ebx,dreg_hi */
1211                EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1212                /* ashr dreg_hi,imm8 */
1213                EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
1214
1215                /* IA32_ECX = 32 - val */
1216                /* mov ecx,val */
1217                EMIT2(0xB1, val);
1218                /* movzx ecx,ecx */
1219                EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1220                /* neg ecx */
1221                EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1222                /* add ecx,32 */
1223                EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1224
1225                /* shl ebx,cl */
1226                EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1227                /* or dreg_lo,ebx */
1228                EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1229        } else if (val >= 32 && val < 64) {
1230                u32 value = val - 32;
1231
1232                /* ashr dreg_hi,imm8 */
1233                EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
1234                /* mov dreg_lo,dreg_hi */
1235                EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1236
1237                /* ashr dreg_hi,imm8 */
1238                EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1239        } else {
1240                /* ashr dreg_hi,imm8 */
1241                EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1242                /* mov dreg_lo,dreg_hi */
1243                EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1244        }
1245
1246        if (dstk) {
1247                /* mov dword ptr [ebp+off],dreg_lo */
1248                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1249                      STACK_VAR(dst_lo));
1250                /* mov dword ptr [ebp+off],dreg_hi */
1251                EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1252                      STACK_VAR(dst_hi));
1253        }
1254        *pprog = prog;
1255}
1256
/*
 * dst = dst * src, keeping the low 64 bits of the product.
 *
 * Schoolbook 32x32 partial products:
 *   hi(result) = lo32(dst_hi * src_lo) + lo32(dst_lo * src_hi)
 *              + hi32(dst_lo * src_lo)
 *   lo(result) = lo32(dst_lo * src_lo)
 * ECX accumulates the high word; the final low word is left in EAX.
 * Clobbers EAX, ECX and EDX.
 */
static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
                                     bool sstk, u8 **pprog)
{
        u8 *prog = *pprog;
        int cnt = 0;

        /* Partial #1: dst_hi * src_lo */
        if (dstk)
                /* mov eax,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
                      STACK_VAR(dst_hi));
        else
                /* mov eax,dst_hi */
                EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));

        if (sstk)
                /* mul dword ptr [ebp+off] */
                EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
        else
                /* mul src_lo */
                EMIT2(0xF7, add_1reg(0xE0, src_lo));

        /* mov ecx,eax */
        EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

        /* Partial #2: dst_lo * src_hi */
        if (dstk)
                /* mov eax,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
                      STACK_VAR(dst_lo));
        else
                /* mov eax,dst_lo */
                EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

        if (sstk)
                /* mul dword ptr [ebp+off] */
                EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
        else
                /* mul src_hi */
                EMIT2(0xF7, add_1reg(0xE0, src_hi));

        /* add ecx,eax */
        EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

        /* Partial #3: full dst_lo * src_lo, result in edx:eax */
        if (dstk)
                /* mov eax,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
                      STACK_VAR(dst_lo));
        else
                /* mov eax,dst_lo */
                EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

        if (sstk)
                /* mul dword ptr [ebp+off] */
                EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
        else
                /* mul src_lo */
                EMIT2(0xF7, add_1reg(0xE0, src_lo));

        /* add ecx,edx */
        EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

        /* Write back: eax is the low word, ecx the high word */
        if (dstk) {
                /* mov dword ptr [ebp+off],eax */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
                      STACK_VAR(dst_lo));
                /* mov dword ptr [ebp+off],ecx */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
                      STACK_VAR(dst_hi));
        } else {
                /* mov dst_lo,eax */
                EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
                /* mov dst_hi,ecx */
                EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
        }

        *pprog = prog;
}
1333
1334static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
1335                                     bool dstk, u8 **pprog)
1336{
1337        u8 *prog = *pprog;
1338        int cnt = 0;
1339        u32 hi;
1340
1341        hi = val & (1<<31) ? (u32)~0 : 0;
1342        /* movl eax,imm32 */
1343        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1344        if (dstk)
1345                /* mul dword ptr [ebp+off] */
1346                EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
1347        else
1348                /* mul dst_hi */
1349                EMIT2(0xF7, add_1reg(0xE0, dst_hi));
1350
1351        /* mov ecx,eax */
1352        EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1353
1354        /* movl eax,imm32 */
1355        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
1356        if (dstk)
1357                /* mul dword ptr [ebp+off] */
1358                EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1359        else
1360                /* mul dst_lo */
1361                EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1362        /* add ecx,eax */
1363        EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1364
1365        /* movl eax,imm32 */
1366        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1367        if (dstk)
1368                /* mul dword ptr [ebp+off] */
1369                EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1370        else
1371                /* mul dst_lo */
1372                EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1373
1374        /* add ecx,edx */
1375        EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1376
1377        if (dstk) {
1378                /* mov dword ptr [ebp+off],eax */
1379                EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1380                      STACK_VAR(dst_lo));
1381                /* mov dword ptr [ebp+off],ecx */
1382                EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1383                      STACK_VAR(dst_hi));
1384        } else {
1385                /* mov dword ptr [ebp+off],eax */
1386                EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1387                /* mov dword ptr [ebp+off],ecx */
1388                EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1389        }
1390
1391        *pprog = prog;
1392}
1393
1394static int bpf_size_to_x86_bytes(int bpf_size)
1395{
1396        if (bpf_size == BPF_W)
1397                return 4;
1398        else if (bpf_size == BPF_H)
1399                return 2;
1400        else if (bpf_size == BPF_B)
1401                return 1;
1402        else if (bpf_size == BPF_DW)
1403                return 4; /* imm32 */
1404        else
1405                return 0;
1406}
1407
/* Per-program state carried between do_jit() passes */
struct jit_context {
        int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE       128
#define BPF_INSN_SAFETY         64

/*
 * Fixed prologue length in bytes; enforced by the BUILD_BUG_ON() in
 * emit_prologue() and skipped over by emit_bpf_tail_call().
 */
#define PROLOGUE_SIZE 35
1417
1418/*
1419 * Emit prologue code for BPF program and check it's size.
1420 * bpf_tail_call helper will skip it while jumping into another program.
1421 */
1422static void emit_prologue(u8 **pprog, u32 stack_depth)
1423{
1424        u8 *prog = *pprog;
1425        int cnt = 0;
1426        const u8 *r1 = bpf2ia32[BPF_REG_1];
1427        const u8 fplo = bpf2ia32[BPF_REG_FP][0];
1428        const u8 fphi = bpf2ia32[BPF_REG_FP][1];
1429        const u8 *tcc = bpf2ia32[TCALL_CNT];
1430
1431        /* push ebp */
1432        EMIT1(0x55);
1433        /* mov ebp,esp */
1434        EMIT2(0x89, 0xE5);
1435        /* push edi */
1436        EMIT1(0x57);
1437        /* push esi */
1438        EMIT1(0x56);
1439        /* push ebx */
1440        EMIT1(0x53);
1441
1442        /* sub esp,STACK_SIZE */
1443        EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1444        /* sub ebp,SCRATCH_SIZE+12*/
1445        EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
1446        /* xor ebx,ebx */
1447        EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1448
1449        /* Set up BPF prog stack base register */
1450        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
1451        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
1452
1453        /* Move BPF_CTX (EAX) to BPF_REG_R1 */
1454        /* mov dword ptr [ebp+off],eax */
1455        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1456        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
1457
1458        /* Initialize Tail Count */
1459        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
1460        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1461
1462        BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
1463        *pprog = prog;
1464}
1465
/*
 * Emit epilogue code for BPF program: load BPF R0 into the EDX:EAX
 * return pair, restore the callee-saved registers and return.
 * stack_depth is unused; "leave" restores ESP from EBP.
 */
static void emit_epilogue(u8 **pprog, u32 stack_depth)
{
        u8 *prog = *pprog;
        const u8 *r0 = bpf2ia32[BPF_REG_0];
        int cnt = 0;

        /* Return value: R0 -> edx:eax */
        /* mov eax,dword ptr [ebp+off]*/
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
        /* mov edx,dword ptr [ebp+off]*/
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));

        /* Undo the "sub ebp" from emit_prologue() so EBP is back at
         * the frame base set by "mov ebp,esp".
         */
        /* add ebp,SCRATCH_SIZE+12*/
        EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);

        /* Restore the callee-saved registers pushed by the prologue */
        /* mov ebx,dword ptr [ebp-12]*/
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
        /* mov esi,dword ptr [ebp-8]*/
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
        /* mov edi,dword ptr [ebp-4]*/
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);

        EMIT1(0xC9); /* leave */
        EMIT1(0xC3); /* ret */
        *pprog = prog;
}
1492
1493/*
1494 * Generate the following code:
1495 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
1496 *   if (index >= array->map.max_entries)
1497 *     goto out;
1498 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
1499 *     goto out;
1500 *   prog = array->ptrs[index];
1501 *   if (prog == NULL)
1502 *     goto out;
1503 *   goto *(prog->bpf_func + prologue_size);
1504 * out:
1505 */
1506static void emit_bpf_tail_call(u8 **pprog)
1507{
1508        u8 *prog = *pprog;
1509        int cnt = 0;
1510        const u8 *r1 = bpf2ia32[BPF_REG_1];
1511        const u8 *r2 = bpf2ia32[BPF_REG_2];
1512        const u8 *r3 = bpf2ia32[BPF_REG_3];
1513        const u8 *tcc = bpf2ia32[TCALL_CNT];
1514        u32 lo, hi;
1515        static int jmp_label1 = -1;
1516
1517        /*
1518         * if (index >= array->map.max_entries)
1519         *     goto out;
1520         */
1521        /* mov eax,dword ptr [ebp+off] */
1522        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
1523        /* mov edx,dword ptr [ebp+off] */
1524        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
1525
1526        /* cmp dword ptr [eax+off],edx */
1527        EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
1528              offsetof(struct bpf_array, map.max_entries));
1529        /* jbe out */
1530        EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
1531
1532        /*
1533         * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
1534         *     goto out;
1535         */
1536        lo = (u32)MAX_TAIL_CALL_CNT;
1537        hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
1538        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1539        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1540
1541        /* cmp edx,hi */
1542        EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
1543        EMIT2(IA32_JNE, 3);
1544        /* cmp ecx,lo */
1545        EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
1546
1547        /* ja out */
1548        EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
1549
1550        /* add eax,0x1 */
1551        EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
1552        /* adc ebx,0x0 */
1553        EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
1554
1555        /* mov dword ptr [ebp+off],eax */
1556        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1557        /* mov dword ptr [ebp+off],edx */
1558        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1559
1560        /* prog = array->ptrs[index]; */
1561        /* mov edx, [eax + edx * 4 + offsetof(...)] */
1562        EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
1563
1564        /*
1565         * if (prog == NULL)
1566         *     goto out;
1567         */
1568        /* test edx,edx */
1569        EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
1570        /* je out */
1571        EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
1572
1573        /* goto *(prog->bpf_func + prologue_size); */
1574        /* mov edx, dword ptr [edx + 32] */
1575        EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
1576              offsetof(struct bpf_prog, bpf_func));
1577        /* add edx,prologue_size */
1578        EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
1579
1580        /* mov eax,dword ptr [ebp+off] */
1581        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1582
1583        /*
1584         * Now we're ready to jump into next BPF program:
1585         * eax == ctx (1st arg)
1586         * edx == prog->bpf_func + prologue_size
1587         */
1588        RETPOLINE_EDX_BPF_JIT();
1589
1590        if (jmp_label1 == -1)
1591                jmp_label1 = cnt;
1592
1593        /* out: */
1594        *pprog = prog;
1595}
1596
/*
 * Push a 64-bit scratch-stack register onto the machine stack, using
 * ECX as a bounce register.  The high word is pushed first so the u64
 * ends up in memory with its low word at the lower address
 * (little endian).
 */
static inline void emit_push_r64(const u8 src[], u8 **pprog)
{
        u8 *prog = *pprog;
        int cnt = 0;

        /* mov ecx,dword ptr [ebp+off] */
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
        /* push ecx */
        EMIT1(0x51);

        /* mov ecx,dword ptr [ebp+off] */
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
        /* push ecx */
        EMIT1(0x51);

        *pprog = prog;
}
1615
1616static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1617                  int oldproglen, struct jit_context *ctx)
1618{
1619        struct bpf_insn *insn = bpf_prog->insnsi;
1620        int insn_cnt = bpf_prog->len;
1621        bool seen_exit = false;
1622        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1623        int i, cnt = 0;
1624        int proglen = 0;
1625        u8 *prog = temp;
1626
1627        emit_prologue(&prog, bpf_prog->aux->stack_depth);
1628
1629        for (i = 0; i < insn_cnt; i++, insn++) {
1630                const s32 imm32 = insn->imm;
1631                const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1632                const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
1633                const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
1634                const u8 code = insn->code;
1635                const u8 *dst = bpf2ia32[insn->dst_reg];
1636                const u8 *src = bpf2ia32[insn->src_reg];
1637                const u8 *r0 = bpf2ia32[BPF_REG_0];
1638                s64 jmp_offset;
1639                u8 jmp_cond;
1640                int ilen;
1641                u8 *func;
1642
1643                switch (code) {
1644                /* ALU operations */
1645                /* dst = src */
1646                case BPF_ALU | BPF_MOV | BPF_K:
1647                case BPF_ALU | BPF_MOV | BPF_X:
1648                case BPF_ALU64 | BPF_MOV | BPF_K:
1649                case BPF_ALU64 | BPF_MOV | BPF_X:
1650                        switch (BPF_SRC(code)) {
1651                        case BPF_X:
1652                                emit_ia32_mov_r64(is64, dst, src, dstk,
1653                                                  sstk, &prog);
1654                                break;
1655                        case BPF_K:
1656                                /* Sign-extend immediate value to dst reg */
1657                                emit_ia32_mov_i64(is64, dst, imm32,
1658                                                  dstk, &prog);
1659                                break;
1660                        }
1661                        break;
1662                /* dst = dst + src/imm */
1663                /* dst = dst - src/imm */
1664                /* dst = dst | src/imm */
1665                /* dst = dst & src/imm */
1666                /* dst = dst ^ src/imm */
1667                /* dst = dst * src/imm */
1668                /* dst = dst << src */
1669                /* dst = dst >> src */
1670                case BPF_ALU | BPF_ADD | BPF_K:
1671                case BPF_ALU | BPF_ADD | BPF_X:
1672                case BPF_ALU | BPF_SUB | BPF_K:
1673                case BPF_ALU | BPF_SUB | BPF_X:
1674                case BPF_ALU | BPF_OR | BPF_K:
1675                case BPF_ALU | BPF_OR | BPF_X:
1676                case BPF_ALU | BPF_AND | BPF_K:
1677                case BPF_ALU | BPF_AND | BPF_X:
1678                case BPF_ALU | BPF_XOR | BPF_K:
1679                case BPF_ALU | BPF_XOR | BPF_X:
1680                case BPF_ALU64 | BPF_ADD | BPF_K:
1681                case BPF_ALU64 | BPF_ADD | BPF_X:
1682                case BPF_ALU64 | BPF_SUB | BPF_K:
1683                case BPF_ALU64 | BPF_SUB | BPF_X:
1684                case BPF_ALU64 | BPF_OR | BPF_K:
1685                case BPF_ALU64 | BPF_OR | BPF_X:
1686                case BPF_ALU64 | BPF_AND | BPF_K:
1687                case BPF_ALU64 | BPF_AND | BPF_X:
1688                case BPF_ALU64 | BPF_XOR | BPF_K:
1689                case BPF_ALU64 | BPF_XOR | BPF_X:
1690                        switch (BPF_SRC(code)) {
1691                        case BPF_X:
1692                                emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1693                                                  src, dstk, sstk, &prog);
1694                                break;
1695                        case BPF_K:
1696                                emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1697                                                  imm32, dstk, &prog);
1698                                break;
1699                        }
1700                        break;
1701                case BPF_ALU | BPF_MUL | BPF_K:
1702                case BPF_ALU | BPF_MUL | BPF_X:
1703                        switch (BPF_SRC(code)) {
1704                        case BPF_X:
1705                                emit_ia32_mul_r(dst_lo, src_lo, dstk,
1706                                                sstk, &prog);
1707                                break;
1708                        case BPF_K:
1709                                /* mov ecx,imm32*/
1710                                EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1711                                            imm32);
1712                                emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1713                                                false, &prog);
1714                                break;
1715                        }
1716                        emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1717                        break;
1718                case BPF_ALU | BPF_LSH | BPF_X:
1719                case BPF_ALU | BPF_RSH | BPF_X:
1720                case BPF_ALU | BPF_ARSH | BPF_K:
1721                case BPF_ALU | BPF_ARSH | BPF_X:
1722                        switch (BPF_SRC(code)) {
1723                        case BPF_X:
1724                                emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1725                                                  dstk, sstk, &prog);
1726                                break;
1727                        case BPF_K:
1728                                /* mov ecx,imm32*/
1729                                EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1730                                            imm32);
1731                                emit_ia32_shift_r(BPF_OP(code), dst_lo,
1732                                                  IA32_ECX, dstk, false,
1733                                                  &prog);
1734                                break;
1735                        }
1736                        emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1737                        break;
1738                /* dst = dst / src(imm) */
1739                /* dst = dst % src(imm) */
1740                case BPF_ALU | BPF_DIV | BPF_K:
1741                case BPF_ALU | BPF_DIV | BPF_X:
1742                case BPF_ALU | BPF_MOD | BPF_K:
1743                case BPF_ALU | BPF_MOD | BPF_X:
1744                        switch (BPF_SRC(code)) {
1745                        case BPF_X:
1746                                emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1747                                                    src_lo, dstk, sstk, &prog);
1748                                break;
1749                        case BPF_K:
1750                                /* mov ecx,imm32*/
1751                                EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1752                                            imm32);
1753                                emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1754                                                    IA32_ECX, dstk, false,
1755                                                    &prog);
1756                                break;
1757                        }
1758                        emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1759                        break;
1760                case BPF_ALU64 | BPF_DIV | BPF_K:
1761                case BPF_ALU64 | BPF_DIV | BPF_X:
1762                case BPF_ALU64 | BPF_MOD | BPF_K:
1763                case BPF_ALU64 | BPF_MOD | BPF_X:
1764                        goto notyet;
1765                /* dst = dst >> imm */
1766                /* dst = dst << imm */
1767                case BPF_ALU | BPF_RSH | BPF_K:
1768                case BPF_ALU | BPF_LSH | BPF_K:
1769                        if (unlikely(imm32 > 31))
1770                                return -EINVAL;
1771                        /* mov ecx,imm32*/
1772                        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1773                        emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1774                                          false, &prog);
1775                        emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1776                        break;
1777                /* dst = dst << imm */
1778                case BPF_ALU64 | BPF_LSH | BPF_K:
1779                        if (unlikely(imm32 > 63))
1780                                return -EINVAL;
1781                        emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1782                        break;
1783                /* dst = dst >> imm */
1784                case BPF_ALU64 | BPF_RSH | BPF_K:
1785                        if (unlikely(imm32 > 63))
1786                                return -EINVAL;
1787                        emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1788                        break;
1789                /* dst = dst << src */
1790                case BPF_ALU64 | BPF_LSH | BPF_X:
1791                        emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1792                        break;
1793                /* dst = dst >> src */
1794                case BPF_ALU64 | BPF_RSH | BPF_X:
1795                        emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1796                        break;
1797                /* dst = dst >> src (signed) */
1798                case BPF_ALU64 | BPF_ARSH | BPF_X:
1799                        emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1800                        break;
1801                /* dst = dst >> imm (signed) */
1802                case BPF_ALU64 | BPF_ARSH | BPF_K:
1803                        if (unlikely(imm32 > 63))
1804                                return -EINVAL;
1805                        emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
1806                        break;
1807                /* dst = -dst */
1808                case BPF_ALU | BPF_NEG:
1809                        emit_ia32_alu_i(is64, false, BPF_OP(code),
1810                                        dst_lo, 0, dstk, &prog);
1811                        emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1812                        break;
1813                /* dst = -dst (64 bit) */
1814                case BPF_ALU64 | BPF_NEG:
1815                        emit_ia32_neg64(dst, dstk, &prog);
1816                        break;
1817                /* dst = dst * src/imm */
1818                case BPF_ALU64 | BPF_MUL | BPF_X:
1819                case BPF_ALU64 | BPF_MUL | BPF_K:
1820                        switch (BPF_SRC(code)) {
1821                        case BPF_X:
1822                                emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1823                                break;
1824                        case BPF_K:
1825                                emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1826                                break;
1827                        }
1828                        break;
1829                /* dst = htole(dst) */
1830                case BPF_ALU | BPF_END | BPF_FROM_LE:
1831                        emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
1832                        break;
1833                /* dst = htobe(dst) */
1834                case BPF_ALU | BPF_END | BPF_FROM_BE:
1835                        emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
1836                        break;
1837                /* dst = imm64 */
1838                case BPF_LD | BPF_IMM | BPF_DW: {
1839                        s32 hi, lo = imm32;
1840
1841                        hi = insn[1].imm;
1842                        emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1843                        emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1844                        insn++;
1845                        i++;
1846                        break;
1847                }
1848                /* ST: *(size *)(dst_reg + off) = imm */
1849                case BPF_ST | BPF_MEM | BPF_H:
1850                case BPF_ST | BPF_MEM | BPF_B:
1851                case BPF_ST | BPF_MEM | BPF_W:
1852                case BPF_ST | BPF_MEM | BPF_DW:
1853                        if (dstk)
1854                                /* mov eax,dword ptr [ebp+off] */
1855                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1856                                      STACK_VAR(dst_lo));
1857                        else
1858                                /* mov eax,dst_lo */
1859                                EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1860
1861                        switch (BPF_SIZE(code)) {
1862                        case BPF_B:
1863                                EMIT(0xC6, 1); break;
1864                        case BPF_H:
1865                                EMIT2(0x66, 0xC7); break;
1866                        case BPF_W:
1867                        case BPF_DW:
1868                                EMIT(0xC7, 1); break;
1869                        }
1870
1871                        if (is_imm8(insn->off))
1872                                EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1873                        else
1874                                EMIT1_off32(add_1reg(0x80, IA32_EAX),
1875                                            insn->off);
1876                        EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
1877
1878                        if (BPF_SIZE(code) == BPF_DW) {
1879                                u32 hi;
1880
1881                                hi = imm32 & (1<<31) ? (u32)~0 : 0;
1882                                EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1883                                            insn->off + 4);
1884                                EMIT(hi, 4);
1885                        }
1886                        break;
1887
1888                /* STX: *(size *)(dst_reg + off) = src_reg */
1889                case BPF_STX | BPF_MEM | BPF_B:
1890                case BPF_STX | BPF_MEM | BPF_H:
1891                case BPF_STX | BPF_MEM | BPF_W:
1892                case BPF_STX | BPF_MEM | BPF_DW:
1893                        if (dstk)
1894                                /* mov eax,dword ptr [ebp+off] */
1895                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1896                                      STACK_VAR(dst_lo));
1897                        else
1898                                /* mov eax,dst_lo */
1899                                EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1900
1901                        if (sstk)
1902                                /* mov edx,dword ptr [ebp+off] */
1903                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1904                                      STACK_VAR(src_lo));
1905                        else
1906                                /* mov edx,src_lo */
1907                                EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1908
1909                        switch (BPF_SIZE(code)) {
1910                        case BPF_B:
1911                                EMIT(0x88, 1); break;
1912                        case BPF_H:
1913                                EMIT2(0x66, 0x89); break;
1914                        case BPF_W:
1915                        case BPF_DW:
1916                                EMIT(0x89, 1); break;
1917                        }
1918
1919                        if (is_imm8(insn->off))
1920                                EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1921                                      insn->off);
1922                        else
1923                                EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1924                                            insn->off);
1925
1926                        if (BPF_SIZE(code) == BPF_DW) {
1927                                if (sstk)
1928                                        /* mov edx,dword ptr [ebp+off] */
1929                                        EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1930                                                             IA32_EDX),
1931                                              STACK_VAR(src_hi));
1932                                else
1933                                        /* mov edx,src_hi */
1934                                        EMIT2(0x8B, add_2reg(0xC0, src_hi,
1935                                                             IA32_EDX));
1936                                EMIT1(0x89);
1937                                if (is_imm8(insn->off + 4)) {
1938                                        EMIT2(add_2reg(0x40, IA32_EAX,
1939                                                       IA32_EDX),
1940                                              insn->off + 4);
1941                                } else {
1942                                        EMIT1(add_2reg(0x80, IA32_EAX,
1943                                                       IA32_EDX));
1944                                        EMIT(insn->off + 4, 4);
1945                                }
1946                        }
1947                        break;
1948
1949                /* LDX: dst_reg = *(size *)(src_reg + off) */
1950                case BPF_LDX | BPF_MEM | BPF_B:
1951                case BPF_LDX | BPF_MEM | BPF_H:
1952                case BPF_LDX | BPF_MEM | BPF_W:
1953                case BPF_LDX | BPF_MEM | BPF_DW:
1954                        if (sstk)
1955                                /* mov eax,dword ptr [ebp+off] */
1956                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1957                                      STACK_VAR(src_lo));
1958                        else
1959                                /* mov eax,src_lo */
1960                                EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
1961
1962                        switch (BPF_SIZE(code)) {
1963                        case BPF_B:
1964                                EMIT2(0x0F, 0xB6); break;
1965                        case BPF_H:
1966                                EMIT2(0x0F, 0xB7); break;
1967                        case BPF_W:
1968                        case BPF_DW:
1969                                EMIT(0x8B, 1); break;
1970                        }
1971
1972                        if (is_imm8(insn->off))
1973                                EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1974                                      insn->off);
1975                        else
1976                                EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1977                                            insn->off);
1978
1979                        if (dstk)
1980                                /* mov dword ptr [ebp+off],edx */
1981                                EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1982                                      STACK_VAR(dst_lo));
1983                        else
1984                                /* mov dst_lo,edx */
1985                                EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
1986                        switch (BPF_SIZE(code)) {
1987                        case BPF_B:
1988                        case BPF_H:
1989                        case BPF_W:
1990                                if (dstk) {
1991                                        EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
1992                                              STACK_VAR(dst_hi));
1993                                        EMIT(0x0, 4);
1994                                } else {
1995                                        EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
1996                                }
1997                                break;
1998                        case BPF_DW:
1999                                EMIT2_off32(0x8B,
2000                                            add_2reg(0x80, IA32_EAX, IA32_EDX),
2001                                            insn->off + 4);
2002                                if (dstk)
2003                                        EMIT3(0x89,
2004                                              add_2reg(0x40, IA32_EBP,
2005                                                       IA32_EDX),
2006                                              STACK_VAR(dst_hi));
2007                                else
2008                                        EMIT2(0x89,
2009                                              add_2reg(0xC0, dst_hi, IA32_EDX));
2010                                break;
2011                        default:
2012                                break;
2013                        }
2014                        break;
2015                /* call */
2016                case BPF_JMP | BPF_CALL:
2017                {
2018                        const u8 *r1 = bpf2ia32[BPF_REG_1];
2019                        const u8 *r2 = bpf2ia32[BPF_REG_2];
2020                        const u8 *r3 = bpf2ia32[BPF_REG_3];
2021                        const u8 *r4 = bpf2ia32[BPF_REG_4];
2022                        const u8 *r5 = bpf2ia32[BPF_REG_5];
2023
2024                        if (insn->src_reg == BPF_PSEUDO_CALL)
2025                                goto notyet;
2026
2027                        func = (u8 *) __bpf_call_base + imm32;
2028                        jmp_offset = func - (image + addrs[i]);
2029
2030                        if (!imm32 || !is_simm32(jmp_offset)) {
2031                                pr_err("unsupported BPF func %d addr %p image %p\n",
2032                                       imm32, func, image);
2033                                return -EINVAL;
2034                        }
2035
2036                        /* mov eax,dword ptr [ebp+off] */
2037                        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2038                              STACK_VAR(r1[0]));
2039                        /* mov edx,dword ptr [ebp+off] */
2040                        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2041                              STACK_VAR(r1[1]));
2042
2043                        emit_push_r64(r5, &prog);
2044                        emit_push_r64(r4, &prog);
2045                        emit_push_r64(r3, &prog);
2046                        emit_push_r64(r2, &prog);
2047
2048                        EMIT1_off32(0xE8, jmp_offset + 9);
2049
2050                        /* mov dword ptr [ebp+off],eax */
2051                        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2052                              STACK_VAR(r0[0]));
2053                        /* mov dword ptr [ebp+off],edx */
2054                        EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2055                              STACK_VAR(r0[1]));
2056
2057                        /* add esp,32 */
2058                        EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
2059                        break;
2060                }
2061                case BPF_JMP | BPF_TAIL_CALL:
2062                        emit_bpf_tail_call(&prog);
2063                        break;
2064
2065                /* cond jump */
2066                case BPF_JMP | BPF_JEQ | BPF_X:
2067                case BPF_JMP | BPF_JNE | BPF_X:
2068                case BPF_JMP | BPF_JGT | BPF_X:
2069                case BPF_JMP | BPF_JLT | BPF_X:
2070                case BPF_JMP | BPF_JGE | BPF_X:
2071                case BPF_JMP | BPF_JLE | BPF_X:
2072                case BPF_JMP | BPF_JSGT | BPF_X:
2073                case BPF_JMP | BPF_JSLE | BPF_X:
2074                case BPF_JMP | BPF_JSLT | BPF_X:
2075                case BPF_JMP | BPF_JSGE | BPF_X: {
2076                        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2077                        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2078                        u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2079                        u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2080
2081                        if (dstk) {
2082                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2083                                      STACK_VAR(dst_lo));
2084                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2085                                      STACK_VAR(dst_hi));
2086                        }
2087
2088                        if (sstk) {
2089                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2090                                      STACK_VAR(src_lo));
2091                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2092                                      STACK_VAR(src_hi));
2093                        }
2094
2095                        /* cmp dreg_hi,sreg_hi */
2096                        EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2097                        EMIT2(IA32_JNE, 2);
2098                        /* cmp dreg_lo,sreg_lo */
2099                        EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2100                        goto emit_cond_jmp;
2101                }
2102                case BPF_JMP | BPF_JSET | BPF_X: {
2103                        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2104                        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2105                        u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2106                        u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2107
2108                        if (dstk) {
2109                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2110                                      STACK_VAR(dst_lo));
2111                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2112                                      STACK_VAR(dst_hi));
2113                        }
2114
2115                        if (sstk) {
2116                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2117                                      STACK_VAR(src_lo));
2118                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2119                                      STACK_VAR(src_hi));
2120                        }
2121                        /* and dreg_lo,sreg_lo */
2122                        EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2123                        /* and dreg_hi,sreg_hi */
2124                        EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2125                        /* or dreg_lo,dreg_hi */
2126                        EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2127                        goto emit_cond_jmp;
2128                }
2129                case BPF_JMP | BPF_JSET | BPF_K: {
2130                        u32 hi;
2131                        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2132                        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2133                        u8 sreg_lo = IA32_ECX;
2134                        u8 sreg_hi = IA32_EBX;
2135
2136                        if (dstk) {
2137                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2138                                      STACK_VAR(dst_lo));
2139                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2140                                      STACK_VAR(dst_hi));
2141                        }
2142                        hi = imm32 & (1<<31) ? (u32)~0 : 0;
2143
2144                        /* mov ecx,imm32 */
2145                        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2146                        /* mov ebx,imm32 */
2147                        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2148
2149                        /* and dreg_lo,sreg_lo */
2150                        EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2151                        /* and dreg_hi,sreg_hi */
2152                        EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2153                        /* or dreg_lo,dreg_hi */
2154                        EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2155                        goto emit_cond_jmp;
2156                }
2157                case BPF_JMP | BPF_JEQ | BPF_K:
2158                case BPF_JMP | BPF_JNE | BPF_K:
2159                case BPF_JMP | BPF_JGT | BPF_K:
2160                case BPF_JMP | BPF_JLT | BPF_K:
2161                case BPF_JMP | BPF_JGE | BPF_K:
2162                case BPF_JMP | BPF_JLE | BPF_K:
2163                case BPF_JMP | BPF_JSGT | BPF_K:
2164                case BPF_JMP | BPF_JSLE | BPF_K:
2165                case BPF_JMP | BPF_JSLT | BPF_K:
2166                case BPF_JMP | BPF_JSGE | BPF_K: {
2167                        u32 hi;
2168                        u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2169                        u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2170                        u8 sreg_lo = IA32_ECX;
2171                        u8 sreg_hi = IA32_EBX;
2172
2173                        if (dstk) {
2174                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2175                                      STACK_VAR(dst_lo));
2176                                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2177                                      STACK_VAR(dst_hi));
2178                        }
2179
2180                        hi = imm32 & (1<<31) ? (u32)~0 : 0;
2181                        /* mov ecx,imm32 */
2182                        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2183                        /* mov ebx,imm32 */
2184                        EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2185
2186                        /* cmp dreg_hi,sreg_hi */
2187                        EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2188                        EMIT2(IA32_JNE, 2);
2189                        /* cmp dreg_lo,sreg_lo */
2190                        EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2191
2192emit_cond_jmp:          /* Convert BPF opcode to x86 */
2193                        switch (BPF_OP(code)) {
2194                        case BPF_JEQ:
2195                                jmp_cond = IA32_JE;
2196                                break;
2197                        case BPF_JSET:
2198                        case BPF_JNE:
2199                                jmp_cond = IA32_JNE;
2200                                break;
2201                        case BPF_JGT:
2202                                /* GT is unsigned '>', JA in x86 */
2203                                jmp_cond = IA32_JA;
2204                                break;
2205                        case BPF_JLT:
2206                                /* LT is unsigned '<', JB in x86 */
2207                                jmp_cond = IA32_JB;
2208                                break;
2209                        case BPF_JGE:
2210                                /* GE is unsigned '>=', JAE in x86 */
2211                                jmp_cond = IA32_JAE;
2212                                break;
2213                        case BPF_JLE:
2214                                /* LE is unsigned '<=', JBE in x86 */
2215                                jmp_cond = IA32_JBE;
2216                                break;
2217                        case BPF_JSGT:
2218                                /* Signed '>', GT in x86 */
2219                                jmp_cond = IA32_JG;
2220                                break;
2221                        case BPF_JSLT:
2222                                /* Signed '<', LT in x86 */
2223                                jmp_cond = IA32_JL;
2224                                break;
2225                        case BPF_JSGE:
2226                                /* Signed '>=', GE in x86 */
2227                                jmp_cond = IA32_JGE;
2228                                break;
2229                        case BPF_JSLE:
2230                                /* Signed '<=', LE in x86 */
2231                                jmp_cond = IA32_JLE;
2232                                break;
2233                        default: /* to silence GCC warning */
2234                                return -EFAULT;
2235                        }
2236                        jmp_offset = addrs[i + insn->off] - addrs[i];
2237                        if (is_imm8(jmp_offset)) {
2238                                EMIT2(jmp_cond, jmp_offset);
2239                        } else if (is_simm32(jmp_offset)) {
2240                                EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2241                        } else {
2242                                pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2243                                return -EFAULT;
2244                        }
2245
2246                        break;
2247                }
2248                case BPF_JMP | BPF_JA:
2249                        if (insn->off == -1)
2250                                /* -1 jmp instructions will always jump
2251                                 * backwards two bytes. Explicitly handling
2252                                 * this case avoids wasting too many passes
2253                                 * when there are long sequences of replaced
2254                                 * dead code.
2255                                 */
2256                                jmp_offset = -2;
2257                        else
2258                                jmp_offset = addrs[i + insn->off] - addrs[i];
2259
2260                        if (!jmp_offset)
2261                                /* Optimize out nop jumps */
2262                                break;
2263emit_jmp:
2264                        if (is_imm8(jmp_offset)) {
2265                                EMIT2(0xEB, jmp_offset);
2266                        } else if (is_simm32(jmp_offset)) {
2267                                EMIT1_off32(0xE9, jmp_offset);
2268                        } else {
2269                                pr_err("jmp gen bug %llx\n", jmp_offset);
2270                                return -EFAULT;
2271                        }
2272                        break;
2273                /* STX XADD: lock *(u32 *)(dst + off) += src */
2274                case BPF_STX | BPF_XADD | BPF_W:
2275                /* STX XADD: lock *(u64 *)(dst + off) += src */
2276                case BPF_STX | BPF_XADD | BPF_DW:
2277                        goto notyet;
2278                case BPF_JMP | BPF_EXIT:
2279                        if (seen_exit) {
2280                                jmp_offset = ctx->cleanup_addr - addrs[i];
2281                                goto emit_jmp;
2282                        }
2283                        seen_exit = true;
2284                        /* Update cleanup_addr */
2285                        ctx->cleanup_addr = proglen;
2286                        emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2287                        break;
2288notyet:
2289                        pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2290                        return -EFAULT;
2291                default:
2292                        /*
2293                         * This error will be seen if new instruction was added
2294                         * to interpreter, but not to JIT or if there is junk in
2295                         * bpf_prog
2296                         */
2297                        pr_err("bpf_jit: unknown opcode %02x\n", code);
2298                        return -EINVAL;
2299                }
2300
2301                ilen = prog - temp;
2302                if (ilen > BPF_MAX_INSN_SIZE) {
2303                        pr_err("bpf_jit: fatal insn size error\n");
2304                        return -EFAULT;
2305                }
2306
2307                if (image) {
2308                        if (unlikely(proglen + ilen > oldproglen)) {
2309                                pr_err("bpf_jit: fatal error\n");
2310                                return -EFAULT;
2311                        }
2312                        memcpy(image + proglen, temp, ilen);
2313                }
2314                proglen += ilen;
2315                addrs[i] = proglen;
2316                prog = temp;
2317        }
2318        return proglen;
2319}
2320
    /*
     * Main JIT entry point: translate an eBPF program into native IA32 code.
     *
     * On any failure (blinding error, allocation failure, emission error,
     * non-convergence) the original program is returned unchanged so the
     * kernel falls back to the eBPF interpreter; the caller cannot tell
     * failure apart from "JIT not requested" and does not need to.
     */
2321struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2322{
2323        struct bpf_binary_header *header = NULL;
2324        struct bpf_prog *tmp, *orig_prog = prog;
2325        int proglen, oldproglen = 0;
2326        struct jit_context ctx = {};
2327        bool tmp_blinded = false;
2328        u8 *image = NULL;
2329        int *addrs;
2330        int pass;
2331        int i;
2332
        /* Nothing to do if the JIT was not requested for this prog. */
2333        if (!prog->jit_requested)
2334                return orig_prog;
2335
        /*
         * Constant blinding may clone the program; from here on "prog" may
         * be the blinded copy and must be released via
         * bpf_jit_prog_release_other() on exit (tmp_blinded tracks this).
         */
2336        tmp = bpf_jit_blind_constants(prog);
2337        /*
2338         * If blinding was requested and we failed during blinding,
2339         * we must fall back to the interpreter.
2340         */
2341        if (IS_ERR(tmp))
2342                return orig_prog;
2343        if (tmp != prog) {
2344                tmp_blinded = true;
2345                prog = tmp;
2346        }
2347
        /* addrs[i] holds the native end offset of eBPF instruction i. */
2348        addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
2349        if (!addrs) {
2350                prog = orig_prog;
2351                goto out;
2352        }
2353
2354        /*
2355         * Before first pass, make a rough estimation of addrs[]
2356         * each BPF instruction is translated to less than 64 bytes
2357         */
2358        for (proglen = 0, i = 0; i < prog->len; i++) {
2359                proglen += 64;
2360                addrs[i] = proglen;
2361        }
        /* Initial epilogue address guess; refined by do_jit() each pass. */
2362        ctx.cleanup_addr = proglen;
2363
2364        /*
2365         * JITed image shrinks with every pass and the loop iterates
2366         * until the image stops shrinking. Very large BPF programs
2367         * may converge on the last pass. In such case do one more
2368         * pass to emit the final image.
2369         */
2370        for (pass = 0; pass < 20 || image; pass++) {
2371                proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
2372                if (proglen <= 0) {
                        /* Shared error path: drop any allocated image. */
2373out_image:
2374                        image = NULL;
2375                        if (header)
2376                                bpf_jit_binary_free(header);
2377                        prog = orig_prog;
2378                        goto out_addrs;
2379                }
2380                if (image) {
                        /*
                         * Final emission pass: the layout was frozen when
                         * the image was allocated, so any size change now
                         * is a fatal internal inconsistency.
                         */
2381                        if (proglen != oldproglen) {
2382                                pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2383                                       proglen, oldproglen);
2384                                goto out_image;
2385                        }
2386                        break;
2387                }
                /*
                 * Size converged: allocate the executable image now; the
                 * next iteration re-runs do_jit() to emit into it.
                 */
2388                if (proglen == oldproglen) {
2389                        header = bpf_jit_binary_alloc(proglen, &image,
2390                                                      1, jit_fill_hole);
2391                        if (!header) {
2392                                prog = orig_prog;
2393                                goto out_addrs;
2394                        }
2395                }
2396                oldproglen = proglen;
2397                cond_resched();
2398        }
2399
        /* bpf_jit_enable > 1: debug-dump the generated native code. */
2400        if (bpf_jit_enable > 1)
2401                bpf_jit_dump(prog->len, proglen, pass + 1, image);
2402
2403        if (image) {
                /* Make the image read-only before publishing it. */
2404                bpf_jit_binary_lock_ro(header);
2405                prog->bpf_func = (void *)image;
2406                prog->jited = 1;
2407                prog->jited_len = proglen;
2408        } else {
                /* Loop exhausted without an image: fall back to interpreter. */
2409                prog = orig_prog;
2410        }
2411
2412out_addrs:
2413        kfree(addrs);
2414out:
        /*
         * If blinding cloned the program, free whichever copy is not being
         * returned (the clone on failure, the original on success).
         */
2415        if (tmp_blinded)
2416                bpf_jit_prog_release_other(prog, prog == orig_prog ?
2417                                           tmp : orig_prog);
2418        return prog;
2419}
2420