linux/arch/mips/net/bpf_jit.c
/*
 * Just-In-Time compiler for BPF filters on MIPS
 *
 * Copyright (c) 2014 Imagination Technologies Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/kconfig.h>
#include <linux/moduleloader.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/uasm.h>

#include "bpf_jit.h"

/* ABI
 *
 * s0   1st scratch register
 * s1   2nd scratch register
 * s2   offset register
 * s3   BPF register A
 * s4   BPF register X
 * s5   *skb
 * s6   *scratch memory
 *
 * On entry (*bpf_func)(*skb, *filter)
 * a0 = MIPS_R_A0 = skb;
 * a1 = MIPS_R_A1 = filter;
 *
 * Stack
 * ...
 * M[15]
 * M[14]
 * M[13]
 * ...
 * M[0] <-- r_M
 * saved reg k-1
 * saved reg k-2
 * ...
 * saved reg 0 <-- r_sp
 * <no argument area>
 *
 *                     Packet layout
 *
 * <--------------------- len ------------------------>
 * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------>
 * ----------------------------------------------------
 * |                  skb->data                       |
 * ----------------------------------------------------
 */

#define RSIZE   (sizeof(unsigned long))
#define ptr typeof(unsigned long)

/* ABI specific return values */
#ifdef CONFIG_32BIT /* O32 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define r_err   MIPS_R_V1
#define r_val   MIPS_R_V0
#else /* CONFIG_CPU_LITTLE_ENDIAN */
#define r_err   MIPS_R_V0
#define r_val   MIPS_R_V1
#endif
#else /* N64 */
#define r_err   MIPS_R_V0
#define r_val   MIPS_R_V0
#endif

#define r_ret   MIPS_R_V0

/*
 * Use 2 scratch registers to avoid pipeline interlocks.
 * There is no overhead during epilogue and prologue since
 * any of the $s0-$s6 registers will only be preserved if
 * they are going to actually be used.
 */
#define r_s0            MIPS_R_S0 /* scratch reg 1 */
#define r_s1            MIPS_R_S1 /* scratch reg 2 */
#define r_off           MIPS_R_S2
#define r_A             MIPS_R_S3
#define r_X             MIPS_R_S4
#define r_skb           MIPS_R_S5
#define r_M             MIPS_R_S6
#define r_tmp_imm       MIPS_R_T6 /* No need to preserve this */
#define r_tmp           MIPS_R_T7 /* No need to preserve this */
#define r_zero          MIPS_R_ZERO
#define r_sp            MIPS_R_SP
#define r_ra            MIPS_R_RA

#define SCRATCH_OFF(k)          (4 * (k))

/* JIT flags */
#define SEEN_CALL               (1 << BPF_MEMWORDS)
#define SEEN_SREG_SFT           (BPF_MEMWORDS + 1)
#define SEEN_SREG_BASE          (1 << SEEN_SREG_SFT)
#define SEEN_SREG(x)            (SEEN_SREG_BASE << (x))
#define SEEN_S0                 SEEN_SREG(0)
#define SEEN_S1                 SEEN_SREG(1)
#define SEEN_OFF                SEEN_SREG(2)
#define SEEN_A                  SEEN_SREG(3)
#define SEEN_X                  SEEN_SREG(4)
#define SEEN_SKB                SEEN_SREG(5)
#define SEEN_MEM                SEEN_SREG(6)

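/*
 * Layout of ctx->flags (illustrative): bit 16 (SEEN_CALL) records that
 * the filter calls out to a C helper, and bits 17-23 (SEEN_SREG(0)..
 * SEEN_SREG(6)) form a bitmap of the callee-saved $s0-$s6 registers the
 * filter actually touches. A filter that only uses A and the skb
 * pointer sets SEEN_A | SEEN_SKB (bits 20 and 22), so the prologue
 * spills just $s3 and $s5.
 */
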
/* Arguments used by JIT */
#define ARGS_USED_BY_JIT        2 /* only applicable to 64-bit */

#define SBIT(x)                 (1 << (x)) /* Signed version of BIT() */

/**
 * struct jit_ctx - JIT context
 * @skf:                The sk_filter
 * @prologue_bytes:     Number of bytes for prologue
 * @idx:                Instruction index
 * @flags:              JIT flags
 * @offsets:            Instruction offsets
 * @target:             Memory location for the compiled filter
 */
struct jit_ctx {
        const struct bpf_prog *skf;
        unsigned int prologue_bytes;
        u32 idx;
        u32 flags;
        u32 *offsets;
        u32 *target;
};

static inline int optimize_div(u32 *k)
{
        /* power of 2 divides can be implemented with right shift */
        if (!(*k & (*k-1))) {
                *k = ilog2(*k);
                return 1;
        }

        return 0;
}

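/*
 * Worked example (illustrative): for A /= 16, 16 & 15 == 0, so
 * optimize_div() rewrites k to ilog2(16) = 4 and the caller emits a
 * single "srl r_A, r_A, 4" instead of a divu/mflo pair. Classic BPF
 * validation rejects a constant divisor of 0 before the JIT runs, so
 * the k == 0 case never reaches this helper.
 */
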
static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);

/* Simply emit the instruction if the JIT memory space has been allocated */
#define emit_instr(ctx, func, ...)                      \
do {                                                    \
        if ((ctx)->target != NULL) {                    \
                u32 *p = &(ctx)->target[ctx->idx];      \
                uasm_i_##func(&p, ##__VA_ARGS__);       \
        }                                               \
        (ctx)->idx++;                                   \
} while (0)

/*
 * Similar to emit_instr but it must be used when we need to emit
 * 32-bit or 64-bit instructions
 */
#define emit_long_instr(ctx, func, ...)                 \
do {                                                    \
        if ((ctx)->target != NULL) {                    \
                u32 *p = &(ctx)->target[ctx->idx];      \
                UASM_i_##func(&p, ##__VA_ARGS__);       \
        }                                               \
        (ctx)->idx++;                                   \
} while (0)

/* Determine if immediate is within the 16-bit signed range */
static inline bool is_range16(s32 imm)
{
        return !(imm >= SBIT(15) || imm < -SBIT(15));
}

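/*
 * E.g. is_range16(32767) and is_range16(-32768) are true, since both
 * encode directly in a signed 16-bit immediate, while is_range16(32768)
 * is false and forces the two-instruction lui/ori path below.
 */
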
static inline void emit_addu(unsigned int dst, unsigned int src1,
                             unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, addu, dst, src1, src2);
}

static inline void emit_nop(struct jit_ctx *ctx)
{
        emit_instr(ctx, nop);
}

/* Load a u32 immediate to a register */
static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                /* addiu can only handle s16 */
                if (!is_range16(imm)) {
                        u32 *p = &ctx->target[ctx->idx];
                        uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
                        p = &ctx->target[ctx->idx + 1];
                        uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
                } else {
                        u32 *p = &ctx->target[ctx->idx];
                        uasm_i_addiu(&p, dst, r_zero, imm);
                }
        }
        ctx->idx++;

        if (!is_range16(imm))
                ctx->idx++;
}

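/*
 * Illustrative expansion: 0x12345678 does not fit in s16, so
 * emit_load_imm(r_A, 0x12345678, ctx) becomes
 *
 *      lui     r_tmp_imm, 0x1234        # (s32)imm >> 16
 *      ori     r_A, r_tmp_imm, 0x5678   # imm & 0xffff
 *
 * whereas a small immediate such as -4 is a single "addiu r_A, zero, -4".
 */
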
static inline void emit_or(unsigned int dst, unsigned int src1,
                           unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, or, dst, src1, src2);
}

static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
                            struct jit_ctx *ctx)
{
        if (imm >= BIT(16)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_or(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, ori, dst, src, imm);
        }
}

static inline void emit_daddiu(unsigned int dst, unsigned int src,
                               int imm, struct jit_ctx *ctx)
{
        /*
         * Only used for stack, so the imm is relatively small
         * and it fits in 15-bits
         */
        emit_instr(ctx, daddiu, dst, src, imm);
}

static inline void emit_addiu(unsigned int dst, unsigned int src,
                              u32 imm, struct jit_ctx *ctx)
{
        if (!is_range16(imm)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_addu(dst, r_tmp, src, ctx);
        } else {
                emit_instr(ctx, addiu, dst, src, imm);
        }
}

static inline void emit_and(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, and, dst, src1, src2);
}

static inline void emit_andi(unsigned int dst, unsigned int src,
                             u32 imm, struct jit_ctx *ctx)
{
        /* If imm does not fit in u16 then load it to register */
        if (imm >= BIT(16)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_and(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, andi, dst, src, imm);
        }
}

static inline void emit_xor(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, xor, dst, src1, src2);
}

static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
{
        /* If imm does not fit in u16 then load it to register */
        if (imm >= BIT(16)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_xor(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, xori, dst, src, imm);
        }
}

static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
{
        emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
}

static inline void emit_subu(unsigned int dst, unsigned int src1,
                             unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, subu, dst, src1, src2);
}

static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
{
        emit_subu(reg, r_zero, reg, ctx);
}

static inline void emit_sllv(unsigned int dst, unsigned int src,
                             unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, sllv, dst, src, sa);
}

static inline void emit_sll(unsigned int dst, unsigned int src,
                            unsigned int sa, struct jit_ctx *ctx)
{
        /* sa is 5-bits long */
        if (sa >= BIT(5))
                /* Shifting >= 32 results in zero */
                emit_jit_reg_move(dst, r_zero, ctx);
        else
                emit_instr(ctx, sll, dst, src, sa);
}

static inline void emit_srlv(unsigned int dst, unsigned int src,
                             unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, srlv, dst, src, sa);
}

static inline void emit_srl(unsigned int dst, unsigned int src,
                            unsigned int sa, struct jit_ctx *ctx)
{
        /* sa is 5-bits long */
        if (sa >= BIT(5))
                /* Shifting >= 32 results in zero */
                emit_jit_reg_move(dst, r_zero, ctx);
        else
                emit_instr(ctx, srl, dst, src, sa);
}

static inline void emit_slt(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, slt, dst, src1, src2);
}

static inline void emit_sltu(unsigned int dst, unsigned int src1,
                             unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, sltu, dst, src1, src2);
}

static inline void emit_sltiu(unsigned dst, unsigned int src,
                              unsigned int imm, struct jit_ctx *ctx)
{
        /* 16 bit immediate */
        if (!is_range16((s32)imm)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_sltu(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, sltiu, dst, src, imm);
        }
}

/* Store register on the stack */
static inline void emit_store_stack_reg(ptr reg, ptr base,
                                        unsigned int offset,
                                        struct jit_ctx *ctx)
{
        emit_long_instr(ctx, SW, reg, offset, base);
}

static inline void emit_store(ptr reg, ptr base, unsigned int offset,
                              struct jit_ctx *ctx)
{
        emit_instr(ctx, sw, reg, offset, base);
}

static inline void emit_load_stack_reg(ptr reg, ptr base,
                                       unsigned int offset,
                                       struct jit_ctx *ctx)
{
        emit_long_instr(ctx, LW, reg, offset, base);
}

static inline void emit_load(unsigned int reg, unsigned int base,
                             unsigned int offset, struct jit_ctx *ctx)
{
        emit_instr(ctx, lw, reg, offset, base);
}

static inline void emit_load_byte(unsigned int reg, unsigned int base,
                                  unsigned int offset, struct jit_ctx *ctx)
{
        emit_instr(ctx, lb, reg, offset, base);
}

static inline void emit_half_load(unsigned int reg, unsigned int base,
                                  unsigned int offset, struct jit_ctx *ctx)
{
        emit_instr(ctx, lh, reg, offset, base);
}

static inline void emit_mul(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, mul, dst, src1, src2);
}

static inline void emit_div(unsigned int dst, unsigned int src,
                            struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                u32 *p = &ctx->target[ctx->idx];
                uasm_i_divu(&p, dst, src);
                p = &ctx->target[ctx->idx + 1];
                uasm_i_mflo(&p, dst);
        }
        ctx->idx += 2; /* 2 insts */
}

static inline void emit_mod(unsigned int dst, unsigned int src,
                            struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                u32 *p = &ctx->target[ctx->idx];
                uasm_i_divu(&p, dst, src);
                p = &ctx->target[ctx->idx + 1];
                /* divu leaves the remainder in HI, not LO */
                uasm_i_mfhi(&p, dst);
        }
        ctx->idx += 2; /* 2 insts */
}

static inline void emit_dsll(unsigned int dst, unsigned int src,
                             unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, dsll, dst, src, sa);
}

static inline void emit_dsrl32(unsigned int dst, unsigned int src,
                               unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, dsrl32, dst, src, sa);
}

static inline void emit_wsbh(unsigned int dst, unsigned int src,
                             struct jit_ctx *ctx)
{
        emit_instr(ctx, wsbh, dst, src);
}

/* load pointer to register */
static inline void emit_load_ptr(unsigned int dst, unsigned int src,
                                 int imm, struct jit_ctx *ctx)
{
        /* src contains the base addr of the 32/64-pointer */
        emit_long_instr(ctx, LW, dst, imm, src);
}

/* load a function pointer to register */
static inline void emit_load_func(unsigned int reg, ptr imm,
                                  struct jit_ctx *ctx)
{
        if (config_enabled(CONFIG_64BIT)) {
                /* At this point imm is always 64-bit */
                emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
                emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
                emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
                emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
                emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
        } else {
                emit_load_imm(reg, imm, ctx);
        }
}

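/*
 * Sketch of the 64-bit path, assuming a kernel address such as
 * 0xffffffff80100000: emit_load_imm() materialises the upper word
 * 0xffffffff via lui/ori, then the two dsll/ori pairs shift in bits
 * 31..16 (0x8010) and 15..0 (0x0000), building the function pointer
 * 16 bits at a time.
 */
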
/* Move to real MIPS register */
static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
        emit_long_instr(ctx, ADDU, dst, src, r_zero);
}

/* Move to JIT (32-bit) register */
static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
        emit_addu(dst, src, r_zero, ctx);
}

/* Compute the immediate value for PC-relative branches. */
static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
{
        if (ctx->target == NULL)
                return 0;

        /*
         * We want a pc-relative branch. We only do forward branches
         * so tgt is always after pc. tgt is the instruction offset
         * we want to jump to.
         *
         * Branch on MIPS:
         * I: target_offset <- sign_extend(offset)
         * I+1: PC += target_offset (delay slot)
         *
         * ctx->idx currently points to the branch instruction
         * but the offset is added to the delay slot so we need
         * to subtract 4.
         */
        return ctx->offsets[tgt] -
                (ctx->idx * 4 - ctx->prologue_bytes) - 4;
}

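/*
 * Worked example with illustrative numbers: with a 16-byte prologue, a
 * branch emitted at ctx->idx == 10 sits at body offset 10 * 4 - 16 = 24.
 * If the target starts at ctx->offsets[tgt] == 40, the returned
 * immediate is 40 - 24 - 4 = 12, i.e. three instructions beyond the
 * branch's delay slot.
 */
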
static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
                              unsigned int imm, struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                u32 *p = &ctx->target[ctx->idx];

                switch (cond) {
                case MIPS_COND_EQ:
                        uasm_i_beq(&p, reg1, reg2, imm);
                        break;
                case MIPS_COND_NE:
                        uasm_i_bne(&p, reg1, reg2, imm);
                        break;
                case MIPS_COND_ALL:
                        uasm_i_b(&p, imm);
                        break;
                default:
                        pr_warn("%s: Unhandled branch conditional: %d\n",
                                __func__, cond);
                }
        }
        ctx->idx++;
}

static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
{
        emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
}

static inline void emit_jalr(unsigned int link, unsigned int reg,
                             struct jit_ctx *ctx)
{
        emit_instr(ctx, jalr, link, reg);
}

static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
{
        emit_instr(ctx, jr, reg);
}

static inline u16 align_sp(unsigned int num)
{
        /* Double word alignment for 32-bit, quadword for 64-bit */
        unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8;
        num = (num + (align - 1)) & -align;
        return num;
}

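/*
 * E.g. align_sp(20) rounds up to 24 on 32-bit (8-byte alignment) and
 * to 32 on 64-bit (16-byte alignment), keeping $sp ABI-aligned after
 * the frame is carved out.
 */
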
static bool is_load_to_a(u16 inst)
{
        switch (inst) {
        case BPF_LD | BPF_W | BPF_LEN:
        case BPF_LD | BPF_W | BPF_ABS:
        case BPF_LD | BPF_H | BPF_ABS:
        case BPF_LD | BPF_B | BPF_ABS:
                return true;
        default:
                return false;
        }
}

static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
{
        int i = 0, real_off = 0;
        u32 sflags, tmp_flags;

        /* Adjust the stack pointer */
        emit_stack_offset(-align_sp(offset), ctx);

        if (ctx->flags & SEEN_CALL) {
                /* Argument save area */
                if (config_enabled(CONFIG_64BIT))
                        /* Bottom of current frame */
                        real_off = align_sp(offset) - RSIZE;
                else
                        /* Top of previous frame */
                        real_off = align_sp(offset) + RSIZE;
                emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
                emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

                real_off = 0;
        }

        tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
        /* sflags is essentially a bitmap */
        while (tmp_flags) {
                if ((sflags >> i) & 0x1) {
                        emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
                                             ctx);
                        real_off += RSIZE;
                }
                i++;
                tmp_flags >>= 1;
        }

        /* save return address */
        if (ctx->flags & SEEN_CALL) {
                emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
                real_off += RSIZE;
        }

        /* Setup r_M leaving the alignment gap if necessary */
        if (ctx->flags & SEEN_MEM) {
                if (real_off % (RSIZE * 2))
                        real_off += RSIZE;
                emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
        }
}

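/*
 * Illustrative 32-bit frame for flags = SEEN_CALL | SEEN_A | SEEN_SKB |
 * SEEN_MEM: the sreg bitmap selects $s3, $s5 and $s6, stored at offsets
 * 0, 4 and 8 from the new $sp; $ra lands at 12; r_M then points at
 * $sp + 16 with 64 bytes of scratch words above it, for an 80-byte
 * frame after align_sp(). The incoming a0/a1 go to the caller's O32
 * argument save area just above the frame.
 */
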
static void restore_bpf_jit_regs(struct jit_ctx *ctx,
                                 unsigned int offset)
{
        int i, real_off = 0;
        u32 sflags, tmp_flags;

        if (ctx->flags & SEEN_CALL) {
                if (config_enabled(CONFIG_64BIT))
                        /* Bottom of current frame */
                        real_off = align_sp(offset) - RSIZE;
                else
                        /* Top of previous frame */
                        real_off = align_sp(offset) + RSIZE;
                emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
                emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

                real_off = 0;
        }

        tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
        /* sflags is a bitmap */
        i = 0;
        while (tmp_flags) {
                if ((sflags >> i) & 0x1) {
                        emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
                                            ctx);
                        real_off += RSIZE;
                }
                i++;
                tmp_flags >>= 1;
        }

        /* restore return address */
        if (ctx->flags & SEEN_CALL)
                emit_load_stack_reg(r_ra, r_sp, real_off, ctx);

        /* Restore the sp and discard the scratch memory */
        emit_stack_offset(align_sp(offset), ctx);
}

static unsigned int get_stack_depth(struct jit_ctx *ctx)
{
        int sp_off = 0;

        /* How many s* regs do we need to preserve? */
        sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE;

        if (ctx->flags & SEEN_MEM)
                sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */

        if (ctx->flags & SEEN_CALL)
                /*
                 * The JIT code makes calls to external functions using 2
                 * arguments. Therefore, for o32 we don't need to allocate
                 * space because we don't care if the arguments are lost
                 * across calls. We do need however to preserve incoming
                 * arguments but the space is already allocated for us by
                 * the caller. On the other hand, for n64, we need to allocate
                 * this space ourselves. We need to preserve $ra as well.
                 */
                sp_off += config_enabled(CONFIG_64BIT) ?
                        (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;

        /*
         * Subtract the bytes for the last register since we only care
         * about its starting offset from the stack pointer.
         */
        return sp_off - RSIZE;
}

static void build_prologue(struct jit_ctx *ctx)
{
        u16 first_inst = ctx->skf->insns[0].code;
        int sp_off;

        /* Calculate the total offset for the stack pointer */
        sp_off = get_stack_depth(ctx);
        save_bpf_jit_regs(ctx, sp_off);

        if (ctx->flags & SEEN_SKB)
                emit_reg_move(r_skb, MIPS_R_A0, ctx);

        if (ctx->flags & SEEN_X)
                emit_jit_reg_move(r_X, r_zero, ctx);

        /* Do not leak kernel data to userspace */
        if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
                emit_jit_reg_move(r_A, r_zero, ctx);
}

static void build_epilogue(struct jit_ctx *ctx)
{
        unsigned int sp_off;

        /* Calculate the total offset for the stack pointer */
        sp_off = get_stack_depth(ctx);
        restore_bpf_jit_regs(ctx, sp_off);

        /* Return */
        emit_jr(r_ra, ctx);
        emit_nop(ctx);
}

static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
{
        u8 ret;
        int err;

        err = skb_copy_bits(skb, offset, &ret, 1);

        return (u64)err << 32 | ret;
}

static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
{
        u16 ret;
        int err;

        err = skb_copy_bits(skb, offset, &ret, 2);

        return (u64)err << 32 | ntohs(ret);
}

static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
{
        u32 ret;
        int err;

        err = skb_copy_bits(skb, offset, &ret, 4);

        return (u64)err << 32 | ntohl(ret);
}

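/*
 * The three helpers above pack (err << 32) | value into a single
 * 64-bit result. On 64-bit kernels the generated code recovers the
 * error word with dsrl32 from $v0; on O32 the 64-bit value comes back
 * in the $v0/$v1 pair, which is why r_err and r_val map to different
 * halves depending on endianness.
 */
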
static int build_body(struct jit_ctx *ctx)
{
        void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
        const struct bpf_prog *prog = ctx->skf;
        const struct sock_filter *inst;
        unsigned int i, off, load_order, condt;
        u32 k, b_off __maybe_unused;

        for (i = 0; i < prog->len; i++) {
                u16 code;

                inst = &(prog->insns[i]);
                pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
                         __func__, inst->code, inst->jt, inst->jf, inst->k);
                k = inst->k;
                code = bpf_anc_helper(inst);

                if (ctx->target == NULL)
                        ctx->offsets[i] = ctx->idx * 4;

                switch (code) {
                case BPF_LD | BPF_IMM:
                        /* A <- k ==> li r_A, k */
                        ctx->flags |= SEEN_A;
                        emit_load_imm(r_A, k, ctx);
                        break;
                case BPF_LD | BPF_W | BPF_LEN:
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
                        /* A <- len ==> lw r_A, offset(skb) */
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        off = offsetof(struct sk_buff, len);
                        emit_load(r_A, r_skb, off, ctx);
                        break;
                case BPF_LD | BPF_MEM:
                        /* A <- M[k] ==> lw r_A, offset(M) */
                        ctx->flags |= SEEN_MEM | SEEN_A;
                        emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_LD | BPF_W | BPF_ABS:
                        /* A <- P[k:4] */
                        load_order = 2;
                        goto load;
                case BPF_LD | BPF_H | BPF_ABS:
                        /* A <- P[k:2] */
                        load_order = 1;
                        goto load;
                case BPF_LD | BPF_B | BPF_ABS:
                        /* A <- P[k:1] */
                        load_order = 0;
load:
                        /* the interpreter will deal with the negative K */
                        if ((int)k < 0)
                                return -ENOTSUPP;

                        emit_load_imm(r_off, k, ctx);
load_common:
                        /*
                         * We may get here from the indirect loads so
                         * return if offset is negative.
                         */
                        emit_slt(r_s0, r_off, r_zero, ctx);
                        emit_bcond(MIPS_COND_NE, r_s0, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_zero, ctx);

                        ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
                                SEEN_SKB | SEEN_A;

                        emit_load_func(r_s0, (ptr)load_func[load_order],
                                       ctx);
                        emit_reg_move(MIPS_R_A0, r_skb, ctx);
                        emit_jalr(MIPS_R_RA, r_s0, ctx);
                        /* Load second argument to delay slot */
                        emit_reg_move(MIPS_R_A1, r_off, ctx);
                        /* Check the error value */
                        if (config_enabled(CONFIG_64BIT)) {
                                /* Get error code from the top 32-bits */
                                emit_dsrl32(r_s0, r_val, 0, ctx);
                                /* Branch to 3 instructions ahead */
                                emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
                                           ctx);
                        } else {
                                /* Branch to 3 instructions ahead */
                                emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
                                           ctx);
                        }
                        emit_nop(ctx);
                        /* We are good */
                        emit_b(b_imm(i + 1, ctx), ctx);
                        emit_jit_reg_move(r_A, r_val, ctx);
                        /* Return with error */
                        emit_b(b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_zero, ctx);
                        break;
                case BPF_LD | BPF_W | BPF_IND:
                        /* A <- P[X + k:4] */
                        load_order = 2;
                        goto load_ind;
                case BPF_LD | BPF_H | BPF_IND:
                        /* A <- P[X + k:2] */
                        load_order = 1;
                        goto load_ind;
                case BPF_LD | BPF_B | BPF_IND:
                        /* A <- P[X + k:1] */
                        load_order = 0;
load_ind:
                        ctx->flags |= SEEN_OFF | SEEN_X;
                        emit_addiu(r_off, r_X, k, ctx);
                        goto load_common;
                case BPF_LDX | BPF_IMM:
                        /* X <- k */
                        ctx->flags |= SEEN_X;
                        emit_load_imm(r_X, k, ctx);
                        break;
                case BPF_LDX | BPF_MEM:
                        /* X <- M[k] */
                        ctx->flags |= SEEN_X | SEEN_MEM;
                        emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_LDX | BPF_W | BPF_LEN:
                        /* X <- len */
                        ctx->flags |= SEEN_X | SEEN_SKB;
                        off = offsetof(struct sk_buff, len);
                        emit_load(r_X, r_skb, off, ctx);
                        break;
                case BPF_LDX | BPF_B | BPF_MSH:
                        /* the interpreter will deal with the negative K */
                        if ((int)k < 0)
                                return -ENOTSUPP;

                        /* X <- 4 * (P[k:1] & 0xf) */
                        ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
                        /* Load offset to a1 */
                        emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx);
                        /*
                         * This may emit two instructions so it may not fit
                         * in the delay slot. So use a0 in the delay slot.
                         */
                        emit_load_imm(MIPS_R_A1, k, ctx);
                        emit_jalr(MIPS_R_RA, r_s0, ctx);
                        emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
                        /* Check the error value */
                        if (config_enabled(CONFIG_64BIT)) {
                                /* Top 32-bits of $v0 on 64-bit */
                                emit_dsrl32(r_s0, r_val, 0, ctx);
                                emit_bcond(MIPS_COND_NE, r_s0, r_zero,
                                           3 << 2, ctx);
                        } else {
                                emit_bcond(MIPS_COND_NE, r_err, r_zero,
                                           3 << 2, ctx);
                        }
                        /* No need for delay slot */
                        /* We are good */
                        /* X <- P[k:1] & 0xf */
                        emit_andi(r_X, r_val, 0xf, ctx);
                        /* X << 2 */
                        emit_b(b_imm(i + 1, ctx), ctx);
                        emit_sll(r_X, r_X, 2, ctx); /* delay slot */
                        /* Return with error */
                        emit_b(b_imm(prog->len, ctx), ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        break;
                case BPF_ST:
                        /* M[k] <- A */
                        ctx->flags |= SEEN_MEM | SEEN_A;
                        emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_STX:
                        /* M[k] <- X */
                        ctx->flags |= SEEN_MEM | SEEN_X;
                        emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_ALU | BPF_ADD | BPF_K:
                        /* A += K */
                        ctx->flags |= SEEN_A;
                        emit_addiu(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_ADD | BPF_X:
                        /* A += X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_addu(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_SUB | BPF_K:
                        /* A -= K */
                        ctx->flags |= SEEN_A;
                        emit_addiu(r_A, r_A, -k, ctx);
                        break;
                case BPF_ALU | BPF_SUB | BPF_X:
                        /* A -= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_subu(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_MUL | BPF_K:
                        /* A *= K */
                        /* Load K to scratch register before MUL */
                        ctx->flags |= SEEN_A | SEEN_S0;
                        emit_load_imm(r_s0, k, ctx);
                        emit_mul(r_A, r_A, r_s0, ctx);
                        break;
                case BPF_ALU | BPF_MUL | BPF_X:
                        /* A *= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_mul(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_DIV | BPF_K:
                        /* A /= k */
                        if (k == 1)
                                break;
                        if (optimize_div(&k)) {
                                ctx->flags |= SEEN_A;
                                emit_srl(r_A, r_A, k, ctx);
                                break;
                        }
                        ctx->flags |= SEEN_A | SEEN_S0;
                        emit_load_imm(r_s0, k, ctx);
                        emit_div(r_A, r_s0, ctx);
                        break;
                case BPF_ALU | BPF_MOD | BPF_K:
                        /* A %= k */
                        if (k == 1) {
                                /* A % 1 is always 0 */
                                ctx->flags |= SEEN_A;
                                emit_jit_reg_move(r_A, r_zero, ctx);
                        } else {
                                ctx->flags |= SEEN_A | SEEN_S0;
                                emit_load_imm(r_s0, k, ctx);
                                emit_mod(r_A, r_s0, ctx);
                        }
                        break;
                case BPF_ALU | BPF_DIV | BPF_X:
                        /* A /= X */
                        ctx->flags |= SEEN_X | SEEN_A;
                        /* Check if r_X is zero */
                        emit_bcond(MIPS_COND_EQ, r_X, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        emit_div(r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_MOD | BPF_X:
                        /* A %= X */
                        ctx->flags |= SEEN_X | SEEN_A;
                        /* Check if r_X is zero */
                        emit_bcond(MIPS_COND_EQ, r_X, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        emit_mod(r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_OR | BPF_K:
                        /* A |= K */
                        ctx->flags |= SEEN_A;
                        emit_ori(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_OR | BPF_X:
                        /* A |= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_or(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_XOR | BPF_K:
                        /* A ^= k */
                        ctx->flags |= SEEN_A;
                        emit_xori(r_A, r_A, k, ctx);
                        break;
                case BPF_ANC | SKF_AD_ALU_XOR_X:
                case BPF_ALU | BPF_XOR | BPF_X:
                        /* A ^= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_xor(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_AND | BPF_K:
                        /* A &= K */
                        ctx->flags |= SEEN_A;
                        emit_andi(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_AND | BPF_X:
                        /* A &= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_and(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_LSH | BPF_K:
                        /* A <<= K */
                        ctx->flags |= SEEN_A;
                        emit_sll(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_LSH | BPF_X:
                        /* A <<= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_sllv(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_RSH | BPF_K:
                        /* A >>= K */
                        ctx->flags |= SEEN_A;
                        emit_srl(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_RSH | BPF_X:
                        /* A >>= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_srlv(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_NEG:
                        /* A = -A */
                        ctx->flags |= SEEN_A;
                        emit_neg(r_A, ctx);
                        break;
                case BPF_JMP | BPF_JA:
                        /* pc += K */
                        emit_b(b_imm(i + k + 1, ctx), ctx);
                        emit_nop(ctx);
                        break;
                case BPF_JMP | BPF_JEQ | BPF_K:
                        /* pc += ( A == K ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_EQ | MIPS_COND_K;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JEQ | BPF_X:
                        ctx->flags |= SEEN_X;
                        /* pc += ( A == X ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_EQ | MIPS_COND_X;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGE | BPF_K:
                        /* pc += ( A >= K ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GE | MIPS_COND_K;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGE | BPF_X:
                        ctx->flags |= SEEN_X;
                        /* pc += ( A >= X ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GE | MIPS_COND_X;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGT | BPF_K:
                        /* pc += ( A > K ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GT | MIPS_COND_K;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGT | BPF_X:
                        ctx->flags |= SEEN_X;
                        /* pc += ( A > X ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GT | MIPS_COND_X;
jmp_cmp:
                        /* Greater or Equal */
                        if ((condt & MIPS_COND_GE) ||
                            (condt & MIPS_COND_GT)) {
                                if (condt & MIPS_COND_K) { /* K */
                                        ctx->flags |= SEEN_S0 | SEEN_A;
                                        emit_sltiu(r_s0, r_A, k, ctx);
                                } else { /* X */
                                        ctx->flags |= SEEN_S0 | SEEN_A |
                                                SEEN_X;
                                        emit_sltu(r_s0, r_A, r_X, ctx);
                                }
                                /* A < (K|X) ? r_s0 = 1 */
                                b_off = b_imm(i + inst->jf + 1, ctx);
                                emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
                                           ctx);
                                emit_nop(ctx);
                                /* A > (K|X) ? scratch = 0 */
                                if (condt & MIPS_COND_GT) {
                                        /* Checking for equality */
                                        ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X;
                                        if (condt & MIPS_COND_K)
                                                emit_load_imm(r_s0, k, ctx);
                                        else
                                                emit_jit_reg_move(r_s0, r_X,
                                                                  ctx);
                                        b_off = b_imm(i + inst->jf + 1, ctx);
                                        emit_bcond(MIPS_COND_EQ, r_A, r_s0,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                        /* Finally, A > K|X */
                                        b_off = b_imm(i + inst->jt + 1, ctx);
                                        emit_b(b_off, ctx);
                                        emit_nop(ctx);
                                } else {
                                        /* A >= (K|X) so jump */
                                        b_off = b_imm(i + inst->jt + 1, ctx);
                                        emit_b(b_off, ctx);
                                        emit_nop(ctx);
                                }
                        } else {
                                /* A == K|X */
                                if (condt & MIPS_COND_K) { /* K */
                                        ctx->flags |= SEEN_S0 | SEEN_A;
                                        emit_load_imm(r_s0, k, ctx);
                                        /* jump true */
                                        b_off = b_imm(i + inst->jt + 1, ctx);
                                        emit_bcond(MIPS_COND_EQ, r_A, r_s0,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                        /* jump false */
                                        b_off = b_imm(i + inst->jf + 1,
                                                      ctx);
                                        emit_bcond(MIPS_COND_NE, r_A, r_s0,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                } else { /* X */
                                        /* jump true */
                                        ctx->flags |= SEEN_A | SEEN_X;
                                        b_off = b_imm(i + inst->jt + 1,
                                                      ctx);
                                        emit_bcond(MIPS_COND_EQ, r_A, r_X,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                        /* jump false */
                                        b_off = b_imm(i + inst->jf + 1, ctx);
                                        emit_bcond(MIPS_COND_NE, r_A, r_X,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                }
                        }
                        break;
                case BPF_JMP | BPF_JSET | BPF_K:
                        ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
                        /* pc += (A & K) ? pc -> jt : pc -> jf */
                        emit_load_imm(r_s1, k, ctx);
                        emit_and(r_s0, r_A, r_s1, ctx);
                        /* jump true */
                        b_off = b_imm(i + inst->jt + 1, ctx);
                        emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
                        emit_nop(ctx);
                        /* jump false */
                        b_off = b_imm(i + inst->jf + 1, ctx);
                        emit_b(b_off, ctx);
                        emit_nop(ctx);
                        break;
                case BPF_JMP | BPF_JSET | BPF_X:
                        ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
                        /* pc += (A & X) ? pc -> jt : pc -> jf */
                        emit_and(r_s0, r_A, r_X, ctx);
                        /* jump true */
                        b_off = b_imm(i + inst->jt + 1, ctx);
                        emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
                        emit_nop(ctx);
                        /* jump false */
                        b_off = b_imm(i + inst->jf + 1, ctx);
                        emit_b(b_off, ctx);
                        emit_nop(ctx);
                        break;
                case BPF_RET | BPF_A:
                        ctx->flags |= SEEN_A;
                        if (i != prog->len - 1)
                                /*
                                 * If this is not the last instruction
                                 * then jump to the epilogue
                                 */
                                emit_b(b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_A, ctx); /* delay slot */
                        break;
                case BPF_RET | BPF_K:
                        /*
                         * It can emit two instructions so it does not fit
                         * in the delay slot.
                         */
                        emit_load_imm(r_ret, k, ctx);
                        if (i != prog->len - 1) {
                                /*
                                 * If this is not the last instruction
                                 * then jump to the epilogue
                                 */
                                emit_b(b_imm(prog->len, ctx), ctx);
                                emit_nop(ctx);
                        }
                        break;
                case BPF_MISC | BPF_TAX:
                        /* X = A */
                        ctx->flags |= SEEN_X | SEEN_A;
                        emit_jit_reg_move(r_X, r_A, ctx);
                        break;
                case BPF_MISC | BPF_TXA:
                        /* A = X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_jit_reg_move(r_A, r_X, ctx);
                        break;
                /* AUX */
                case BPF_ANC | SKF_AD_PROTOCOL:
                        /* A = ntohs(skb->protocol) */
                        ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
                                                  protocol) != 2);
                        off = offsetof(struct sk_buff, protocol);
                        emit_half_load(r_A, r_skb, off, ctx);
#ifdef CONFIG_CPU_LITTLE_ENDIAN
                        /* This needs little endian fixup */
                        if (cpu_has_wsbh) {
                                /* R2 and later have the wsbh instruction */
                                emit_wsbh(r_A, r_A, ctx);
                        } else {
                                /* Get first byte */
                                emit_andi(r_tmp_imm, r_A, 0xff, ctx);
                                /* Shift it */
                                emit_sll(r_tmp, r_tmp_imm, 8, ctx);
                                /* Get second byte */
                                emit_srl(r_tmp_imm, r_A, 8, ctx);
                                emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
                                /* Put everything together in r_A */
                                emit_or(r_A, r_tmp, r_tmp_imm, ctx);
                        }
#endif
                        break;
                case BPF_ANC | SKF_AD_CPU:
                        ctx->flags |= SEEN_A | SEEN_OFF;
                        /* A = current_thread_info()->cpu */
                        BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
                                                  cpu) != 4);
                        off = offsetof(struct thread_info, cpu);
                        /* $28/gp points to the thread_info struct */
                        emit_load(r_A, 28, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_IFINDEX:
                        /* A = skb->dev->ifindex */
                        ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
                        off = offsetof(struct sk_buff, dev);
                        /* Load *dev pointer */
                        emit_load_ptr(r_s0, r_skb, off, ctx);
                        /* error (0) in the delay slot */
                        emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_zero, ctx);
                        BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
                                                  ifindex) != 4);
                        off = offsetof(struct net_device, ifindex);
                        emit_load(r_A, r_s0, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_MARK:
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
                        off = offsetof(struct sk_buff, mark);
                        emit_load(r_A, r_skb, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_RXHASH:
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
                        off = offsetof(struct sk_buff, hash);
                        emit_load(r_A, r_skb, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_VLAN_TAG:
                case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
                        ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
                                                  vlan_tci) != 2);
                        off = offsetof(struct sk_buff, vlan_tci);
                        emit_half_load(r_s0, r_skb, off, ctx);
                        if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
                                emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
                        } else {
                                emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
                                /* return 1 if present */
                                emit_sltu(r_A, r_zero, r_A, ctx);
                        }
                        break;
                case BPF_ANC | SKF_AD_PKTTYPE:
                        /* r_A is written, so it must be marked as seen */
                        ctx->flags |= SEEN_SKB | SEEN_A;

                        emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
                        /* Keep only the last 3 bits */
                        emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
#ifdef __BIG_ENDIAN_BITFIELD
                        /* Get the actual packet type to the lower 3 bits */
                        emit_srl(r_A, r_A, 5, ctx);
#endif
                        break;
                case BPF_ANC | SKF_AD_QUEUE:
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
                                                  queue_mapping) != 2);
                        BUILD_BUG_ON(offsetof(struct sk_buff,
                                              queue_mapping) > 0xff);
                        off = offsetof(struct sk_buff, queue_mapping);
                        emit_half_load(r_A, r_skb, off, ctx);
                        break;
                default:
                        pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
                                 inst->code);
                        return -1;
                }
        }

        /* compute offsets only during the first pass */
        if (ctx->target == NULL)
                ctx->offsets[i] = ctx->idx * 4;

        return 0;
}

int bpf_jit_enable __read_mostly;

void bpf_jit_compile(struct bpf_prog *fp)
{
        struct jit_ctx ctx;
        unsigned int alloc_size, tmp_idx;

        if (!bpf_jit_enable)
                return;

        memset(&ctx, 0, sizeof(ctx));

        /* offsets[fp->len] holds the epilogue target, hence the +1 */
        ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
        if (ctx.offsets == NULL)
                return;

        ctx.skf = fp;

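        /*
         * First pass: ctx.target is still NULL, so build_body() only
         * sizes the code, records the per-instruction offsets and
         * collects the SEEN_* flags the prologue/epilogue honour.
         */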
        if (build_body(&ctx))
                goto out;

        tmp_idx = ctx.idx;
        build_prologue(&ctx);
        ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
        /* just to complete the ctx.idx count */
        build_epilogue(&ctx);

        alloc_size = 4 * ctx.idx;
        ctx.target = module_alloc(alloc_size);
        if (ctx.target == NULL)
                goto out;

        /* Clean it */
        memset(ctx.target, 0, alloc_size);

        ctx.idx = 0;

        /* Generate the actual JIT code */
        build_prologue(&ctx);
        build_body(&ctx);
        build_epilogue(&ctx);

        /* Update the icache */
        flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));

        if (bpf_jit_enable > 1)
                /* Dump JIT code */
                bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

        fp->bpf_func = (void *)ctx.target;
        fp->jited = true;

out:
        kfree(ctx.offsets);
}

void bpf_jit_free(struct bpf_prog *fp)
{
        if (fp->jited)
                module_free(NULL, fp->bpf_func);

        bpf_prog_unlock_free(fp);
}