linux/arch/mips/net/bpf_jit.c
/*
 * Just-In-Time compiler for BPF filters on MIPS
 *
 * Copyright (c) 2014 Imagination Technologies Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/kconfig.h>
#include <linux/moduleloader.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/uasm.h>

#include "bpf_jit.h"

/* ABI
 *
 * s0   1st scratch register
 * s1   2nd scratch register
 * s2   offset register
 * s3   BPF register A
 * s4   BPF register X
 * s5   *skb
 * s6   *scratch memory
 *
 * On entry (*bpf_func)(*skb, *filter)
 * a0 = MIPS_R_A0 = skb;
 * a1 = MIPS_R_A1 = filter;
 *
 * Stack
 * ...
 * M[15]
 * M[14]
 * M[13]
 * ...
 * M[0] <-- r_M
 * saved reg k-1
 * saved reg k-2
 * ...
 * saved reg 0 <-- r_sp
 * <no argument area>
 *
 *                     Packet layout
 *
 * <--------------------- len ------------------------>
 * <--skb-len(r_skb_hl)--><----- skb->data_len ------->
 * ----------------------------------------------------
 * |                  skb->data                        |
 * ----------------------------------------------------
 */

#define RSIZE   (sizeof(unsigned long))
#define ptr typeof(unsigned long)

/* ABI specific return values */
#ifdef CONFIG_32BIT /* O32 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define r_err   MIPS_R_V1
#define r_val   MIPS_R_V0
#else /* CONFIG_CPU_LITTLE_ENDIAN */
#define r_err   MIPS_R_V0
#define r_val   MIPS_R_V1
#endif
#else /* N64 */
#define r_err   MIPS_R_V0
#define r_val   MIPS_R_V0
#endif

#define r_ret   MIPS_R_V0

/*
 * Use 2 scratch registers to avoid pipeline interlocks.
 * There is no overhead during epilogue and prologue since
 * any of the $s0-$s6 registers will only be preserved if
 * they are going to actually be used.
 */
#define r_s0            MIPS_R_S0 /* scratch reg 1 */
#define r_s1            MIPS_R_S1 /* scratch reg 2 */
#define r_off           MIPS_R_S2
#define r_A             MIPS_R_S3
#define r_X             MIPS_R_S4
#define r_skb           MIPS_R_S5
#define r_M             MIPS_R_S6
#define r_tmp_imm       MIPS_R_T6 /* No need to preserve this */
#define r_tmp           MIPS_R_T7 /* No need to preserve this */
#define r_zero          MIPS_R_ZERO
#define r_sp            MIPS_R_SP
#define r_ra            MIPS_R_RA

#define SCRATCH_OFF(k)          (4 * (k))

/* JIT flags */
#define SEEN_CALL               (1 << BPF_MEMWORDS)
#define SEEN_SREG_SFT           (BPF_MEMWORDS + 1)
#define SEEN_SREG_BASE          (1 << SEEN_SREG_SFT)
#define SEEN_SREG(x)            (SEEN_SREG_BASE << (x))
#define SEEN_S0                 SEEN_SREG(0)
#define SEEN_S1                 SEEN_SREG(1)
#define SEEN_OFF                SEEN_SREG(2)
#define SEEN_A                  SEEN_SREG(3)
#define SEEN_X                  SEEN_SREG(4)
#define SEEN_SKB                SEEN_SREG(5)
#define SEEN_MEM                SEEN_SREG(6)
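
/*
 * ctx->flags layout: bit BPF_MEMWORDS (16) marks a call to an external
 * helper, and bits SEEN_SREG_SFT (17) and up map one-to-one onto $s0-$s6
 * so the prologue/epilogue only preserve the callee-saved registers the
 * filter actually touches. The low 16 bits (one per BPF_MEMWORDS scratch
 * word) appear to be reserved for per-word tracking but are not used
 * individually here; SEEN_MEM covers the whole scratch area.
 */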

/* Arguments used by JIT */
#define ARGS_USED_BY_JIT        2 /* only applicable to 64-bit */

#define SBIT(x)                 (1 << (x)) /* Signed version of BIT() */

/**
 * struct jit_ctx - JIT context
 * @skf:                The sk_filter
 * @prologue_bytes:     Number of bytes for prologue
 * @idx:                Instruction index
 * @flags:              JIT flags
 * @offsets:            Instruction offsets
 * @target:             Memory location for the compiled filter
 */
struct jit_ctx {
        const struct bpf_prog *skf;
        unsigned int prologue_bytes;
        u32 idx;
        u32 flags;
        u32 *offsets;
        u32 *target;
};

static inline int optimize_div(u32 *k)
{
        /* power of 2 divides can be implemented with right shift */
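        /*
         * Example: k = 8 -> 8 & 7 == 0, so k is rewritten to ilog2(8) = 3
         * and the caller emits a right shift by 3 instead of a division.
         * (k == 0 would also pass this test, but the classic BPF checker
         * is expected to reject constant divide-by-zero filters before
         * the JIT ever sees them.)
         */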
        if (!(*k & (*k-1))) {
                *k = ilog2(*k);
                return 1;
        }

        return 0;
}

static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);

/* Simply emit the instruction if the JIT memory space has been allocated */
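/*
 * Note that (ctx)->idx advances even when nothing is written: the first
 * pass runs with ctx->target == NULL purely to measure the code size.
 */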
#define emit_instr(ctx, func, ...)                      \
do {                                                    \
        if ((ctx)->target != NULL) {                    \
                u32 *p = &(ctx)->target[ctx->idx];      \
                uasm_i_##func(&p, ##__VA_ARGS__);       \
        }                                               \
        (ctx)->idx++;                                   \
} while (0)

/*
 * Similar to emit_instr but it must be used when we need to emit
 * 32-bit or 64-bit instructions
 */
#define emit_long_instr(ctx, func, ...)                 \
do {                                                    \
        if ((ctx)->target != NULL) {                    \
                u32 *p = &(ctx)->target[ctx->idx];      \
                UASM_i_##func(&p, ##__VA_ARGS__);       \
        }                                               \
        (ctx)->idx++;                                   \
} while (0)

/* Determine if immediate is within the 16-bit signed range */
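/* i.e. -32768 <= imm <= 32767, the immediate range of addiu and friends */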
static inline bool is_range16(s32 imm)
{
        return !(imm >= SBIT(15) || imm < -SBIT(15));
}

static inline void emit_addu(unsigned int dst, unsigned int src1,
                             unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, addu, dst, src1, src2);
}

static inline void emit_nop(struct jit_ctx *ctx)
{
        emit_instr(ctx, nop);
}

/* Load a u32 immediate to a register */
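/*
 * An immediate outside the signed 16-bit range takes two instructions,
 * e.g. imm = 0x12345678:
 *   lui  r_tmp_imm, 0x1234
 *   ori  dst, r_tmp_imm, 0x5678
 * which is why ctx->idx is bumped a second time below.
 */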
static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                /* addiu can only handle s16 */
                if (!is_range16(imm)) {
                        u32 *p = &ctx->target[ctx->idx];
                        uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
                        p = &ctx->target[ctx->idx + 1];
                        uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
                } else {
                        u32 *p = &ctx->target[ctx->idx];
                        uasm_i_addiu(&p, dst, r_zero, imm);
                }
        }
        ctx->idx++;

        if (!is_range16(imm))
                ctx->idx++;
}

static inline void emit_or(unsigned int dst, unsigned int src1,
                           unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, or, dst, src1, src2);
}

static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
                            struct jit_ctx *ctx)
{
        if (imm >= BIT(16)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_or(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, ori, dst, src, imm);
        }
}
static inline void emit_daddiu(unsigned int dst, unsigned int src,
                               int imm, struct jit_ctx *ctx)
{
        /*
         * Only used for stack, so the imm is relatively small
         * and it fits in 15 bits
         */
        emit_instr(ctx, daddiu, dst, src, imm);
}

static inline void emit_addiu(unsigned int dst, unsigned int src,
                              u32 imm, struct jit_ctx *ctx)
{
        if (!is_range16(imm)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_addu(dst, r_tmp, src, ctx);
        } else {
                emit_instr(ctx, addiu, dst, src, imm);
        }
}

static inline void emit_and(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, and, dst, src1, src2);
}

static inline void emit_andi(unsigned int dst, unsigned int src,
                             u32 imm, struct jit_ctx *ctx)
{
        /* If imm does not fit in u16 then load it to register */
        if (imm >= BIT(16)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_and(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, andi, dst, src, imm);
        }
}

static inline void emit_xor(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, xor, dst, src1, src2);
}

static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
{
        /* If imm does not fit in u16 then load it to register */
        if (imm >= BIT(16)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_xor(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, xori, dst, src, imm);
        }
}

static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
{
        emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
}

static inline void emit_subu(unsigned int dst, unsigned int src1,
                             unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, subu, dst, src1, src2);
}

static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
{
        emit_subu(reg, r_zero, reg, ctx);
}

static inline void emit_sllv(unsigned int dst, unsigned int src,
                             unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, sllv, dst, src, sa);
}

static inline void emit_sll(unsigned int dst, unsigned int src,
                            unsigned int sa, struct jit_ctx *ctx)
{
        /* sa is 5-bits long */
        if (sa >= BIT(5))
                /* Shifting >= 32 results in zero */
                emit_jit_reg_move(dst, r_zero, ctx);
        else
                emit_instr(ctx, sll, dst, src, sa);
}

static inline void emit_srlv(unsigned int dst, unsigned int src,
                             unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, srlv, dst, src, sa);
}

static inline void emit_srl(unsigned int dst, unsigned int src,
                            unsigned int sa, struct jit_ctx *ctx)
{
        /* sa is 5-bits long */
        if (sa >= BIT(5))
                /* Shifting >= 32 results in zero */
                emit_jit_reg_move(dst, r_zero, ctx);
        else
                emit_instr(ctx, srl, dst, src, sa);
}

static inline void emit_slt(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, slt, dst, src1, src2);
}

static inline void emit_sltu(unsigned int dst, unsigned int src1,
                             unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, sltu, dst, src1, src2);
}

static inline void emit_sltiu(unsigned dst, unsigned int src,
                              unsigned int imm, struct jit_ctx *ctx)
{
        /* 16 bit immediate */
        if (!is_range16((s32)imm)) {
                emit_load_imm(r_tmp, imm, ctx);
                emit_sltu(dst, src, r_tmp, ctx);
        } else {
                emit_instr(ctx, sltiu, dst, src, imm);
        }
}

/* Store register on the stack */
static inline void emit_store_stack_reg(ptr reg, ptr base,
                                        unsigned int offset,
                                        struct jit_ctx *ctx)
{
        emit_long_instr(ctx, SW, reg, offset, base);
}

static inline void emit_store(ptr reg, ptr base, unsigned int offset,
                              struct jit_ctx *ctx)
{
        emit_instr(ctx, sw, reg, offset, base);
}

static inline void emit_load_stack_reg(ptr reg, ptr base,
                                       unsigned int offset,
                                       struct jit_ctx *ctx)
{
        emit_long_instr(ctx, LW, reg, offset, base);
}

static inline void emit_load(unsigned int reg, unsigned int base,
                             unsigned int offset, struct jit_ctx *ctx)
{
        emit_instr(ctx, lw, reg, offset, base);
}

static inline void emit_load_byte(unsigned int reg, unsigned int base,
                                  unsigned int offset, struct jit_ctx *ctx)
{
        emit_instr(ctx, lb, reg, offset, base);
}

static inline void emit_half_load(unsigned int reg, unsigned int base,
                                  unsigned int offset, struct jit_ctx *ctx)
{
        emit_instr(ctx, lh, reg, offset, base);
}

static inline void emit_mul(unsigned int dst, unsigned int src1,
                            unsigned int src2, struct jit_ctx *ctx)
{
        emit_instr(ctx, mul, dst, src1, src2);
}

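/*
 * MIPS divu leaves the quotient in LO and the remainder in HI, hence
 * the mflo in emit_div() and the mfhi in emit_mod(). Neither helper
 * guards against a zero divisor: build_body() emits an explicit zero
 * check before dividing by X, and a constant zero divisor is rejected
 * by the classic BPF checker before the JIT runs.
 */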
static inline void emit_div(unsigned int dst, unsigned int src,
                            struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                u32 *p = &ctx->target[ctx->idx];
                uasm_i_divu(&p, dst, src);
                p = &ctx->target[ctx->idx + 1];
                uasm_i_mflo(&p, dst);
        }
        ctx->idx += 2; /* 2 insts */
}

static inline void emit_mod(unsigned int dst, unsigned int src,
                            struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                u32 *p = &ctx->target[ctx->idx];
                uasm_i_divu(&p, dst, src);
                p = &ctx->target[ctx->idx + 1];
                uasm_i_mfhi(&p, dst);
        }
        ctx->idx += 2; /* 2 insts */
}

static inline void emit_dsll(unsigned int dst, unsigned int src,
                             unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, dsll, dst, src, sa);
}

static inline void emit_dsrl32(unsigned int dst, unsigned int src,
                               unsigned int sa, struct jit_ctx *ctx)
{
        emit_instr(ctx, dsrl32, dst, src, sa);
}

static inline void emit_wsbh(unsigned int dst, unsigned int src,
                             struct jit_ctx *ctx)
{
        emit_instr(ctx, wsbh, dst, src);
}

/* load pointer to register */
static inline void emit_load_ptr(unsigned int dst, unsigned int src,
                                     int imm, struct jit_ctx *ctx)
{
        /* src contains the base addr of the 32/64-bit pointer */
        emit_long_instr(ctx, LW, dst, imm, src);
}

/* load a function pointer to register */
static inline void emit_load_func(unsigned int reg, ptr imm,
                                  struct jit_ctx *ctx)
{
        if (config_enabled(CONFIG_64BIT)) {
                /* At this point imm is always 64-bit */
                emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
                emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
                emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
                emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
                emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
        } else {
                emit_load_imm(reg, imm, ctx);
        }
}

/* Move to real MIPS register */
static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
        emit_long_instr(ctx, ADDU, dst, src, r_zero);
}

/* Move to JIT (32-bit) register */
static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
        emit_addu(dst, src, r_zero, ctx);
}

/* Compute the immediate value for PC-relative branches. */
static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
{
        if (ctx->target == NULL)
                return 0;

        /*
         * We want a pc-relative branch. We only do forward branches
         * so tgt is always after pc. tgt is the instruction offset
         * we want to jump to.
         *
         * Branch on MIPS:
         * I: target_offset <- sign_extend(offset)
         * I+1: PC += target_offset (delay slot)
         *
         * ctx->idx currently points to the branch instruction
         * but the offset is added to the delay slot so we need
         * to subtract 4.
         */
        return ctx->offsets[tgt] -
                (ctx->idx * 4 - ctx->prologue_bytes) - 4;
}

static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
                             unsigned int imm, struct jit_ctx *ctx)
{
        if (ctx->target != NULL) {
                u32 *p = &ctx->target[ctx->idx];

                switch (cond) {
                case MIPS_COND_EQ:
                        uasm_i_beq(&p, reg1, reg2, imm);
                        break;
                case MIPS_COND_NE:
                        uasm_i_bne(&p, reg1, reg2, imm);
                        break;
                case MIPS_COND_ALL:
                        uasm_i_b(&p, imm);
                        break;
                default:
                        pr_warn("%s: Unhandled branch conditional: %d\n",
                                __func__, cond);
                }
        }
        ctx->idx++;
}

static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
{
        emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
}

static inline void emit_jalr(unsigned int link, unsigned int reg,
                             struct jit_ctx *ctx)
{
        emit_instr(ctx, jalr, link, reg);
}

static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
{
        emit_instr(ctx, jr, reg);
}

static inline u16 align_sp(unsigned int num)
{
        /* Double word alignment for 32-bit, quadword for 64-bit */
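        /* e.g. num = 20 -> 24 on 32-bit, 32 on 64-bit */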
        unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8;
        num = (num + (align - 1)) & -align;
        return num;
}

static bool is_load_to_a(u16 inst)
{
        switch (inst) {
        case BPF_LD | BPF_W | BPF_LEN:
        case BPF_LD | BPF_W | BPF_ABS:
        case BPF_LD | BPF_H | BPF_ABS:
        case BPF_LD | BPF_B | BPF_ABS:
                return true;
        default:
                return false;
        }
}

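/*
 * Prologue helper: grow the stack by align_sp(offset) bytes, then spill
 * the incoming arguments and $ra (when the filter makes calls), every
 * s-register the filter uses, and finally point r_M at the scratch
 * words, matching the frame layout sketched at the top of this file.
 */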
static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
{
        int i = 0, real_off = 0;
        u32 sflags, tmp_flags;

        /* Adjust the stack pointer */
        emit_stack_offset(-align_sp(offset), ctx);

        if (ctx->flags & SEEN_CALL) {
                /* Argument save area */
                if (config_enabled(CONFIG_64BIT))
                        /* Bottom of current frame */
                        real_off = align_sp(offset) - RSIZE;
                else
                        /* Top of previous frame */
                        real_off = align_sp(offset) + RSIZE;
                emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
                emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

                real_off = 0;
        }

        tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
        /* sflags is essentially a bitmap */
        while (tmp_flags) {
                if ((sflags >> i) & 0x1) {
                        emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
                                             ctx);
                        real_off += RSIZE;
                }
                i++;
                tmp_flags >>= 1;
        }

        /* save return address */
        if (ctx->flags & SEEN_CALL) {
                emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
                real_off += RSIZE;
        }

        /* Setup r_M leaving the alignment gap if necessary */
        if (ctx->flags & SEEN_MEM) {
                if (real_off % (RSIZE * 2))
                        real_off += RSIZE;
                emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
        }
}

static void restore_bpf_jit_regs(struct jit_ctx *ctx,
                                 unsigned int offset)
{
        int i, real_off = 0;
        u32 sflags, tmp_flags;

        if (ctx->flags & SEEN_CALL) {
                if (config_enabled(CONFIG_64BIT))
                        /* Bottom of current frame */
                        real_off = align_sp(offset) - RSIZE;
                else
                        /* Top of previous frame */
                        real_off = align_sp(offset) + RSIZE;
                emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
                emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

                real_off = 0;
        }

        tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
        /* sflags is a bitmap */
        i = 0;
        while (tmp_flags) {
                if ((sflags >> i) & 0x1) {
                        emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
                                            ctx);
                        real_off += RSIZE;
                }
                i++;
                tmp_flags >>= 1;
        }

        /* restore return address */
        if (ctx->flags & SEEN_CALL)
                emit_load_stack_reg(r_ra, r_sp, real_off, ctx);

        /* Restore the sp and discard the scratch memory */
        emit_stack_offset(align_sp(offset), ctx);
}

static unsigned int get_stack_depth(struct jit_ctx *ctx)
{
        int sp_off = 0;

        /* How many s* registers do we need to preserve? */
        sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE;

        if (ctx->flags & SEEN_MEM)
                sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */

        if (ctx->flags & SEEN_CALL)
                /*
                 * The JIT code makes calls to external functions using 2
                 * arguments. Therefore, for o32 we don't need to allocate
                 * space because we don't care if the arguments are lost
                 * across calls. We do need however to preserve incoming
                 * arguments but the space is already allocated for us by
                 * the caller. On the other hand, for n64, we need to allocate
                 * this space ourselves. We need to preserve $ra as well.
                 */
                sp_off += config_enabled(CONFIG_64BIT) ?
                        (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;

        return sp_off;
}

static void build_prologue(struct jit_ctx *ctx)
{
        u16 first_inst = ctx->skf->insns[0].code;
        int sp_off;

        /* Calculate the total offset for the stack pointer */
        sp_off = get_stack_depth(ctx);
        save_bpf_jit_regs(ctx, sp_off);

        if (ctx->flags & SEEN_SKB)
                emit_reg_move(r_skb, MIPS_R_A0, ctx);

        if (ctx->flags & SEEN_X)
                emit_jit_reg_move(r_X, r_zero, ctx);

        /* Do not leak kernel data to userspace */
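        /*
         * r_A otherwise starts with whatever the caller left in $s3; the
         * clear is skipped only when the first instruction overwrites A
         * anyway (a load to A) or never exposes it (ret k).
         */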
        if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
                emit_jit_reg_move(r_A, r_zero, ctx);
}

static void build_epilogue(struct jit_ctx *ctx)
{
        unsigned int sp_off;

        /* Calculate the total offset for the stack pointer */

        sp_off = get_stack_depth(ctx);
        restore_bpf_jit_regs(ctx, sp_off);

        /* Return */
        emit_jr(r_ra, ctx);
        emit_nop(ctx);
}

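/*
 * Out-of-line skb accessors called from the JITed code. Each packs the
 * skb_copy_bits() error code into the upper 32 bits of the returned u64
 * and the payload (converted to host order where needed) into the lower
 * 32 bits, so the generated code can test r_err (or the upper half via
 * dsrl32 on 64-bit) and consume r_val separately.
 */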
static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
{
        u8 ret;
        int err;

        err = skb_copy_bits(skb, offset, &ret, 1);

        return (u64)err << 32 | ret;
}

static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
{
        u16 ret;
        int err;

        err = skb_copy_bits(skb, offset, &ret, 2);

        return (u64)err << 32 | ntohs(ret);
}

static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
{
        u32 ret;
        int err;

        err = skb_copy_bits(skb, offset, &ret, 4);

        return (u64)err << 32 | ntohl(ret);
}

static int build_body(struct jit_ctx *ctx)
{
        void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
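        /* load_func is indexed by load_order: 0 -> byte, 1 -> halfword, 2 -> word */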
        const struct bpf_prog *prog = ctx->skf;
        const struct sock_filter *inst;
        unsigned int i, off, load_order, condt;
        u32 k, b_off __maybe_unused;

        for (i = 0; i < prog->len; i++) {
                u16 code;

                inst = &(prog->insns[i]);
                pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
                         __func__, inst->code, inst->jt, inst->jf, inst->k);
                k = inst->k;
                code = bpf_anc_helper(inst);

                if (ctx->target == NULL)
                        ctx->offsets[i] = ctx->idx * 4;

                switch (code) {
                case BPF_LD | BPF_IMM:
                        /* A <- k ==> li r_A, k */
                        ctx->flags |= SEEN_A;
                        emit_load_imm(r_A, k, ctx);
                        break;
                case BPF_LD | BPF_W | BPF_LEN:
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
                        /* A <- len ==> lw r_A, offset(skb) */
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        off = offsetof(struct sk_buff, len);
                        emit_load(r_A, r_skb, off, ctx);
                        break;
                case BPF_LD | BPF_MEM:
                        /* A <- M[k] ==> lw r_A, offset(M) */
                        ctx->flags |= SEEN_MEM | SEEN_A;
                        emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_LD | BPF_W | BPF_ABS:
                        /* A <- P[k:4] */
                        load_order = 2;
                        goto load;
                case BPF_LD | BPF_H | BPF_ABS:
                        /* A <- P[k:2] */
                        load_order = 1;
                        goto load;
                case BPF_LD | BPF_B | BPF_ABS:
                        /* A <- P[k:1] */
                        load_order = 0;
load:
                        /* the interpreter will deal with the negative K */
                        if ((int)k < 0)
                                return -ENOTSUPP;

                        emit_load_imm(r_off, k, ctx);
load_common:
                        /*
                         * We may get here from the indirect loads so
                         * return if offset is negative.
                         */
                        emit_slt(r_s0, r_off, r_zero, ctx);
                        emit_bcond(MIPS_COND_NE, r_s0, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_zero, ctx);

                        ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
                                SEEN_SKB | SEEN_A;

                        emit_load_func(r_s0, (ptr)load_func[load_order],
                                      ctx);
                        emit_reg_move(MIPS_R_A0, r_skb, ctx);
                        emit_jalr(MIPS_R_RA, r_s0, ctx);
                        /* Load second argument to delay slot */
                        emit_reg_move(MIPS_R_A1, r_off, ctx);
                        /* Check the error value */
                        if (config_enabled(CONFIG_64BIT)) {
                                /* Get error code from the top 32-bits */
                                emit_dsrl32(r_s0, r_val, 0, ctx);
                                /* Branch to 3 instructions ahead */
                                emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
                                           ctx);
                        } else {
                                /* Branch to 3 instructions ahead */
                                emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
                                           ctx);
                        }
                        emit_nop(ctx);
                        /* We are good */
                        emit_b(b_imm(i + 1, ctx), ctx);
                        emit_jit_reg_move(r_A, r_val, ctx);
                        /* Return with error */
                        emit_b(b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_zero, ctx);
                        break;
                case BPF_LD | BPF_W | BPF_IND:
                        /* A <- P[X + k:4] */
                        load_order = 2;
                        goto load_ind;
                case BPF_LD | BPF_H | BPF_IND:
                        /* A <- P[X + k:2] */
                        load_order = 1;
                        goto load_ind;
                case BPF_LD | BPF_B | BPF_IND:
                        /* A <- P[X + k:1] */
                        load_order = 0;
load_ind:
                        ctx->flags |= SEEN_OFF | SEEN_X;
                        emit_addiu(r_off, r_X, k, ctx);
                        goto load_common;
                case BPF_LDX | BPF_IMM:
                        /* X <- k */
                        ctx->flags |= SEEN_X;
                        emit_load_imm(r_X, k, ctx);
                        break;
                case BPF_LDX | BPF_MEM:
                        /* X <- M[k] */
                        ctx->flags |= SEEN_X | SEEN_MEM;
                        emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_LDX | BPF_W | BPF_LEN:
                        /* X <- len */
                        ctx->flags |= SEEN_X | SEEN_SKB;
                        off = offsetof(struct sk_buff, len);
                        emit_load(r_X, r_skb, off, ctx);
                        break;
                case BPF_LDX | BPF_B | BPF_MSH:
                        /* the interpreter will deal with the negative K */
                        if ((int)k < 0)
                                return -ENOTSUPP;

                        /* X <- 4 * (P[k:1] & 0xf) */
                        ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
                        /* Load offset to a1 */
                        emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx);
                        /*
                         * This may emit two instructions so it may not fit
                         * in the delay slot. So use a0 in the delay slot.
                         */
                        emit_load_imm(MIPS_R_A1, k, ctx);
                        emit_jalr(MIPS_R_RA, r_s0, ctx);
                        emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
                        /* Check the error value */
                        if (config_enabled(CONFIG_64BIT)) {
                                /* Top 32-bits of $v0 on 64-bit */
                                emit_dsrl32(r_s0, r_val, 0, ctx);
                                emit_bcond(MIPS_COND_NE, r_s0, r_zero,
                                           3 << 2, ctx);
                        } else {
                                emit_bcond(MIPS_COND_NE, r_err, r_zero,
                                           3 << 2, ctx);
                        }
                        /* No need for delay slot */
                        /* We are good */
                        /* X <- P[k:1] & 0xf */
                        emit_andi(r_X, r_val, 0xf, ctx);
                        /* X <<= 2 */
                        emit_b(b_imm(i + 1, ctx), ctx);
                        emit_sll(r_X, r_X, 2, ctx); /* delay slot */
                        /* Return with error */
                        emit_b(b_imm(prog->len, ctx), ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        break;
                case BPF_ST:
                        /* M[k] <- A */
                        ctx->flags |= SEEN_MEM | SEEN_A;
                        emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_STX:
                        /* M[k] <- X */
                        ctx->flags |= SEEN_MEM | SEEN_X;
                        emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
                        break;
                case BPF_ALU | BPF_ADD | BPF_K:
                        /* A += K */
                        ctx->flags |= SEEN_A;
                        emit_addiu(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_ADD | BPF_X:
                        /* A += X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_addu(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_SUB | BPF_K:
                        /* A -= K */
                        ctx->flags |= SEEN_A;
                        emit_addiu(r_A, r_A, -k, ctx);
                        break;
                case BPF_ALU | BPF_SUB | BPF_X:
                        /* A -= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_subu(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_MUL | BPF_K:
                        /* A *= K */
                        /* Load K to scratch register before MUL */
                        ctx->flags |= SEEN_A | SEEN_S0;
                        emit_load_imm(r_s0, k, ctx);
                        emit_mul(r_A, r_A, r_s0, ctx);
                        break;
                case BPF_ALU | BPF_MUL | BPF_X:
                        /* A *= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_mul(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_DIV | BPF_K:
                        /* A /= k */
                        if (k == 1)
                                break;
                        if (optimize_div(&k)) {
                                ctx->flags |= SEEN_A;
                                emit_srl(r_A, r_A, k, ctx);
                                break;
                        }
                        ctx->flags |= SEEN_A | SEEN_S0;
                        emit_load_imm(r_s0, k, ctx);
                        emit_div(r_A, r_s0, ctx);
                        break;
                case BPF_ALU | BPF_MOD | BPF_K:
                        /* A %= k */
                        if (k == 1) {
                                ctx->flags |= SEEN_A;
                                emit_jit_reg_move(r_A, r_zero, ctx);
                        } else {
                                ctx->flags |= SEEN_A | SEEN_S0;
                                emit_load_imm(r_s0, k, ctx);
                                emit_mod(r_A, r_s0, ctx);
                        }
                        break;
                case BPF_ALU | BPF_DIV | BPF_X:
                        /* A /= X; X == 0 makes the filter return 0 */
                        ctx->flags |= SEEN_X | SEEN_A;
                        /* Check if r_X is zero */
                        emit_bcond(MIPS_COND_EQ, r_X, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        emit_div(r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_MOD | BPF_X:
                        /* A %= X; X == 0 makes the filter return 0 */
                        ctx->flags |= SEEN_X | SEEN_A;
                        /* Check if r_X is zero */
                        emit_bcond(MIPS_COND_EQ, r_X, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        emit_mod(r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_OR | BPF_K:
                        /* A |= K */
                        ctx->flags |= SEEN_A;
                        emit_ori(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_OR | BPF_X:
                        /* A |= X (register OR, not an immediate) */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_or(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_XOR | BPF_K:
                        /* A ^= k */
                        ctx->flags |= SEEN_A;
                        emit_xori(r_A, r_A, k, ctx);
                        break;
                case BPF_ANC | SKF_AD_ALU_XOR_X:
                case BPF_ALU | BPF_XOR | BPF_X:
                        /* A ^= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_xor(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_AND | BPF_K:
                        /* A &= K */
                        ctx->flags |= SEEN_A;
                        emit_andi(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_AND | BPF_X:
                        /* A &= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_and(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_LSH | BPF_K:
                        /* A <<= K */
                        ctx->flags |= SEEN_A;
                        emit_sll(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_LSH | BPF_X:
                        /* A <<= X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_sllv(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_RSH | BPF_K:
                        /* A >>= K */
                        ctx->flags |= SEEN_A;
                        emit_srl(r_A, r_A, k, ctx);
                        break;
                case BPF_ALU | BPF_RSH | BPF_X:
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_srlv(r_A, r_A, r_X, ctx);
                        break;
                case BPF_ALU | BPF_NEG:
                        /* A = -A */
                        ctx->flags |= SEEN_A;
                        emit_neg(r_A, ctx);
                        break;
                case BPF_JMP | BPF_JA:
                        /* pc += K */
                        emit_b(b_imm(i + k + 1, ctx), ctx);
                        emit_nop(ctx);
                        break;
                case BPF_JMP | BPF_JEQ | BPF_K:
                        /* pc += ( A == K ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_EQ | MIPS_COND_K;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JEQ | BPF_X:
                        ctx->flags |= SEEN_X;
                        /* pc += ( A == X ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_EQ | MIPS_COND_X;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGE | BPF_K:
                        /* pc += ( A >= K ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GE | MIPS_COND_K;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGE | BPF_X:
                        ctx->flags |= SEEN_X;
                        /* pc += ( A >= X ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GE | MIPS_COND_X;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGT | BPF_K:
                        /* pc += ( A > K ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GT | MIPS_COND_K;
                        goto jmp_cmp;
                case BPF_JMP | BPF_JGT | BPF_X:
                        ctx->flags |= SEEN_X;
                        /* pc += ( A > X ) ? pc->jt : pc->jf */
                        condt = MIPS_COND_GT | MIPS_COND_X;
jmp_cmp:
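                        /*
                         * For JGE/JGT the test is built from sltu/sltiu:
                         *   sltu r_s0, r_A, (K|X)  # r_s0 = (A < K|X)
                         *   bne  r_s0, zero, jf    # A below -> false path
                         * and for JGT an extra equality check sends
                         * A == (K|X) to the false path before the
                         * unconditional branch to the true path.
                         */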
                        /* Greater or Equal */
                        if ((condt & MIPS_COND_GE) ||
                            (condt & MIPS_COND_GT)) {
                                if (condt & MIPS_COND_K) { /* K */
                                        ctx->flags |= SEEN_S0 | SEEN_A;
                                        emit_sltiu(r_s0, r_A, k, ctx);
                                } else { /* X */
                                        ctx->flags |= SEEN_S0 | SEEN_A |
                                                SEEN_X;
                                        emit_sltu(r_s0, r_A, r_X, ctx);
                                }
                                /* A < (K|X) ? r_s0 = 1 */
                                b_off = b_imm(i + inst->jf + 1, ctx);
                                emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
                                           ctx);
                                emit_nop(ctx);
                                /* For GT, A == (K|X) also takes the false path */
                                if (condt & MIPS_COND_GT) {
                                        /* Checking for equality */
                                        ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X;
                                        if (condt & MIPS_COND_K)
                                                emit_load_imm(r_s0, k, ctx);
                                        else
                                                emit_jit_reg_move(r_s0, r_X,
                                                                  ctx);
                                        b_off = b_imm(i + inst->jf + 1, ctx);
                                        emit_bcond(MIPS_COND_EQ, r_A, r_s0,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                        /* Finally, A > K|X */
                                        b_off = b_imm(i + inst->jt + 1, ctx);
                                        emit_b(b_off, ctx);
                                        emit_nop(ctx);
                                } else {
                                        /* A >= (K|X) so jump */
                                        b_off = b_imm(i + inst->jt + 1, ctx);
                                        emit_b(b_off, ctx);
                                        emit_nop(ctx);
                                }
                        } else {
                                /* A == K|X */
                                if (condt & MIPS_COND_K) { /* K */
                                        ctx->flags |= SEEN_S0 | SEEN_A;
                                        emit_load_imm(r_s0, k, ctx);
                                        /* jump true */
                                        b_off = b_imm(i + inst->jt + 1, ctx);
                                        emit_bcond(MIPS_COND_EQ, r_A, r_s0,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                        /* jump false */
                                        b_off = b_imm(i + inst->jf + 1,
                                                      ctx);
                                        emit_bcond(MIPS_COND_NE, r_A, r_s0,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                } else { /* X */
                                        /* jump true */
                                        ctx->flags |= SEEN_A | SEEN_X;
                                        b_off = b_imm(i + inst->jt + 1,
                                                      ctx);
                                        emit_bcond(MIPS_COND_EQ, r_A, r_X,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                        /* jump false */
                                        b_off = b_imm(i + inst->jf + 1, ctx);
                                        emit_bcond(MIPS_COND_NE, r_A, r_X,
                                                   b_off, ctx);
                                        emit_nop(ctx);
                                }
                        }
                        break;
                case BPF_JMP | BPF_JSET | BPF_K:
                        ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
                        /* pc += (A & K) ? pc -> jt : pc -> jf */
                        emit_load_imm(r_s1, k, ctx);
                        emit_and(r_s0, r_A, r_s1, ctx);
                        /* jump true */
                        b_off = b_imm(i + inst->jt + 1, ctx);
                        emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
                        emit_nop(ctx);
                        /* jump false */
                        b_off = b_imm(i + inst->jf + 1, ctx);
                        emit_b(b_off, ctx);
                        emit_nop(ctx);
                        break;
                case BPF_JMP | BPF_JSET | BPF_X:
                        ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
                        /* pc += (A & X) ? pc -> jt : pc -> jf */
                        emit_and(r_s0, r_A, r_X, ctx);
                        /* jump true */
                        b_off = b_imm(i + inst->jt + 1, ctx);
                        emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
                        emit_nop(ctx);
                        /* jump false */
                        b_off = b_imm(i + inst->jf + 1, ctx);
                        emit_b(b_off, ctx);
                        emit_nop(ctx);
                        break;
                case BPF_RET | BPF_A:
                        ctx->flags |= SEEN_A;
                        if (i != prog->len - 1)
                                /*
                                 * If this is not the last instruction
                                 * then jump to the epilogue
                                 */
                                emit_b(b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_A, ctx); /* delay slot */
                        break;
                case BPF_RET | BPF_K:
                        /*
                         * It can emit two instructions so it does not fit
                         * in the delay slot.
                         */
                        emit_load_imm(r_ret, k, ctx);
                        if (i != prog->len - 1) {
                                /*
                                 * If this is not the last instruction
                                 * then jump to the epilogue
                                 */
                                emit_b(b_imm(prog->len, ctx), ctx);
                                emit_nop(ctx);
                        }
                        break;
                case BPF_MISC | BPF_TAX:
                        /* X = A */
                        ctx->flags |= SEEN_X | SEEN_A;
                        emit_jit_reg_move(r_X, r_A, ctx);
                        break;
                case BPF_MISC | BPF_TXA:
                        /* A = X */
                        ctx->flags |= SEEN_A | SEEN_X;
                        emit_jit_reg_move(r_A, r_X, ctx);
                        break;
                /* AUX */
                case BPF_ANC | SKF_AD_PROTOCOL:
                        /* A = ntohs(skb->protocol) */
                        ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
                                                  protocol) != 2);
                        off = offsetof(struct sk_buff, protocol);
                        emit_half_load(r_A, r_skb, off, ctx);
#ifdef CONFIG_CPU_LITTLE_ENDIAN
                        /* This needs little endian fixup */
                        if (cpu_has_wsbh) {
                                /* R2 and later have the wsbh instruction */
                                emit_wsbh(r_A, r_A, ctx);
                        } else {
                                /* Get first byte */
                                emit_andi(r_tmp_imm, r_A, 0xff, ctx);
                                /* Shift it */
                                emit_sll(r_tmp, r_tmp_imm, 8, ctx);
                                /* Get second byte */
                                emit_srl(r_tmp_imm, r_A, 8, ctx);
                                emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
                                /* Put everything together in r_A */
                                emit_or(r_A, r_tmp, r_tmp_imm, ctx);
                        }
#endif
                        break;
                case BPF_ANC | SKF_AD_CPU:
                        ctx->flags |= SEEN_A | SEEN_OFF;
                        /* A = current_thread_info()->cpu */
                        BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
                                                  cpu) != 4);
                        off = offsetof(struct thread_info, cpu);
                        /* $28/gp points to the thread_info struct */
                        emit_load(r_A, 28, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_IFINDEX:
                        /* A = skb->dev->ifindex */
                        ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
                        off = offsetof(struct sk_buff, dev);
                        /* Load *dev pointer */
                        emit_load_ptr(r_s0, r_skb, off, ctx);
                        /* error (0) in the delay slot */
                        emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
                                   b_imm(prog->len, ctx), ctx);
                        emit_reg_move(r_ret, r_zero, ctx);
                        BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
                                                  ifindex) != 4);
                        off = offsetof(struct net_device, ifindex);
                        emit_load(r_A, r_s0, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_MARK:
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
                        off = offsetof(struct sk_buff, mark);
                        emit_load(r_A, r_skb, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_RXHASH:
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
                        off = offsetof(struct sk_buff, hash);
                        emit_load(r_A, r_skb, off, ctx);
                        break;
                case BPF_ANC | SKF_AD_VLAN_TAG:
                case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
                        ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
                                                  vlan_tci) != 2);
                        off = offsetof(struct sk_buff, vlan_tci);
                        emit_half_load(r_s0, r_skb, off, ctx);
                        if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
                                emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
                        } else {
                                emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
                                /* return 1 if present */
                                emit_sltu(r_A, r_zero, r_A, ctx);
                        }
                        break;
                case BPF_ANC | SKF_AD_PKTTYPE:
                        ctx->flags |= SEEN_SKB;

                        emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
                        /* Keep only the last 3 bits */
                        emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
#ifdef __BIG_ENDIAN_BITFIELD
                        /* Get the actual packet type to the lower 3 bits */
                        emit_srl(r_A, r_A, 5, ctx);
#endif
                        break;
                case BPF_ANC | SKF_AD_QUEUE:
                        ctx->flags |= SEEN_SKB | SEEN_A;
                        BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
                                                  queue_mapping) != 2);
                        BUILD_BUG_ON(offsetof(struct sk_buff,
                                              queue_mapping) > 0xff);
                        off = offsetof(struct sk_buff, queue_mapping);
                        emit_half_load(r_A, r_skb, off, ctx);
                        break;
                default:
                        pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
                                 inst->code);
                        return -1;
                }
        }

        /* compute offsets only during the first pass */
        if (ctx->target == NULL)
                ctx->offsets[i] = ctx->idx * 4;

        return 0;
}

int bpf_jit_enable __read_mostly;

void bpf_jit_compile(struct bpf_prog *fp)
{
        struct jit_ctx ctx;
        unsigned int alloc_size, tmp_idx;

        if (!bpf_jit_enable)
                return;

        memset(&ctx, 0, sizeof(ctx));

        ctx.offsets = kcalloc(fp->len, sizeof(*ctx.offsets), GFP_KERNEL);
        if (ctx.offsets == NULL)
                return;

        ctx.skf = fp;

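        /*
         * First pass: ctx.target is still NULL so nothing is emitted;
         * build_body() only collects ctx.flags and the per-instruction
         * offsets, and ctx.idx ends up as the total instruction count.
         */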
        if (build_body(&ctx))
                goto out;

        tmp_idx = ctx.idx;
        build_prologue(&ctx);
        ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
        /* just to complete the ctx.idx count */
        build_epilogue(&ctx);

        alloc_size = 4 * ctx.idx;
        ctx.target = module_alloc(alloc_size);
        if (ctx.target == NULL)
                goto out;

        /* Clean it */
        memset(ctx.target, 0, alloc_size);

        ctx.idx = 0;

        /* Generate the actual JIT code */
        build_prologue(&ctx);
        build_body(&ctx);
        build_epilogue(&ctx);

        /* Update the icache */
        flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));

        if (bpf_jit_enable > 1)
                /* Dump JIT code */
                bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

        fp->bpf_func = (void *)ctx.target;
        fp->jited = true;

out:
        kfree(ctx.offsets);
}

void bpf_jit_free(struct bpf_prog *fp)
{
        if (fp->jited)
                module_memfree(fp->bpf_func);

        bpf_prog_unlock_free(fp);
}
