linux/arch/x86/kernel/kprobes/opt.c
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>

#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>

#include "common.h"

unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
        struct optimized_kprobe *op;
        struct kprobe *kp;
        long offs;
        int i;

        for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
                kp = get_kprobe((void *)addr - i);
                /* This function only handles jump-optimized kprobes */
                if (kp && kprobe_optimized(kp)) {
                        op = container_of(kp, struct optimized_kprobe, kp);
                        /* If op->list is not empty, op is being optimized */
                        if (list_empty(&op->list))
                                goto found;
                }
        }

        return addr;
found:
        /*
         * If the kprobe has been optimized, the original bytes at addr may
         * have been overwritten by the jump destination address. In that
         * case, the original bytes must be recovered from the
         * op->optinsn.copied_insn buffer.
         */
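        /*
         * Worked example (hypothetical addresses, for illustration only):
         * if kp->addr is 0x1000, the 5-byte jump occupies 0x1000..0x1004.
         * Recovering addr == 0x1002 takes the else branch below with
         * offs = 0x1002 - 0x1000 - 1 = 1, so the remaining displacement
         * bytes copied_insn[1..3] are copied into buf.
         */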
        memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
        if (addr == (unsigned long)kp->addr) {
                buf[0] = kp->opcode;
                memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        } else {
                offs = addr - (unsigned long)kp->addr - 1;
                memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
        }

        return (unsigned long)buf;
}

/* Insert a move instruction which loads a pointer into eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
        *addr++ = 0x48;
        *addr++ = 0xbf;
#else
        *addr++ = 0xb8;
#endif
        *(unsigned long *)addr = val;
}
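
/*
 * For reference, the bytes emitted above encode (this mirrors the code and
 * adds no new requirement):
 *   x86-64: 48 bf <imm64>  =>  movabs $val, %rdi
 *   x86-32: b8 <imm32>     =>  mov    $val, %eax
 * so the optimized_kprobe pointer lands in the register that carries the
 * first argument of optimized_callback().
 */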

asm (
                        ".global optprobe_template_entry\n"
                        "optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
                        /* We don't bother saving the ss register */
                        "       pushq %rsp\n"
                        "       pushfq\n"
                        SAVE_REGS_STRING
                        "       movq %rsp, %rsi\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Move flags to rsp */
                        "       movq 144(%rsp), %rdx\n"
                        "       movq %rdx, 152(%rsp)\n"
                        RESTORE_REGS_STRING
                        /* Skip flags entry */
                        "       addq $8, %rsp\n"
                        "       popfq\n"
#else /* CONFIG_X86_32 */
                        "       pushf\n"
                        SAVE_REGS_STRING
                        "       movl %esp, %edx\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        RESTORE_REGS_STRING
                        "       addl $4, %esp\n"        /* skip cs */
                        "       popf\n"
#endif
                        ".global optprobe_template_end\n"
                        "optprobe_template_end:\n");
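
/*
 * A note on the template above: it builds a pt_regs-style frame on the
 * stack and passes it as the second argument (%rsi on x86-64, %edx on
 * x86-32).  The NOP5 slot at optprobe_template_val is later patched by
 * synthesize_set_arg1() to load the optimized_kprobe pointer, and the slot
 * at optprobe_template_call is patched by synthesize_relcall() to call
 * optimized_callback().  The offsets below locate those slots within a
 * copy of the template.
 */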

#define TMPL_MOVE_IDX \
        ((long)&optprobe_template_val - (long)&optprobe_template_entry)
#define TMPL_CALL_IDX \
        ((long)&optprobe_template_call - (long)&optprobe_template_entry)
#define TMPL_END_IDX \
        ((long)&optprobe_template_end - (long)&optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from the optinsn detour buffer */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
        unsigned long flags;

        /* This is possible if op is under delayed unoptimization */
        if (kprobe_disabled(&op->kp))
                return;

        local_irq_save(flags);
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
        } else {
                /* Save skipped registers */
#ifdef CONFIG_X86_64
                regs->cs = __KERNEL_CS;
#else
                regs->cs = __KERNEL_CS | get_kernel_rpl();
                regs->gs = 0;
#endif
                regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
                regs->orig_ax = ~0UL;

                __this_cpu_write(current_kprobe, &op->kp);
                kcb->kprobe_status = KPROBE_HIT_ACTIVE;
                opt_pre_handler(&op->kp, regs);
                __this_cpu_write(current_kprobe, NULL);
        }
        local_irq_restore(flags);
}
NOKPROBE_SYMBOL(optimized_callback);
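
/*
 * Note: the register fixups above are intended to mirror what a handler
 * would see on the int3 path: regs->ip points just past the one-byte
 * breakpoint at the probed address and orig_ax is set to -1, so
 * opt_pre_handler() runs with comparable pt_regs contents whether the
 * probe was hit via int3 or via the optimized jump.
 */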

static int copy_optimized_instructions(u8 *dest, u8 *src)
{
        int len = 0, ret;

        while (len < RELATIVEJUMP_SIZE) {
                ret = __copy_instruction(dest + len, src + len);
                if (!ret || !can_boost(dest + len))
                        return -EINVAL;
                len += ret;
        }
        /* Check whether the address range is reserved */
        if (ftrace_text_reserved(src, src + len - 1) ||
            alternatives_text_reserved(src, src + len - 1) ||
            jump_label_text_reserved(src, src + len - 1))
                return -EBUSY;

        return len;
}

/* Check whether insn is an indirect jump */
static int insn_is_indirect_jump(struct insn *insn)
{
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
                insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}
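
/*
 * For reference, the opcodes matched above (mirroring the code):
 *   ff /4  =>  jmp r/m          (indirect near jump)
 *   ff /5  =>  jmp far m        ((reg & 6) == 4 covers both 4 and 5)
 *   ea     =>  ljmp ptr16:32    (direct far, segment based jump)
 * The target of such a jump cannot be determined statically, so
 * can_optimize() below refuses to optimize any function containing one.
 */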

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
        unsigned long target = 0;

        switch (insn->opcode.bytes[0]) {
        case 0xe0:      /* loopne */
        case 0xe1:      /* loope */
        case 0xe2:      /* loop */
        case 0xe3:      /* jcxz */
        case 0xe9:      /* near relative jump */
        case 0xeb:      /* short relative jump */
                break;
        case 0x0f:
                if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
                        break;
                return 0;
        default:
                if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
                        break;
                return 0;
        }
        target = (unsigned long)insn->next_byte + insn->immediate.value;

        return (start <= target && target <= start + len);
}
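
/*
 * Example (illustrative numbers): a short jump "eb 0e" decoded at address A
 * has insn->next_byte == A + 2 and immediate.value == 0x0e, giving
 * target == A + 0x10.  can_optimize() passes start == paddr + INT3_SIZE and
 * len == RELATIVE_ADDR_SIZE, i.e. the displacement bytes of the planned
 * 5-byte jump, which no other instruction may jump into the middle of.
 */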

/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];

        /* Look up the symbol containing paddr */
        if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
                return 0;

        /*
         * Do not optimize in the entry code due to the unstable
         * stack handling.
         */
        if ((paddr >= (unsigned long)__entry_text_start) &&
            (paddr <  (unsigned long)__entry_text_end))
                return 0;

        /* Check that there is enough space for a relative jump. */
        if (size - offset < RELATIVEJUMP_SIZE)
                return 0;

        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr - offset + size) { /* Decode until function end */
                unsigned long recovered_insn;
                if (search_exception_tables(addr))
                        /*
                         * Since some fixup code will jump into this function,
                         * we can't optimize a kprobe in this function.
                         */
                        return 0;
                recovered_insn = recover_probed_instruction(buf, addr);
                if (!recovered_insn)
                        return 0;
                kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
                insn_get_length(&insn);
                /* Another subsystem has put a breakpoint here */
                if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                        return 0;
                /* Restore the real kernel address in the decoded insn */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
                /* Check that this instruction doesn't jump into the target */
                if (insn_is_indirect_jump(&insn) ||
                    insn_jump_into_range(&insn, paddr + INT3_SIZE,
                                         RELATIVE_ADDR_SIZE))
                        return 0;
                addr += insn.length;
        }

        return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
        int i;
        struct kprobe *p;

        for (i = 1; i < op->optinsn.size; i++) {
                p = get_kprobe(op->kp.addr + i);
                if (p && !kprobe_disabled(p))
                        return -EEXIST;
        }

        return 0;
}

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
                                 unsigned long addr)
{
        return ((unsigned long)op->kp.addr <= addr &&
                (unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
        if (op->optinsn.insn) {
                free_optinsn_slot(op->optinsn.insn, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
        __arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that will be replaced by the relative jump.
 * The target instructions MUST be relocatable (this is checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
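/*
 * Sketch of the out-of-line (detour) buffer assembled below, where size is
 * op->optinsn.size as returned by copy_optimized_instructions():
 *
 *   buf[0 .. TMPL_END_IDX)                   template copy (arg1 and call
 *                                            slots patched)
 *   buf[TMPL_END_IDX .. TMPL_END_IDX + size) copied original instructions
 *   buf[TMPL_END_IDX + size ...]             jmp back to op->kp.addr + size
 */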
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
                                  struct kprobe *__unused)
{
        u8 *buf;
        int ret;
        long rel;

        if (!can_optimize((unsigned long)op->kp.addr))
                return -EILSEQ;

        op->optinsn.insn = get_optinsn_slot();
        if (!op->optinsn.insn)
                return -ENOMEM;

        /*
         * Verify that the address gap is within the +/-2GB range, because
         * this uses a relative jump.
         */
        rel = (long)op->optinsn.insn - ((long)op->kp.addr + RELATIVEJUMP_SIZE);
        if (abs(rel) > 0x7fffffff) {
                __arch_remove_optimized_kprobe(op, 0);
                return -ERANGE;
        }

        buf = (u8 *)op->optinsn.insn;

        /* Copy instructions into the out-of-line buffer */
        ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
        if (ret < 0) {
                __arch_remove_optimized_kprobe(op, 0);
                return ret;
        }
        op->optinsn.size = ret;

        /* Copy the arch-dependent template instance */
        memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);

        /* Set probe information */
        synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

        /* Set probe function call */
        synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);

        /* Set returning jmp instruction at the tail of out-of-line buffer */
        synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
                           (u8 *)op->kp.addr + op->optinsn.size);

        flush_icache_range((unsigned long) buf,
                           (unsigned long) buf + TMPL_END_IDX +
                           op->optinsn.size + RELATIVEJUMP_SIZE);
        return 0;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * The caller must hold kprobe_mutex and text_mutex.
 */
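/*
 * Illustrative encoding (hypothetical addresses): with op->kp.addr at
 * 0x1000 and op->optinsn.insn at 0x2000, the bytes written below are
 *   e9 fb 0f 00 00    (jmp 0x2000)
 * since rel = 0x2000 - (0x1000 + RELATIVEJUMP_SIZE) = 0xffb.
 */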
void arch_optimize_kprobes(struct list_head *oplist)
{
        struct optimized_kprobe *op, *tmp;
        u8 insn_buf[RELATIVEJUMP_SIZE];

        list_for_each_entry_safe(op, tmp, oplist, list) {
                s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));

                WARN_ON(kprobe_disabled(&op->kp));

                /* Back up instructions to be replaced by the jump address */
                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
                       RELATIVE_ADDR_SIZE);

                insn_buf[0] = RELATIVEJUMP_OPCODE;
                *(s32 *)(&insn_buf[1]) = rel;

                text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
                             op->optinsn.insn);

                list_del_init(&op->list);
        }
}

/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
        u8 insn_buf[RELATIVEJUMP_SIZE];

        /* Set int3 as the first byte for the kprobe */
        insn_buf[0] = BREAKPOINT_INSTRUCTION;
        memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
                     op->optinsn.insn);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * The caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
                                    struct list_head *done_list)
{
        struct optimized_kprobe *op, *tmp;

        list_for_each_entry_safe(op, tmp, oplist, list) {
                arch_unoptimize_kprobe(op);
                list_move(&op->list, done_list);
        }
}

int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
        struct optimized_kprobe *op;

        if (p->flags & KPROBE_FLAG_OPTIMIZED) {
                /* This kprobe is really able to run the optimized path. */
                op = container_of(p, struct optimized_kprobe, kp);
                /* Detour through the copied instructions */
                regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
                if (!reenter)
                        reset_current_kprobe();
                preempt_enable_no_resched();
                return 1;
        }
        return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);