linux/arch/x86/kernel/kprobes/opt.c
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>

#include "common.h"

unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
        struct optimized_kprobe *op;
        struct kprobe *kp;
        long offs;
        int i;

        for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
                kp = get_kprobe((void *)addr - i);
                /* This function only handles jump-optimized kprobes */
                if (kp && kprobe_optimized(kp)) {
                        op = container_of(kp, struct optimized_kprobe, kp);
                        /* If op->list is not empty, op is still being (un)optimized */
                        if (list_empty(&op->list))
                                goto found;
                }
        }

        return addr;
found:
        /*
         * If the kprobe is jump-optimized, the original bytes at addr may
         * have been overwritten by the destination address of the jump.
         * In that case, the original bytes must be recovered from the
         * op->optinsn.copied_insn buffer.
         */
        if (probe_kernel_read(buf, (void *)addr,
                MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
                return 0UL;

        if (addr == (unsigned long)kp->addr) {
                buf[0] = kp->opcode;
                memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        } else {
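                /*
                 * addr falls inside the jump's displacement: skip the
                 * one-byte opcode and recover the remaining bytes from
                 * the copied_insn buffer.
                 */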
                offs = addr - (unsigned long)kp->addr - 1;
                memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
        }

        return (unsigned long)buf;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
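        /* 0x48 0xbf: REX.W prefix + movabs, loading a 64-bit immediate into %rdi */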
        *addr++ = 0x48;
        *addr++ = 0xbf;
#else
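        /* 0xb8: mov, loading a 32-bit immediate into %eax */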
        *addr++ = 0xb8;
#endif
        *(unsigned long *)addr = val;
}

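/*
 * Out-of-line detour template.  It saves a pt_regs frame on the stack,
 * loads the optimized_kprobe pointer into the first argument register
 * (patched in over the NOPs at optprobe_template_val) and calls
 * optimized_callback() (patched in at optprobe_template_call) by
 * arch_prepare_optimized_kprobe() below.
 */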
asm (
                        "optprobe_template_func:\n"
                        ".global optprobe_template_entry\n"
                        "optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
                        /* We don't bother saving the ss register */
                        "       pushq %rsp\n"
                        "       pushfq\n"
                        SAVE_REGS_STRING
                        "       movq %rsp, %rsi\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Copy the saved flags into the regs->sp slot so they are restored last */
                        "       movq 144(%rsp), %rdx\n"
                        "       movq %rdx, 152(%rsp)\n"
                        RESTORE_REGS_STRING
                        /* Skip flags entry */
                        "       addq $8, %rsp\n"
                        "       popfq\n"
#else /* CONFIG_X86_32 */
                        "       pushf\n"
                        SAVE_REGS_STRING
                        "       movl %esp, %edx\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        RESTORE_REGS_STRING
                        "       addl $4, %esp\n"        /* skip cs */
                        "       popf\n"
#endif
                        ".global optprobe_template_end\n"
                        "optprobe_template_end:\n"
                        ".type optprobe_template_func, @function\n"
                        ".size optprobe_template_func, .-optprobe_template_func\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

#define TMPL_MOVE_IDX \
        ((long)&optprobe_template_val - (long)&optprobe_template_entry)
#define TMPL_CALL_IDX \
        ((long)&optprobe_template_call - (long)&optprobe_template_entry)
#define TMPL_END_IDX \
        ((long)&optprobe_template_end - (long)&optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
        unsigned long flags;

        /* This is possible if op is queued for delayed unoptimization */
        if (kprobe_disabled(&op->kp))
                return;

        local_irq_save(flags);
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
        } else {
                /* Save skipped registers */
#ifdef CONFIG_X86_64
                regs->cs = __KERNEL_CS;
#else
                regs->cs = __KERNEL_CS | get_kernel_rpl();
                regs->gs = 0;
#endif
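                /*
                 * Report the state an int3 exception on the probed address
                 * would have produced: ip just past the breakpoint byte and
                 * orig_ax set to -1 (not a syscall).
                 */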
                regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
                regs->orig_ax = ~0UL;

                __this_cpu_write(current_kprobe, &op->kp);
                kcb->kprobe_status = KPROBE_HIT_ACTIVE;
                opt_pre_handler(&op->kp, regs);
                __this_cpu_write(current_kprobe, NULL);
        }
        local_irq_restore(flags);
}
NOKPROBE_SYMBOL(optimized_callback);

static int copy_optimized_instructions(u8 *dest, u8 *src)
{
        struct insn insn;
        int len = 0, ret;

        while (len < RELATIVEJUMP_SIZE) {
                ret = __copy_instruction(dest + len, src + len, &insn);
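                /*
                 * Every copied instruction must be decodable and "boostable",
                 * i.e. safe to execute out of line; otherwise the probe
                 * cannot be jump-optimized.
                 */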
                if (!ret || !can_boost(&insn, src + len))
                        return -EINVAL;
                len += ret;
        }
        /* Check whether the address range is reserved */
        if (ftrace_text_reserved(src, src + len - 1) ||
            alternatives_text_reserved(src, src + len - 1) ||
            jump_label_text_reserved(src, src + len - 1))
                return -EBUSY;

        return len;
}

/* Check whether insn is an indirect jump */
static int insn_is_indirect_jump(struct insn *insn)
{
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* jmp near/far indirect (FF /4, /5) */
                insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}

/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
        unsigned long target = 0;

        switch (insn->opcode.bytes[0]) {
        case 0xe0:      /* loopne */
        case 0xe1:      /* loope */
        case 0xe2:      /* loop */
        case 0xe3:      /* jcxz */
        case 0xe9:      /* near relative jump */
        case 0xeb:      /* short relative jump */
                break;
        case 0x0f:
                if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
                        break;
                return 0;
        default:
                if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
                        break;
                return 0;
        }
        target = (unsigned long)insn->next_byte + insn->immediate.value;

        return (start <= target && target <= start + len);
}

/* Decode the whole function to ensure no instruction jumps into the target range */
static int can_optimize(unsigned long paddr)
{
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];

        /* Lookup symbol including addr */
        if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
                return 0;

        /*
         * Do not optimize in the entry code due to the unstable
         * stack handling and registers setup.
         */
        if (((paddr >= (unsigned long)__entry_text_start) &&
             (paddr <  (unsigned long)__entry_text_end)) ||
            ((paddr >= (unsigned long)__irqentry_text_start) &&
             (paddr <  (unsigned long)__irqentry_text_end)))
                return 0;

        /* Check there is enough space for a relative jump. */
        if (size - offset < RELATIVEJUMP_SIZE)
                return 0;

        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr - offset + size) { /* Decode until function end */
                unsigned long recovered_insn;
                if (search_exception_tables(addr))
                        /*
                         * Since some fixup code jumps into this function,
                         * we can't optimize a kprobe in this function.
                         */
                        return 0;
                recovered_insn = recover_probed_instruction(buf, addr);
                if (!recovered_insn)
                        return 0;
                kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
                insn_get_length(&insn);
                /* Another subsystem puts a breakpoint */
                if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                        return 0;
                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
                /* Check that no instruction jumps into the target range */
                if (insn_is_indirect_jump(&insn) ||
                    insn_jump_into_range(&insn, paddr + INT3_SIZE,
                                         RELATIVE_ADDR_SIZE))
                        return 0;
                addr += insn.length;
        }

        return 1;
}

/*
 * Check whether the optimized_kprobe can actually be optimized, i.e. no
 * other enabled kprobe lies within the instructions it will replace.
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
        int i;
        struct kprobe *p;

        for (i = 1; i < op->optinsn.size; i++) {
                p = get_kprobe(op->kp.addr + i);
                if (p && !kprobe_disabled(p))
                        return -EEXIST;
        }

        return 0;
}

/* Check whether addr falls within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
                                 unsigned long addr)
{
        return ((unsigned long)op->kp.addr <= addr &&
                (unsigned long)op->kp.addr + op->optinsn.size > addr);
}
/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
        if (op->optinsn.insn) {
                free_optinsn_slot(op->optinsn.insn, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
        __arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that the relative jump will replace into the
 * out-of-line buffer.  The target instructions MUST be relocatable
 * (checked inside).  This is called when a new aggregated (opt)probe
 * is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
                                  struct kprobe *__unused)
{
        u8 *buf;
        int ret;
        long rel;

        if (!can_optimize((unsigned long)op->kp.addr))
                return -EILSEQ;

        op->optinsn.insn = get_optinsn_slot();
        if (!op->optinsn.insn)
                return -ENOMEM;

        /*
         * Verify that the address gap is within the 2GB range a relative
         * jump can reach.
         */
        rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
        if (abs(rel) > 0x7fffffff) {
                __arch_remove_optimized_kprobe(op, 0);
                return -ERANGE;
        }

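        /*
         * The detour buffer is laid out as:
         *   [template][copied original instructions][jump back to the
         *   instruction following the replaced range]
         */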
        buf = (u8 *)op->optinsn.insn;
        set_memory_rw((unsigned long)buf & PAGE_MASK, 1);

        /* Copy instructions into the out-of-line buffer */
        ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
        if (ret < 0) {
                __arch_remove_optimized_kprobe(op, 0);
                return ret;
        }
        op->optinsn.size = ret;

        /* Copy arch-dep-instance from template */
        memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);

        /* Set probe information */
        synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

        /* Set probe function call */
        synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);

        /* Set returning jmp instruction at the tail of out-of-line buffer */
        synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
                           (u8 *)op->kp.addr + op->optinsn.size);

        set_memory_ro((unsigned long)buf & PAGE_MASK, 1);

        flush_icache_range((unsigned long) buf,
                           (unsigned long) buf + TMPL_END_IDX +
                           op->optinsn.size + RELATIVEJUMP_SIZE);
        return 0;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * The caller must hold kprobe_mutex and text_mutex.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
        struct optimized_kprobe *op, *tmp;
        u8 insn_buf[RELATIVEJUMP_SIZE];

        list_for_each_entry_safe(op, tmp, oplist, list) {
                s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));

                WARN_ON(kprobe_disabled(&op->kp));

                /* Backup instructions which will be replaced by jump address */
                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
                       RELATIVE_ADDR_SIZE);

                insn_buf[0] = RELATIVEJUMP_OPCODE;
                *(s32 *)(&insn_buf[1]) = rel;

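                /*
                 * text_poke_bp() patches in the jump; a CPU that hits the
                 * transient int3 while the patch is in progress is diverted
                 * straight to the detour buffer (the 4th argument).
                 */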
                text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
                             op->optinsn.insn);

                list_del_init(&op->list);
        }
}

/* Replace a relative jump with a breakpoint (int3).  */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
        u8 insn_buf[RELATIVEJUMP_SIZE];

        /* Set int3 to first byte for kprobes */
        insn_buf[0] = BREAKPOINT_INSTRUCTION;
        memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
                     op->optinsn.insn);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * The caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
                                    struct list_head *done_list)
{
        struct optimized_kprobe *op, *tmp;

        list_for_each_entry_safe(op, tmp, oplist, list) {
                arch_unoptimize_kprobe(op);
                list_move(&op->list, done_list);
        }
}

int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
        struct optimized_kprobe *op;

        if (p->flags & KPROBE_FLAG_OPTIMIZED) {
                /* This kprobe is really able to run optimized path. */
                op = container_of(p, struct optimized_kprobe, kp);
                /* Detour through copied instructions */
                regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
                if (!reenter)
                        reset_current_kprobe();
                preempt_enable_no_resched();
                return 1;
        }
        return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);
