linux/arch/x86/kernel/kprobes/opt.c
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>

#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>

#include "common.h"

unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
        struct optimized_kprobe *op;
        struct kprobe *kp;
        long offs;
        int i;

        for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
                kp = get_kprobe((void *)addr - i);
                /* This function only handles jump-optimized kprobes */
                if (kp && kprobe_optimized(kp)) {
                        op = container_of(kp, struct optimized_kprobe, kp);
                        /* If op->list is not empty, op is being (un)optimized */
                        if (list_empty(&op->list))
                                goto found;
                }
        }

        return addr;
found:
        /*
         * If the kprobe is optimized, the original bytes that were
         * overwritten by the jump destination address must be recovered
         * from the op->optinsn.copied_insn buffer.
         */
        memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
        if (addr == (unsigned long)kp->addr) {
                buf[0] = kp->opcode;
                memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        } else {
                offs = addr - (unsigned long)kp->addr - 1;
                memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
        }

        return (unsigned long)buf;
}
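
/*
 * Recovery layout note (derived from the code above): an optimized probe
 * site holds a 5-byte relative jump (e9 <rel32>).  kp->opcode keeps the
 * original first byte and op->optinsn.copied_insn keeps original bytes
 * 1..4, so the pristine instruction bytes are rebuilt as
 *
 *   buf[0]    = kp->opcode
 *   buf[1..4] = op->optinsn.copied_insn
 *
 * When addr points into the middle of the jump, only the tail of
 * copied_insn is copied, starting at offset addr - kp->addr - 1.
 */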

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
        *addr++ = 0x48;
        *addr++ = 0xbf;
#else
        *addr++ = 0xb8;
#endif
        *(unsigned long *)addr = val;
}
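
/*
 * Encoding note: on x86-64 this emits 0x48 0xbf <imm64>, i.e.
 * "movabs $val, %rdi"; on 32-bit it emits 0xb8 <imm32>, i.e.
 * "mov $val, %eax".  Either way the optimized_kprobe pointer lands in
 * the register used for the first argument when the template calls
 * optimized_callback() (%rdi on x86-64; %eax on 32-bit under the
 * kernel's regparm(3) convention).
 */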

asm (
                        ".global optprobe_template_entry\n"
                        "optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
                        /* We don't bother saving the ss register */
                        "       pushq %rsp\n"
                        "       pushfq\n"
                        SAVE_REGS_STRING
                        "       movq %rsp, %rsi\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Move flags to rsp */
                        "       movq 144(%rsp), %rdx\n"
                        "       movq %rdx, 152(%rsp)\n"
                        RESTORE_REGS_STRING
                        /* Skip flags entry */
                        "       addq $8, %rsp\n"
                        "       popfq\n"
#else /* CONFIG_X86_32 */
                        "       pushf\n"
                        SAVE_REGS_STRING
                        "       movl %esp, %edx\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        RESTORE_REGS_STRING
                        "       addl $4, %esp\n"        /* skip cs */
                        "       popf\n"
#endif
                        ".global optprobe_template_end\n"
                        "optprobe_template_end:\n");
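
/*
 * Template layout note: the code between optprobe_template_entry and
 * optprobe_template_end is copied to the head of each detour buffer.
 * It builds a (partial) pt_regs frame, loads &regs into the register
 * used for the second argument (%rsi on x86-64, %edx on 32-bit), and
 * leaves NOP5 slots to be patched later:
 *   - optprobe_template_val:  patched by synthesize_set_arg1() with the
 *     "mov $op, %rdi/%eax" that passes the optimized_kprobe pointer;
 *   - optprobe_template_call: patched by synthesize_relcall() with a
 *     relative call to optimized_callback().
 * The TMPL_*_IDX macros below are the byte offsets of those slots
 * within the template.
 */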

#define TMPL_MOVE_IDX \
        ((long)&optprobe_template_val - (long)&optprobe_template_entry)
#define TMPL_CALL_IDX \
        ((long)&optprobe_template_call - (long)&optprobe_template_entry)
#define TMPL_END_IDX \
        ((long)&optprobe_template_end - (long)&optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from the detour code (optinsn) */
static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
        unsigned long flags;

        /* This is possible if op is under delayed unoptimization */
        if (kprobe_disabled(&op->kp))
                return;

        local_irq_save(flags);
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
        } else {
                /* Save skipped registers */
#ifdef CONFIG_X86_64
                regs->cs = __KERNEL_CS;
#else
                regs->cs = __KERNEL_CS | get_kernel_rpl();
                regs->gs = 0;
#endif
                regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
                regs->orig_ax = ~0UL;

                __this_cpu_write(current_kprobe, &op->kp);
                kcb->kprobe_status = KPROBE_HIT_ACTIVE;
                opt_pre_handler(&op->kp, regs);
                __this_cpu_write(current_kprobe, NULL);
        }
        local_irq_restore(flags);
}
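
/*
 * Note on the register fixups above: the callback makes the saved pt_regs
 * look as if the probe had been hit through the int3 breakpoint path, so
 * the shared pre-handlers see a consistent frame: cs (and gs on 32-bit)
 * are filled in because the template does not save them, regs->ip points
 * just past the int3 byte at the probe address, and orig_ax is set to -1
 * (no syscall in progress).
 */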

static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
{
        int len = 0, ret;

        while (len < RELATIVEJUMP_SIZE) {
                ret = __copy_instruction(dest + len, src + len);
                if (!ret || !can_boost(dest + len))
                        return -EINVAL;
                len += ret;
        }
        /* Check whether the address range is reserved */
        if (ftrace_text_reserved(src, src + len - 1) ||
            alternatives_text_reserved(src, src + len - 1) ||
            jump_label_text_reserved(src, src + len - 1))
                return -EBUSY;

        return len;
}
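
/*
 * Note: whole instructions are copied until at least RELATIVEJUMP_SIZE
 * (5) bytes are covered, since the relative jump overwrites that many
 * bytes at the probe site.  Every copied instruction must be "boostable",
 * i.e. safe to execute out of line and then resume via an unconditional
 * jump back to the original code.
 */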

/* Check whether insn is an indirect jump */
static int __kprobes insn_is_indirect_jump(struct insn *insn)
{
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
                insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}
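
/*
 * Decoding note: for opcode 0xff the ModRM reg field selects the
 * operation; /4 is "jmp r/m" (near indirect) and /5 is "jmp far r/m".
 * The check (reg & 6) == 4 matches exactly those two values (4 and 5).
 * 0xea is the legacy direct far jump (jmp ptr16:16/ptr16:32).
 */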

/* Check whether insn jumps into specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
        unsigned long target = 0;

        switch (insn->opcode.bytes[0]) {
        case 0xe0:      /* loopne */
        case 0xe1:      /* loope */
        case 0xe2:      /* loop */
        case 0xe3:      /* jcxz */
        case 0xe9:      /* near relative jump */
        case 0xeb:      /* short relative jump */
                break;
        case 0x0f:
                if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
                        break;
                return 0;
        default:
                if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
                        break;
                return 0;
        }
        target = (unsigned long)insn->next_byte + insn->immediate.value;

        return (start <= target && target <= start + len);
}
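
/*
 * Note: relative branch targets are computed from the address of the next
 * instruction, hence target = insn->next_byte + immediate.  Only relative
 * branches are considered here (short/near jmp, short/near jcc, and the
 * loop/jcxz family); indirect jumps are handled separately by
 * insn_is_indirect_jump().
 */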

/* Decode the whole function to ensure no instruction jumps into the target range */
static int __kprobes can_optimize(unsigned long paddr)
{
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];

        /* Lookup symbol including addr */
        if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
                return 0;

        /*
         * Do not optimize in the entry code due to the unstable
         * stack handling.
         */
        if ((paddr >= (unsigned long)__entry_text_start) &&
            (paddr <  (unsigned long)__entry_text_end))
                return 0;

        /* Check there is enough space for a relative jump. */
        if (size - offset < RELATIVEJUMP_SIZE)
                return 0;

        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr - offset + size) { /* Decode until function end */
                unsigned long recovered_insn;
                if (search_exception_tables(addr))
                        /*
                         * Since some fixup code will jump into this function,
                         * we can't optimize a kprobe in this function.
                         */
                        return 0;
                recovered_insn = recover_probed_instruction(buf, addr);
                kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
                insn_get_length(&insn);
                /* Another subsystem has put a breakpoint here */
                if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                        return 0;
                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
                /* Check that no instruction jumps into the target */
                if (insn_is_indirect_jump(&insn) ||
                    insn_jump_into_range(&insn, paddr + INT3_SIZE,
                                         RELATIVE_ADDR_SIZE))
                        return 0;
                addr += insn.length;
        }

        return 1;
}
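
/*
 * Note: the range checked above, [paddr + INT3_SIZE, paddr + INT3_SIZE +
 * RELATIVE_ADDR_SIZE], covers bytes 1..4 of the 5-byte jump that will be
 * written at paddr.  Any branch landing inside those bytes would execute
 * the middle of the new jump instruction, so such functions are rejected.
 * Indirect jumps are rejected outright because their targets cannot be
 * determined statically.
 */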

/* Check whether the optimized_kprobe can actually be optimized. */
int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
        int i;
        struct kprobe *p;

        for (i = 1; i < op->optinsn.size; i++) {
                p = get_kprobe(op->kp.addr + i);
                if (p && !kprobe_disabled(p))
                        return -EEXIST;
        }

        return 0;
}

/* Check whether addr is within the optimized instructions. */
int __kprobes
arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
{
        return ((unsigned long)op->kp.addr <= addr &&
                (unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free the optimized instruction slot */
static __kprobes
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
        if (op->optinsn.insn) {
                free_optinsn_slot(op->optinsn.insn, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
}

void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
        __arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that will be replaced by the relative jump.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
{
        u8 *buf;
        int ret;
        long rel;

        if (!can_optimize((unsigned long)op->kp.addr))
                return -EILSEQ;

        op->optinsn.insn = get_optinsn_slot();
        if (!op->optinsn.insn)
                return -ENOMEM;

        /*
         * Verify that the address gap is within the 2GB range, because
         * this uses a relative jump.
         */
        rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
        if (abs(rel) > 0x7fffffff)
                return -ERANGE;

        buf = (u8 *)op->optinsn.insn;

        /* Copy instructions into the out-of-line buffer */
        ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
        if (ret < 0) {
                __arch_remove_optimized_kprobe(op, 0);
                return ret;
        }
        op->optinsn.size = ret;

        /* Copy the arch-dependent template into the buffer head */
        memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);

        /* Set probe information */
        synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

        /* Set probe function call */
        synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);

        /* Set the returning jmp instruction at the tail of the out-of-line buffer */
        synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
                           (u8 *)op->kp.addr + op->optinsn.size);

        flush_icache_range((unsigned long) buf,
                           (unsigned long) buf + TMPL_END_IDX +
                           op->optinsn.size + RELATIVEJUMP_SIZE);
        return 0;
}
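
/*
 * Resulting detour buffer layout (offsets relative to buf = op->optinsn.insn):
 *
 *   buf[0 .. TMPL_END_IDX)                     template: build pt_regs,
 *                                              set args, call
 *                                              optimized_callback(), restore
 *   buf[TMPL_END_IDX .. TMPL_END_IDX + size)   copy of the displaced
 *                                              original instructions
 *   buf[TMPL_END_IDX + size ..]                5-byte jmp back to
 *                                              kp.addr + size
 */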

#define MAX_OPTIMIZE_PROBES 256
static struct text_poke_param *jump_poke_params;
static struct jump_poke_buffer {
        u8 buf[RELATIVEJUMP_SIZE];
} *jump_poke_bufs;

static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
                                            u8 *insn_buf,
                                            struct optimized_kprobe *op)
{
        s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));

        /* Back up the instructions that will be replaced by the jump address */
        memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
               RELATIVE_ADDR_SIZE);

        insn_buf[0] = RELATIVEJUMP_OPCODE;
        *(s32 *)(&insn_buf[1]) = rel;

        tprm->addr = op->kp.addr;
        tprm->opcode = insn_buf;
        tprm->len = RELATIVEJUMP_SIZE;
}
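
/*
 * Note: insn_buf ends up holding the 5-byte "e9 <rel32>" near jump, with
 * rel32 computed relative to the end of the jump (kp.addr +
 * RELATIVEJUMP_SIZE), so execution lands on the detour buffer entry.
 * The 4 displaced bytes are saved in op->optinsn.copied_insn first, which
 * is what __recover_optprobed_insn() later uses for recovery.
 */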

/*
 * Replace breakpoints (int3) with relative jumps.
 * The caller must hold kprobe_mutex and text_mutex.
 */
void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
        struct optimized_kprobe *op, *tmp;
        int c = 0;

        list_for_each_entry_safe(op, tmp, oplist, list) {
                WARN_ON(kprobe_disabled(&op->kp));
                /* Setup param */
                setup_optimize_kprobe(&jump_poke_params[c],
                                      jump_poke_bufs[c].buf, op);
                list_del_init(&op->list);
                if (++c >= MAX_OPTIMIZE_PROBES)
                        break;
        }

        /*
         * text_poke_smp doesn't support modifying code that may run in
         * NMI/MCE context.  However, since kprobes itself also doesn't
         * support probing NMI/MCE code, this is not a problem.
         */
        text_poke_smp_batch(jump_poke_params, c);
}
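
/*
 * Note: at most MAX_OPTIMIZE_PROBES sites are patched per call; any
 * entries beyond that limit are simply left on oplist.  The whole batch
 * is applied by one text_poke_smp_batch() call, so the CPUs are
 * synchronized once per batch rather than once per probe.
 */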

static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
                                              u8 *insn_buf,
                                              struct optimized_kprobe *op)
{
        /* Set int3 as the first byte, so the kprobe stays armed */
        insn_buf[0] = BREAKPOINT_INSTRUCTION;
        memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);

        tprm->addr = op->kp.addr;
        tprm->opcode = insn_buf;
        tprm->len = RELATIVEJUMP_SIZE;
}

/*
 * Recover the original instructions plus an int3 breakpoint from the
 * relative jumps.  The caller must hold kprobe_mutex.
 */
void arch_unoptimize_kprobes(struct list_head *oplist,
                             struct list_head *done_list)
{
        struct optimized_kprobe *op, *tmp;
        int c = 0;

        list_for_each_entry_safe(op, tmp, oplist, list) {
                /* Setup param */
                setup_unoptimize_kprobe(&jump_poke_params[c],
                                        jump_poke_bufs[c].buf, op);
                list_move(&op->list, done_list);
                if (++c >= MAX_OPTIMIZE_PROBES)
                        break;
        }

        /*
         * text_poke_smp doesn't support modifying code that may run in
         * NMI/MCE context.  However, since kprobes itself also doesn't
         * support probing NMI/MCE code, this is not a problem.
         */
        text_poke_smp_batch(jump_poke_params, c);
}

/* Replace a relative jump with a breakpoint (int3). */
void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
        u8 buf[RELATIVEJUMP_SIZE];

        /* Set int3 as the first byte, so the kprobe stays armed */
        buf[0] = BREAKPOINT_INSTRUCTION;
        memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
}

int __kprobes
setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
        struct optimized_kprobe *op;

        if (p->flags & KPROBE_FLAG_OPTIMIZED) {
                /* This kprobe is really able to run the optimized path. */
                op = container_of(p, struct optimized_kprobe, kp);
                /* Detour through the copied instructions */
                regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
                if (!reenter)
                        reset_current_kprobe();
                preempt_enable_no_resched();
                return 1;
        }
        return 0;
}
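
/*
 * Note: setup_detour_execution() handles the case where an optimized
 * probe is hit through the int3 path (e.g. while the jump is being
 * installed or removed).  Instead of single-stepping, it points regs->ip
 * past the template into the copied instructions in the detour buffer,
 * which end with a jump back to the original code, and returns 1 to tell
 * the breakpoint handler that the probe has already been handled.
 */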

int __kprobes arch_init_optprobes(void)
{
        /* Allocate code buffer and parameter array */
        jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
                                 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
        if (!jump_poke_bufs)
                return -ENOMEM;

        jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
                                   MAX_OPTIMIZE_PROBES, GFP_KERNEL);
        if (!jump_poke_params) {
                kfree(jump_poke_bufs);
                jump_poke_bufs = NULL;
                return -ENOMEM;
        }

        return 0;
}