/* linux/arch/x86/kernel/ftrace.c */
   1/*
   2 * Code for replacing ftrace calls with jumps.
   3 *
   4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
   5 *
   6 * Thanks goes to Ingo Molnar, for suggesting the idea.
   7 * Mathieu Desnoyers, for suggesting postponing the modifications.
   8 * Arjan van de Ven, for keeping me straight, and explaining to me
   9 * the dangers of modifying code on the run.
  10 */
  11
  12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13
  14#include <linux/spinlock.h>
  15#include <linux/hardirq.h>
  16#include <linux/uaccess.h>
  17#include <linux/ftrace.h>
  18#include <linux/percpu.h>
  19#include <linux/sched.h>
  20#include <linux/init.h>
  21#include <linux/list.h>
  22#include <linux/module.h>
  23
  24#include <trace/syscall.h>
  25
  26#include <asm/cacheflush.h>
  27#include <asm/kprobes.h>
  28#include <asm/ftrace.h>
  29#include <asm/nops.h>
  30
  31#ifdef CONFIG_DYNAMIC_FTRACE
  32
/* Make kernel and module text writable before ftrace patches it. */
int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	set_all_modules_text_rw();
	return 0;
}
  39
/* Restore the read-only text protections dropped in the prepare step. */
int ftrace_arch_code_modify_post_process(void)
{
	set_all_modules_text_ro();
	set_kernel_text_ro();
	return 0;
}
  46
/*
 * Overlay of a 5-byte mcount call site: one opcode byte (0xe8 for a
 * near call, 0xe9 for a near jmp) followed by a 32-bit relative
 * displacement. "packed" keeps the int immediately after the opcode.
 */
union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};
  54
  55static int ftrace_calc_offset(long ip, long addr)
  56{
  57        return (int)(addr - ip);
  58}
  59
  60static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  61{
  62        static union ftrace_code_union calc;
  63
  64        calc.e8         = 0xe8;
  65        calc.offset     = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
  66
  67        /*
  68         * No locking needed, this must be called via kstop_machine
  69         * which in essence is like running on a uniprocessor machine.
  70         */
  71        return calc.code;
  72}
  73
  74static inline int
  75within(unsigned long addr, unsigned long start, unsigned long end)
  76{
  77        return addr >= start && addr < end;
  78}
  79
/* Translate a kernel-text address into one that is safe to write through. */
static unsigned long text_ip_addr(unsigned long ip)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only with
	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
	 * of the kernel text mapping to modify the kernel text.
	 *
	 * For 32bit kernels, these mappings are same and we can use
	 * kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa_symbol(ip));

	return ip;
}
  95
/* The ideal 5-byte atomic nop that replaces a disabled mcount call. */
static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}
 100
/*
 * Verify that @ip still holds @old_code and, if so, overwrite it with
 * @new_code in one write. Only safe when no other CPU can execute the
 * site (boot, module load, or under kstop_machine).
 */
static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can
	 *  disappear and change, we need to protect against faulting
	 *  as well as code changing. We do this by using the
	 *  probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* Write via the identity mapping in case the text mapping is RO. */
	ip = text_ip_addr(ip);

	/* replace the text with the new text */
	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
		return -EPERM;

	sync_core();

	return 0;
}
 135
 136int ftrace_make_nop(struct module *mod,
 137                    struct dyn_ftrace *rec, unsigned long addr)
 138{
 139        unsigned const char *new, *old;
 140        unsigned long ip = rec->ip;
 141
 142        old = ftrace_call_replace(ip, addr);
 143        new = ftrace_nop_replace();
 144
 145        /*
 146         * On boot up, and when modules are loaded, the MCOUNT_ADDR
 147         * is converted to a nop, and will never become MCOUNT_ADDR
 148         * again. This code is either running before SMP (on boot up)
 149         * or before the code will ever be executed (module load).
 150         * We do not want to use the breakpoint version in this case,
 151         * just modify the code directly.
 152         */
 153        if (addr == MCOUNT_ADDR)
 154                return ftrace_modify_code_direct(rec->ip, old, new);
 155
 156        /* Normal cases use add_brk_on_nop */
 157        WARN_ONCE(1, "invalid use of ftrace_make_nop");
 158        return -EINVAL;
 159}
 160
 161int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 162{
 163        unsigned const char *new, *old;
 164        unsigned long ip = rec->ip;
 165
 166        old = ftrace_nop_replace();
 167        new = ftrace_call_replace(ip, addr);
 168
 169        /* Should only be called when module is loaded */
 170        return ftrace_modify_code_direct(rec->ip, old, new);
 171}
 172
 173/*
 174 * The modifying_ftrace_code is used to tell the breakpoint
 175 * handler to call ftrace_int3_handler(). If it fails to
 176 * call this handler for a breakpoint added by ftrace, then
 177 * the kernel may crash.
 178 *
 179 * As atomic_writes on x86 do not need a barrier, we do not
 180 * need to add smp_mb()s for this to work. It is also considered
 181 * that we can not read the modifying_ftrace_code before
 182 * executing the breakpoint. That would be quite remarkable if
 183 * it could do that. Here's the flow that is required:
 184 *
 185 *   CPU-0                          CPU-1
 186 *
 187 * atomic_inc(mfc);
 188 * write int3s
 189 *                              <trap-int3> // implicit (r)mb
 190 *                              if (atomic_read(mfc))
 191 *                                      call ftrace_int3_handler()
 192 *
 193 * Then when we are finished:
 194 *
 195 * atomic_dec(mfc);
 196 *
 197 * If we hit a breakpoint that was not set by ftrace, it does not
 198 * matter if ftrace_int3_handler() is called or not. It will
 199 * simply be ignored. But it is crucial that a ftrace nop/caller
 200 * breakpoint is handled. No other user should ever place a
 201 * breakpoint on an ftrace nop/caller location. It must only
 202 * be done by this code.
 203 */
/* Non-zero while ftrace breakpoints may be live; read by the int3 path. */
atomic_t modifying_ftrace_code __read_mostly;

/* Defined below; forward-declared for update_ftrace_func(). */
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code);
 209
 210/*
 211 * Should never be called:
 212 *  As it is only called by __ftrace_replace_code() which is called by
 213 *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
 214 *  which is called to turn mcount into nops or nops into function calls
 215 *  but not to convert a function from not using regs to one that uses
 216 *  regs, which ftrace_modify_call() is for.
 217 */
/* See the comment above: x86 never reaches this call-to-call path. */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
				 unsigned long addr)
{
	WARN_ON(1);
	return -EINVAL;
}
 224
/* Call site currently being patched; checked by is_ftrace_caller(). */
static unsigned long ftrace_update_func;

/*
 * Patch the instruction at @ip to @new with the breakpoint method,
 * publishing @ip first so the int3 handler can recognize the site.
 */
static int update_ftrace_func(unsigned long ip, void *new)
{
	unsigned char old[MCOUNT_INSN_SIZE];
	int ret;

	/* Snapshot the current bytes as the expected "old" instruction. */
	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);

	ftrace_update_func = ip;
	/* Make sure the breakpoints see the ftrace_update_func update */
	smp_wmb();

	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ret = ftrace_modify_code(ip, old, new);

	atomic_dec(&modifying_ftrace_code);

	return ret;
}
 247
 248int ftrace_update_ftrace_func(ftrace_func_t func)
 249{
 250        unsigned long ip = (unsigned long)(&ftrace_call);
 251        unsigned char *new;
 252        int ret;
 253
 254        new = ftrace_call_replace(ip, (unsigned long)func);
 255        ret = update_ftrace_func(ip, new);
 256
 257        /* Also update the regs callback function */
 258        if (!ret) {
 259                ip = (unsigned long)(&ftrace_regs_call);
 260                new = ftrace_call_replace(ip, (unsigned long)func);
 261                ret = update_ftrace_func(ip, new);
 262        }
 263
 264        return ret;
 265}
 266
 267static int is_ftrace_caller(unsigned long ip)
 268{
 269        if (ip == ftrace_update_func)
 270                return 1;
 271
 272        return 0;
 273}
 274
 275/*
 276 * A breakpoint was added to the code address we are about to
 277 * modify, and this is the handle that will just skip over it.
 278 * We are either changing a nop into a trace call, or a trace
 279 * call to a nop. While the change is taking place, we treat
 280 * it just like it was a nop.
 281 */
int ftrace_int3_handler(struct pt_regs *regs)
{
	unsigned long ip;

	if (WARN_ON_ONCE(!regs))
		return 0;

	/* int3 is one byte; regs->ip points just past it. */
	ip = regs->ip - 1;
	if (!ftrace_location(ip) && !is_ftrace_caller(ip))
		return 0;

	/* Treat the site as a nop: skip the rest of the 5-byte slot. */
	regs->ip += MCOUNT_INSN_SIZE - 1;

	return 1;
}
 297
 298static int ftrace_write(unsigned long ip, const char *val, int size)
 299{
 300        ip = text_ip_addr(ip);
 301
 302        if (probe_kernel_write((void *)ip, val, size))
 303                return -EPERM;
 304
 305        return 0;
 306}
 307
/*
 * Put an int3 on the first byte of the instruction at @ip, but only
 * after verifying the site still holds the expected @old bytes.
 */
static int add_break(unsigned long ip, const char *old)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;

	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	return ftrace_write(ip, &brk, 1);
}
 322
 323static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
 324{
 325        unsigned const char *old;
 326        unsigned long ip = rec->ip;
 327
 328        old = ftrace_call_replace(ip, addr);
 329
 330        return add_break(rec->ip, old);
 331}
 332
 333
 334static int add_brk_on_nop(struct dyn_ftrace *rec)
 335{
 336        unsigned const char *old;
 337
 338        old = ftrace_nop_replace();
 339
 340        return add_break(rec->ip, old);
 341}
 342
/*
 * First pass of ftrace_replace_code(): put an int3 on every call site
 * that will change, so no CPU can execute a half-modified instruction.
 */
static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	/* The address the existing call (if any) points to. */
	ftrace_addr = ftrace_get_addr_curr(rec);

	ret = ftrace_test_record(rec, enable);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_brk_on_nop(rec);

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_brk_on_call(rec, ftrace_addr);
	}
	return 0;
}
 367
/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If it matches a nop
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
	unsigned char ins[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;
	const unsigned char *nop;
	unsigned long ftrace_addr;
	unsigned long ip = rec->ip;

	/* If we fail the read, just give up */
	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* If this does not have a breakpoint, we are done */
	if (ins[0] != brk)
		return 0;

	nop = ftrace_nop_replace();

	/*
	 * If the last 4 bytes of the instruction do not match
	 * a nop, then we assume that this is a call to ftrace_addr.
	 */
	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
		/*
		 * For extra paranoia, we check if the breakpoint is on
		 * a call that would actually jump to the ftrace_addr.
		 * If not, don't touch the breakpoint, we may just create
		 * a disaster.
		 */
		ftrace_addr = ftrace_get_addr_new(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
			goto update;

		/* Check both ftrace_addr and ftrace_old_addr */
		ftrace_addr = ftrace_get_addr_curr(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
			return -EINVAL;
	}

 update:
	/* Only the first byte differs: swap the int3 for the real opcode. */
	return ftrace_write(ip, nop, 1);
}
 422
 423static int add_update_code(unsigned long ip, unsigned const char *new)
 424{
 425        /* skip breakpoint */
 426        ip++;
 427        new++;
 428        return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
 429}
 430
 431static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
 432{
 433        unsigned long ip = rec->ip;
 434        unsigned const char *new;
 435
 436        new = ftrace_call_replace(ip, addr);
 437        return add_update_code(ip, new);
 438}
 439
 440static int add_update_nop(struct dyn_ftrace *rec)
 441{
 442        unsigned long ip = rec->ip;
 443        unsigned const char *new;
 444
 445        new = ftrace_nop_replace();
 446        return add_update_code(ip, new);
 447}
 448
/*
 * Second pass of ftrace_replace_code(): with the int3 in place, write
 * the last four bytes of the new instruction behind the breakpoint.
 */
static int add_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	/* The address the new call (if any) will point to. */
	ftrace_addr  = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_update_nop(rec);
	}

	return 0;
}
 474
 475static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
 476{
 477        unsigned long ip = rec->ip;
 478        unsigned const char *new;
 479
 480        new = ftrace_call_replace(ip, addr);
 481
 482        return ftrace_write(ip, new, 1);
 483}
 484
 485static int finish_update_nop(struct dyn_ftrace *rec)
 486{
 487        unsigned long ip = rec->ip;
 488        unsigned const char *new;
 489
 490        new = ftrace_nop_replace();
 491
 492        return ftrace_write(ip, new, 1);
 493}
 494
/*
 * Final pass of ftrace_replace_code(): commit the record's state and
 * replace the int3 with the first byte of the new instruction.
 */
static int finish_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	/* Unlike the earlier passes, this commits the state change. */
	ret = ftrace_update_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return finish_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return finish_update_nop(rec);
	}

	return 0;
}
 520
/* IPI callback: serialize this CPU so it sees the modified text. */
static void do_sync_core(void *data)
{
	sync_core();
}
 525
 526static void run_sync(void)
 527{
 528        int enable_irqs = irqs_disabled();
 529
 530        /* We may be called with interrupts disbled (on bootup). */
 531        if (enable_irqs)
 532                local_irq_enable();
 533        on_each_cpu(do_sync_core, NULL, 1);
 534        if (enable_irqs)
 535                local_irq_disable();
 536}
 537
/*
 * Live-patch every mcount call site using the breakpoint method:
 *   1) put an int3 on the first byte of each changing site,
 *   2) write the new last four bytes behind the breakpoint,
 *   3) write the new first byte over the breakpoint,
 * with all CPUs serialized between each step. On any failure the
 * breakpoints placed so far are removed before returning.
 */
void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *report = "adding breakpoints";
	int count = 0;	/* sites breakpointed; reported on failure */
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_breakpoints(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "updating code";

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
	}

	run_sync();

	report = "removing breakpoints";

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = finish_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
	}

	run_sync();

	return;

 remove_breakpoints:
	pr_warn("Failed on %s (%d):\n", report, count);
	ftrace_bug(ret, rec ? rec->ip : 0);
	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);
		/*
		 * Breakpoints are handled only when this function is in
		 * progress. The system could not work with them.
		 */
		if (remove_breakpoint(rec))
			BUG();
	}
	run_sync();
}
 597
/*
 * Patch a single site with the breakpoint method: int3 first, then
 * the tail bytes, then the first byte, serializing all CPUs between
 * each step. On a tail-write failure the original bytes are restored.
 */
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret)
		goto fail_update;

	run_sync();

	ret = ftrace_write(ip, new_code, 1);
	/*
	 * The breakpoint is handled only when this function is in progress.
	 * The system could not work if we could not remove it.
	 */
	BUG_ON(ret);
 out:
	run_sync();
	return ret;

 fail_update:
	/* Also here the system could not work with the breakpoint */
	if (ftrace_write(ip, old_code, 1))
		BUG();
	goto out;
}
 632
/* Arch entry point: run the core update with int3 handling armed. */
void arch_ftrace_update_code(int command)
{
	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ftrace_modify_all_code(command);

	atomic_dec(&modifying_ftrace_code);
}
 642
/* No arch-specific setup is needed for dynamic ftrace on x86. */
int __init ftrace_dyn_arch_init(void)
{
	return 0;
}
 647#endif
 648
 649#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 650
 651#ifdef CONFIG_DYNAMIC_FTRACE
 652extern void ftrace_graph_call(void);
 653
/* Encode a 5-byte "jmp addr" (0xe9 + rel32) as it would appear at @ip. */
static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	/* Jmp not a call (ignore the .e8) */
	calc.e8		= 0xe9;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * ftrace external locks synchronize the access to the static variable.
	 */
	return calc.code;
}
 667
 668static int ftrace_mod_jmp(unsigned long ip, void *func)
 669{
 670        unsigned char *new;
 671
 672        new = ftrace_jmp_replace(ip, (unsigned long)func);
 673
 674        return update_ftrace_func(ip, new);
 675}
 676
 677int ftrace_enable_ftrace_graph_caller(void)
 678{
 679        unsigned long ip = (unsigned long)(&ftrace_graph_call);
 680
 681        return ftrace_mod_jmp(ip, &ftrace_graph_caller);
 682}
 683
 684int ftrace_disable_ftrace_graph_caller(void)
 685{
 686        unsigned long ip = (unsigned long)(&ftrace_graph_call);
 687
 688        return ftrace_mod_jmp(ip, &ftrace_stub);
 689}
 690
 691#endif /* !CONFIG_DYNAMIC_FTRACE */
 692
/*
 * Hook the return address and push it in the stack of return addrs
 * in current thread info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	if (unlikely(ftrace_graph_is_dead()))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against fault, even if it shouldn't
	 * happen. This tool is too much intrusive to
	 * ignore such a protection.
	 *
	 * Effectively: old = *parent; *parent = return_hooker;
	 * with exception-table fixups so a bad stack slot sets
	 * "faulted" instead of oopsing.
	 */
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		/* Faulting on the return slot is fatal for graph tracing. */
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	trace.func = self_addr;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		*parent = old;
		return;
	}

	/* On push failure, restore the original return address. */
	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
		    frame_pointer) == -EBUSY) {
		*parent = old;
		return;
	}
}
 757#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 758