linux/arch/x86/kernel/ftrace.c
/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
{
        set_kernel_text_rw();
        set_all_modules_text_rw();
        return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
        set_all_modules_text_ro();
        set_kernel_text_ro();
        return 0;
}

union ftrace_code_union {
        char code[MCOUNT_INSN_SIZE];
        struct {
                unsigned char e8;
                int offset;
        } __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
        return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
        static union ftrace_code_union calc;

        calc.e8         = 0xe8;
        calc.offset     = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

        /*
         * No locking needed, this must be called via kstop_machine
         * which in essence is like running on a uniprocessor machine.
         */
        return calc.code;
}
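
/*
 * Editor's illustration (addresses hypothetical): for a call site at
 * ip = 0xffffffff81000100 redirected to addr = 0xffffffff81c00000, the
 * displacement is measured from the end of the 5-byte instruction:
 *
 *   0xffffffff81c00000 - (0xffffffff81000100 + 5) = 0x00bffefb
 *
 * so the emitted bytes are, little-endian: e8 fb fe bf 00.
 */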

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

static unsigned long text_ip_addr(unsigned long ip)
{
        /*
         * On x86_64, kernel text mappings are mapped read-only with
         * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
         * of the kernel text mapping to modify the kernel text.
         *
         * For 32bit kernels, these mappings are the same and we can use
         * the kernel identity mapping to modify code.
         */
        if (within(ip, (unsigned long)_text, (unsigned long)_etext))
                ip = (unsigned long)__va(__pa_symbol(ip));

        return ip;
}
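
/*
 * Editor's illustration (addresses hypothetical): a text address such as
 * 0xffffffff81000100 resolves via __pa_symbol() to a physical address,
 * say 0x1000100, and __va() then yields the writable direct-mapping
 * alias of the same page (e.g. 0xffff880001000100). That alias is what
 * probe_kernel_write() actually patches, sidestepping the read-only
 * text mapping.
 */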

static const unsigned char *ftrace_nop_replace(void)
{
        return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
                   unsigned const char *new_code)
{
        unsigned char replaced[MCOUNT_INSN_SIZE];

        /*
         * Note: Due to modules and __init, code can disappear and
         * change; we need to protect against faulting as well as code
         * changing. We do this by using the probe_kernel_* functions.
         *
         * No real locking needed, this code is run through
         * kstop_machine, or before SMP starts.
         */

        /* read the text we want to modify */
        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        /* Make sure it is what we expect it to be */
        if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
                return -EINVAL;

        ip = text_ip_addr(ip);

        /* replace the text with the new text */
        if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
                return -EPERM;

        sync_core();

        return 0;
}

int ftrace_make_nop(struct module *mod,
                    struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned const char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_call_replace(ip, addr);
        new = ftrace_nop_replace();

        /*
         * On boot up, and when modules are loaded, the MCOUNT_ADDR
         * is converted to a nop, and will never become MCOUNT_ADDR
         * again. This code is either running before SMP (on boot up)
         * or before the code will ever be executed (module load).
         * We do not want to use the breakpoint version in this case,
         * just modify the code directly.
         */
        if (addr == MCOUNT_ADDR)
                return ftrace_modify_code_direct(rec->ip, old, new);

        /* Normal cases use add_brk_on_nop */
        WARN_ONCE(1, "invalid use of ftrace_make_nop");
        return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned const char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_nop_replace();
        new = ftrace_call_replace(ip, addr);

        /* Should only be called when module is loaded */
        return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * The modifying_ftrace_code is used to tell the breakpoint
 * handler to call ftrace_int3_handler(). If it fails to
 * call this handler for a breakpoint added by ftrace, then
 * the kernel may crash.
 *
 * As atomic writes on x86 do not need a barrier, we do not
 * need to add smp_mb()s for this to work. It is also assumed
 * that a CPU cannot read modifying_ftrace_code before it
 * executes the breakpoint; it would be quite remarkable if
 * it could do that. Here's the flow that is required:
 *
 *   CPU-0                          CPU-1
 *
 * atomic_inc(mfc);
 * write int3s
 *                              <trap-int3> // implicit (r)mb
 *                              if (atomic_read(mfc))
 *                                      call ftrace_int3_handler()
 *
 * Then when we are finished:
 *
 * atomic_dec(mfc);
 *
 * If we hit a breakpoint that was not set by ftrace, it does not
 * matter if ftrace_int3_handler() is called or not. It will
 * simply be ignored. But it is crucial that an ftrace nop/caller
 * breakpoint is handled. No other user should ever place a
 * breakpoint on an ftrace nop/caller location. It must only
 * be done by this code.
 */
atomic_t modifying_ftrace_code __read_mostly;

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
                   unsigned const char *new_code);

/*
 * Should never be called:
 *  As it is only called by __ftrace_replace_code() which is called by
 *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
 *  which is called to turn mcount into nops or nops into function calls
 *  but not to convert a function from not using regs to one that uses
 *  regs, which ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
                                 unsigned long addr)
{
        WARN_ON(1);
        return -EINVAL;
}

static unsigned long ftrace_update_func;

static int update_ftrace_func(unsigned long ip, void *new)
{
        unsigned char old[MCOUNT_INSN_SIZE];
        int ret;

        memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);

        ftrace_update_func = ip;
        /* Make sure the breakpoints see the ftrace_update_func update */
        smp_wmb();

        /* See comment above by declaration of modifying_ftrace_code */
        atomic_inc(&modifying_ftrace_code);

        ret = ftrace_modify_code(ip, old, new);

        atomic_dec(&modifying_ftrace_code);

        return ret;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
        unsigned long ip = (unsigned long)(&ftrace_call);
        unsigned char *new;
        int ret;

        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = update_ftrace_func(ip, new);

        /* Also update the regs callback function */
        if (!ret) {
                ip = (unsigned long)(&ftrace_regs_call);
                new = ftrace_call_replace(ip, (unsigned long)func);
                ret = update_ftrace_func(ip, new);
        }

        return ret;
}

static int is_ftrace_caller(unsigned long ip)
{
        if (ip == ftrace_update_func)
                return 1;

        return 0;
}

/*
 * A breakpoint was added to the code address we are about to
 * modify, and this is the handler that will just skip over it.
 * We are either changing a nop into a trace call, or a trace
 * call to a nop. While the change is taking place, we treat
 * it just like it was a nop.
 */
int ftrace_int3_handler(struct pt_regs *regs)
{
        unsigned long ip;

        if (WARN_ON_ONCE(!regs))
                return 0;

        ip = regs->ip - 1;
        if (!ftrace_location(ip) && !is_ftrace_caller(ip))
                return 0;

        regs->ip += MCOUNT_INSN_SIZE - 1;

        return 1;
}
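
/*
 * Editor's worked example: the int3 trap leaves regs->ip pointing just
 * past the one-byte breakpoint, so subtracting 1 recovers the start of
 * the patched instruction. Advancing by MCOUNT_INSN_SIZE - 1 (5 - 1 = 4
 * bytes) then resumes execution at the instruction following the 5-byte
 * call/nop slot; i.e. while being rewritten, the site behaves exactly
 * like a nop.
 */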

static int ftrace_write(unsigned long ip, const char *val, int size)
{
        ip = text_ip_addr(ip);

        if (probe_kernel_write((void *)ip, val, size))
                return -EPERM;

        return 0;
}

static int add_break(unsigned long ip, const char *old)
{
        unsigned char replaced[MCOUNT_INSN_SIZE];
        unsigned char brk = BREAKPOINT_INSTRUCTION;

        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        /* Make sure it is what we expect it to be */
        if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
                return -EINVAL;

        return ftrace_write(ip, &brk, 1);
}

static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned const char *old;
        unsigned long ip = rec->ip;

        old = ftrace_call_replace(ip, addr);

        return add_break(rec->ip, old);
}


static int add_brk_on_nop(struct dyn_ftrace *rec)
{
        unsigned const char *old;

        old = ftrace_nop_replace();

        return add_break(rec->ip, old);
}

static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
        unsigned long ftrace_addr;
        int ret;

        ftrace_addr = ftrace_get_addr_curr(rec);

        ret = ftrace_test_record(rec, enable);

        switch (ret) {
        case FTRACE_UPDATE_IGNORE:
                return 0;

        case FTRACE_UPDATE_MAKE_CALL:
                /* converting nop to call */
                return add_brk_on_nop(rec);

        case FTRACE_UPDATE_MODIFY_CALL:
        case FTRACE_UPDATE_MAKE_NOP:
                /* converting a call to a nop */
                return add_brk_on_call(rec, ftrace_addr);
        }
        return 0;
}

/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If it matches a nop
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
        unsigned char ins[MCOUNT_INSN_SIZE];
        unsigned char brk = BREAKPOINT_INSTRUCTION;
        const unsigned char *nop;
        unsigned long ftrace_addr;
        unsigned long ip = rec->ip;

        /* If we fail the read, just give up */
        if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        /* If this does not have a breakpoint, we are done */
        if (ins[0] != brk)
                return 0;

        nop = ftrace_nop_replace();

        /*
         * If the last 4 bytes of the instruction do not match
         * a nop, then we assume that this is a call to ftrace_addr.
         */
        if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
                /*
                 * As an extra sanity check, we verify that the breakpoint
                 * is on a call that would actually jump to the ftrace_addr.
                 * If not, don't touch the breakpoint; we would just create
                 * a disaster.
                 */
                ftrace_addr = ftrace_get_addr_new(rec);
                nop = ftrace_call_replace(ip, ftrace_addr);

                if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
                        goto update;

                /* Check both ftrace_addr and ftrace_old_addr */
                ftrace_addr = ftrace_get_addr_curr(rec);
                nop = ftrace_call_replace(ip, ftrace_addr);

                if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
                        return -EINVAL;
        }

 update:
        return ftrace_write(ip, nop, 1);
}

static int add_update_code(unsigned long ip, unsigned const char *new)
{
        /* skip breakpoint */
        ip++;
        new++;
        return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}
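
/*
 * Editor's illustration: with the int3 still armed, the 5-byte slot
 * reads "cc ?? ?? ?? ??". add_update_code() rewrites only bytes 1-4 of
 * the new instruction, yielding e.g. "cc fb fe bf 00" for the call from
 * the earlier hypothetical example. Any CPU executing the site still
 * traps on the leading cc byte, so the half-written tail can never be
 * executed; the first byte is restored separately by finish_update_*().
 */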

static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned long ip = rec->ip;
        unsigned const char *new;

        new = ftrace_call_replace(ip, addr);
        return add_update_code(ip, new);
}

static int add_update_nop(struct dyn_ftrace *rec)
{
        unsigned long ip = rec->ip;
        unsigned const char *new;

        new = ftrace_nop_replace();
        return add_update_code(ip, new);
}

static int add_update(struct dyn_ftrace *rec, int enable)
{
        unsigned long ftrace_addr;
        int ret;

        ret = ftrace_test_record(rec, enable);

        ftrace_addr  = ftrace_get_addr_new(rec);

        switch (ret) {
        case FTRACE_UPDATE_IGNORE:
                return 0;

        case FTRACE_UPDATE_MODIFY_CALL:
        case FTRACE_UPDATE_MAKE_CALL:
                /* converting nop to call */
                return add_update_call(rec, ftrace_addr);

        case FTRACE_UPDATE_MAKE_NOP:
                /* converting a call to a nop */
                return add_update_nop(rec);
        }

        return 0;
}

static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned long ip = rec->ip;
        unsigned const char *new;

        new = ftrace_call_replace(ip, addr);

        return ftrace_write(ip, new, 1);
}

static int finish_update_nop(struct dyn_ftrace *rec)
{
        unsigned long ip = rec->ip;
        unsigned const char *new;

        new = ftrace_nop_replace();

        return ftrace_write(ip, new, 1);
}

static int finish_update(struct dyn_ftrace *rec, int enable)
{
        unsigned long ftrace_addr;
        int ret;

        ret = ftrace_update_record(rec, enable);

        ftrace_addr = ftrace_get_addr_new(rec);

        switch (ret) {
        case FTRACE_UPDATE_IGNORE:
                return 0;

        case FTRACE_UPDATE_MODIFY_CALL:
        case FTRACE_UPDATE_MAKE_CALL:
                /* converting nop to call */
                return finish_update_call(rec, ftrace_addr);

        case FTRACE_UPDATE_MAKE_NOP:
                /* converting a call to a nop */
                return finish_update_nop(rec);
        }

        return 0;
}

static void do_sync_core(void *data)
{
        sync_core();
}

static void run_sync(void)
{
        int enable_irqs = irqs_disabled();

        /* We may be called with interrupts disabled (on bootup). */
        if (enable_irqs)
                local_irq_enable();
        on_each_cpu(do_sync_core, NULL, 1);
        if (enable_irqs)
                local_irq_disable();
}
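
/*
 * Editor's note: run_sync() makes every CPU execute sync_core() via an
 * IPI. The serializing instruction forces each CPU to discard any stale
 * prefetched or speculatively decoded copy of the bytes just changed,
 * so each phase of the int3 dance (arm breakpoint, rewrite tail,
 * restore first byte) is globally visible before the next phase begins.
 */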

void ftrace_replace_code(int enable)
{
        struct ftrace_rec_iter *iter;
        struct dyn_ftrace *rec;
        const char *report = "adding breakpoints";
        int count = 0;
        int ret;

        for_ftrace_rec_iter(iter) {
                rec = ftrace_rec_iter_record(iter);

                ret = add_breakpoints(rec, enable);
                if (ret)
                        goto remove_breakpoints;
                count++;
        }

        run_sync();

        report = "updating code";

        for_ftrace_rec_iter(iter) {
                rec = ftrace_rec_iter_record(iter);

                ret = add_update(rec, enable);
                if (ret)
                        goto remove_breakpoints;
        }

        run_sync();

        report = "removing breakpoints";

        for_ftrace_rec_iter(iter) {
                rec = ftrace_rec_iter_record(iter);

                ret = finish_update(rec, enable);
                if (ret)
                        goto remove_breakpoints;
        }

        run_sync();

        return;

 remove_breakpoints:
        pr_warn("Failed on %s (%d):\n", report, count);
        ftrace_bug(ret, rec);
        for_ftrace_rec_iter(iter) {
                rec = ftrace_rec_iter_record(iter);
                /*
                 * Breakpoints are handled only while this function is in
                 * progress. The system could not work with them left in
                 * place.
                 */
                if (remove_breakpoint(rec))
                        BUG();
        }
        run_sync();
}
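
/*
 * Editor's summary of the three phases above, shown on one site (byte
 * values reuse the earlier hypothetical example; 0f 1f 44 00 00 is a
 * typical NOP_ATOMIC5 pattern):
 *
 *   start:   0f 1f 44 00 00   5-byte nop
 *   phase 1: cc 1f 44 00 00   int3 armed; traps skip the slot
 *   phase 2: cc fb fe bf 00   new tail written behind the int3
 *   phase 3: e8 fb fe bf 00   first byte restored: live call
 *
 * The run_sync() between phases guarantees no CPU can ever observe a
 * torn 5-byte instruction.
 */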

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
                   unsigned const char *new_code)
{
        int ret;

        ret = add_break(ip, old_code);
        if (ret)
                goto out;

        run_sync();

        ret = add_update_code(ip, new_code);
        if (ret)
                goto fail_update;

        run_sync();

        ret = ftrace_write(ip, new_code, 1);
        /*
         * The breakpoint is handled only when this function is in progress.
         * The system could not work if we could not remove it.
         */
        BUG_ON(ret);
 out:
        run_sync();
        return ret;

 fail_update:
        /* Also here the system could not work with the breakpoint */
        if (ftrace_write(ip, old_code, 1))
                BUG();
        goto out;
}

void arch_ftrace_update_code(int command)
{
        /* See comment above by declaration of modifying_ftrace_code */
        atomic_inc(&modifying_ftrace_code);

        ftrace_modify_all_code(command);

        atomic_dec(&modifying_ftrace_code);
}

int __init ftrace_dyn_arch_init(void)
{
        return 0;
}

#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
        static union ftrace_code_union calc;

        /* A jmp, not a call (the field is still named e8) */
        calc.e8         = 0xe9;
        calc.offset     = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

        /*
         * ftrace external locks synchronize the access to the static variable.
         */
        return calc.code;
}
#endif

/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64

#ifdef CONFIG_MODULES
#include <linux/moduleloader.h>
/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
        return module_alloc(size);
}
static inline void tramp_free(void *tramp)
{
        module_memfree(tramp);
}
#else
/* Trampolines can only be created if modules are supported */
static inline void *alloc_tramp(unsigned long size)
{
        return NULL;
}
static inline void tramp_free(void *tramp) { }
#endif

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_caller_end(void);
extern void ftrace_regs_caller_end(void);
extern void ftrace_return(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

/* movq function_trace_op(%rip), %rdx */
/* 0x48 0x8b 0x15 <offset-to-function_trace_op (4 bytes)> */
#define OP_REF_SIZE     7

/*
 * The ftrace_ops is passed to the function callback. Since the
 * trampoline only services a single ftrace_ops, we can pass in
 * that ops directly.
 *
 * The ftrace_op_code_union is used to create a pointer to the
 * ftrace_ops that will be passed to the callback function.
 */
union ftrace_op_code_union {
        char code[OP_REF_SIZE];
        struct {
                char op[3];
                int offset;
        } __attribute__((packed));
};
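
/*
 * Editor's illustration: the 7-byte RIP-relative load carries its
 * 32-bit displacement in bytes 3-6. A trampoline that keeps its ops
 * pointer 0x100 bytes past the end of this instruction would encode it
 * as:
 *
 *   48 8b 15 00 01 00 00    movq 0x100(%rip), %rdx
 *
 * create_trampoline() below recomputes exactly this displacement for
 * the relocated copy.
 */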

static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
        unsigned const char *jmp;
        unsigned long start_offset;
        unsigned long end_offset;
        unsigned long op_offset;
        unsigned long offset;
        unsigned long size;
        unsigned long ip;
        unsigned long *ptr;
        void *trampoline;
        /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
        unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
        union ftrace_op_code_union op_ptr;
        int ret;

        if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
                start_offset = (unsigned long)ftrace_regs_caller;
                end_offset = (unsigned long)ftrace_regs_caller_end;
                op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
        } else {
                start_offset = (unsigned long)ftrace_caller;
                end_offset = (unsigned long)ftrace_caller_end;
                op_offset = (unsigned long)ftrace_caller_op_ptr;
        }

        size = end_offset - start_offset;

        /*
         * Allocate enough size to store the ftrace_caller code,
         * the jmp to ftrace_return, as well as the address of
         * the ftrace_ops this trampoline is used for.
         */
        trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
        if (!trampoline)
                return 0;

        *tramp_size = size + MCOUNT_INSN_SIZE + sizeof(void *);

        /* Copy ftrace_caller onto the trampoline memory */
        ret = probe_kernel_read(trampoline, (void *)start_offset, size);
        if (WARN_ON(ret < 0)) {
                tramp_free(trampoline);
                return 0;
        }

        ip = (unsigned long)trampoline + size;

        /* The trampoline ends with a jmp to ftrace_return */
        jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
        memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);

        /*
         * The address of the ftrace_ops that is used for this trampoline
         * is stored at the end of the trampoline. This will be used to
         * load the third parameter for the callback. Basically, that
         * location at the end of the trampoline takes the place of
         * the global function_trace_op variable.
         */

        ptr = (unsigned long *)(trampoline + size + MCOUNT_INSN_SIZE);
        *ptr = (unsigned long)ops;

        op_offset -= start_offset;
        memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

        /* Are we pointing to the reference? */
        if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) {
                tramp_free(trampoline);
                return 0;
        }

        /* Load the contents of ptr into the callback parameter */
        offset = (unsigned long)ptr;
        offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;

        op_ptr.offset = offset;

        /* put in the new offset to the ftrace_ops */
        memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

        /* The ALLOC_TRAMP flag lets us know we created it */
        ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

        return (unsigned long)trampoline;
}
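
/*
 * Editor's sketch of the resulting memory layout:
 *
 *   +------------------------------+ <- ops->trampoline
 *   | copy of ftrace_caller (or    |
 *   | ftrace_regs_caller), with    |  "size" bytes
 *   | the movq patched to read the |
 *   | ops pointer stored below     |
 *   +------------------------------+
 *   | jmp ftrace_return            |  MCOUNT_INSN_SIZE (5) bytes
 *   +------------------------------+
 *   | struct ftrace_ops *ops       |  sizeof(void *) bytes
 *   +------------------------------+
 */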

static unsigned long calc_trampoline_call_offset(bool save_regs)
{
        unsigned long start_offset;
        unsigned long call_offset;

        if (save_regs) {
                start_offset = (unsigned long)ftrace_regs_caller;
                call_offset = (unsigned long)ftrace_regs_call;
        } else {
                start_offset = (unsigned long)ftrace_caller;
                call_offset = (unsigned long)ftrace_call;
        }

        return call_offset - start_offset;
}

void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
        ftrace_func_t func;
        unsigned char *new;
        unsigned long offset;
        unsigned long ip;
        unsigned int size;
        int ret;

        if (ops->trampoline) {
                /*
                 * The ftrace_ops caller may set up its own trampoline.
                 * In such a case, this code must not modify it.
                 */
                if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
                        return;
        } else {
                ops->trampoline = create_trampoline(ops, &size);
                if (!ops->trampoline)
                        return;
                ops->trampoline_size = size;
        }

        offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
        ip = ops->trampoline + offset;

        func = ftrace_ops_get_func(ops);

        /* Do a safe modify in case the trampoline is executing */
        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = update_ftrace_func(ip, new);

        /* The update should never fail */
        WARN_ON(ret);
}

/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
        union ftrace_code_union calc;
        int ret;

        ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
        if (WARN_ON_ONCE(ret < 0))
                return NULL;

        /* Make sure this is a call */
        if (WARN_ON_ONCE(calc.e8 != 0xe8)) {
                pr_warn("Expected e8, got %x\n", calc.e8);
                return NULL;
        }

        return ptr + MCOUNT_INSN_SIZE + calc.offset;
}
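
/*
 * Editor's illustration: addr_from_call() inverts ftrace_call_replace().
 * Reading "e8 fb fe bf 00" at ptr = 0xffffffff81000100 gives
 * calc.offset = 0x00bffefb, and ptr + 5 + offset = 0xffffffff81c00000,
 * the call's target (the same hypothetical addresses as above).
 */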

void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
                           unsigned long frame_pointer);

/*
 * If the ops->trampoline was not allocated, then it probably
 * has a static trampoline func, or is the ftrace caller itself.
 */
static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
        unsigned long offset;
        bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
        void *ptr;

        if (ops && ops->trampoline) {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
                /*
                 * The function graph tracer is the only case we know
                 * of that sets a static trampoline.
                 */
                if (ops->trampoline == FTRACE_GRAPH_ADDR)
                        return (void *)prepare_ftrace_return;
#endif
                return NULL;
        }

        offset = calc_trampoline_call_offset(save_regs);

        if (save_regs)
                ptr = (void *)FTRACE_REGS_ADDR + offset;
        else
                ptr = (void *)FTRACE_ADDR + offset;

        return addr_from_call(ptr);
}

void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
        unsigned long offset;

        /* If we didn't allocate this trampoline, consider it static */
        if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
                return static_tramp_func(ops, rec);

        offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
        return addr_from_call((void *)ops->trampoline + offset);
}

void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
        if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
                return;

        tramp_free((void *)ops->trampoline);
        ops->trampoline = 0;
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip, void *func)
{
        unsigned char *new;

        new = ftrace_jmp_replace(ip, (unsigned long)func);

        return update_ftrace_func(ip, new);
}

int ftrace_enable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);

        return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}

int ftrace_disable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);

        return ftrace_mod_jmp(ip, &ftrace_stub);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in the current thread info.
 */
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
                           unsigned long frame_pointer)
{
        unsigned long old;
        int faulted;
        struct ftrace_graph_ent trace;
        unsigned long return_hooker = (unsigned long)
                                &return_to_handler;

        if (unlikely(ftrace_graph_is_dead()))
                return;

        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                return;

        /*
         * Protect against a fault, even though it shouldn't
         * happen. This tool is too intrusive to forgo such
         * a protection.
         */
        asm volatile(
                "1: " _ASM_MOV " (%[parent]), %[old]\n"
                "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
                "   movl $0, %[faulted]\n"
                "3:\n"

                ".section .fixup, \"ax\"\n"
                "4: movl $1, %[faulted]\n"
                "   jmp 3b\n"
                ".previous\n"

                _ASM_EXTABLE(1b, 4b)
                _ASM_EXTABLE(2b, 4b)

                : [old] "=&r" (old), [faulted] "=r" (faulted)
                : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
                : "memory"
        );

        if (unlikely(faulted)) {
                ftrace_graph_stop();
                WARN_ON(1);
                return;
        }

        trace.func = self_addr;
        trace.depth = current->curr_ret_stack + 1;

        /* Only trace if the calling function expects to */
        if (!ftrace_graph_entry(&trace)) {
                *parent = old;
                return;
        }

        if (ftrace_push_return_trace(old, self_addr, &trace.depth,
                    frame_pointer) == -EBUSY) {
                *parent = old;
                return;
        }
}
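
/*
 * Editor's note on the flow above: "parent" points at the stack slot
 * holding the caller's return address. The guarded asm swaps that
 * address for return_to_handler, and the original is saved on the
 * per-task return stack by ftrace_push_return_trace(). When the traced
 * function returns, it lands in return_to_handler, which reports the
 * exit and jumps back to the saved address. On any failure the swap is
 * undone by restoring "old".
 */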
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */