linux/arch/x86/kernel/ftrace.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Dynamic function tracing support.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>

#include <trace/syscall.h>

#include <asm/set_memory.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	set_all_modules_text_rw();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	set_all_modules_text_ro();
	set_kernel_text_ro();
	return 0;
}

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		unsigned char e8;
		int offset;
	} __attribute__((packed));
};
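/*
 * For illustration: ftrace_call_replace() below builds the 5-byte
 * x86 near call "e8 <rel32>", where rel32 is measured from the end
 * of the call instruction, i.e. rel32 = addr - (ip + MCOUNT_INSN_SIZE).
 */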

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8		= 0xe8;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static unsigned long text_ip_addr(unsigned long ip)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only, so we use
	 * the kernel identity mapping instead of the kernel text mapping
	 * to modify the kernel text.
	 *
	 * For 32bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa_symbol(ip));

	return ip;
}

static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
			  unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	ftrace_expected = old_code;

	/*
	 * Note:
	 * We are paranoid about modifying text, as if a bug were to happen, it
	 * could cause us to read or write to someplace that could cause harm.
	 * Carefully read and modify the code with probe_kernel_*(), and make
	 * sure what we read is what we expected it to be before modifying it.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	ip = text_ip_addr(ip);

	/* replace the text with the new text */
	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
		return -EPERM;

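	/* sync_core() serializes execution so the modified code is refetched */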
	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(rec->ip, old, new);

	ftrace_expected = NULL;

	/* Normal cases use add_brk_on_nop */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * The modifying_ftrace_code is used to tell the breakpoint
 * handler to call ftrace_int3_handler(). If it fails to
 * call this handler for a breakpoint added by ftrace, then
 * the kernel may crash.
 *
 * As atomic writes on x86 do not need a barrier, we do not
 * need to add smp_mb()s for this to work. We also assume that
 * a CPU cannot read modifying_ftrace_code before it executes
 * the breakpoint; it would be quite remarkable if it could.
 * Here's the flow that is required:
 *
 *   CPU-0                          CPU-1
 *
 * atomic_inc(mfc);
 * write int3s
 *                              <trap-int3> // implicit (r)mb
 *                              if (atomic_read(mfc))
 *                                      call ftrace_int3_handler()
 *
 * Then when we are finished:
 *
 * atomic_dec(mfc);
 *
 * If we hit a breakpoint that was not set by ftrace, it does not
 * matter if ftrace_int3_handler() is called or not. It will
 * simply be ignored. But it is crucial that an ftrace nop/caller
 * breakpoint is handled. No other user should ever place a
 * breakpoint on an ftrace nop/caller location. It must only
 * be done by this code.
 */
atomic_t modifying_ftrace_code __read_mostly;

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code);

/*
 * Should never be called:
 *  It is only called by __ftrace_replace_code(), which is called by
 *  ftrace_replace_code() (which x86 overrides), and by
 *  ftrace_update_code(), which turns mcount calls into nops or nops
 *  into function calls, but never converts a function from not using
 *  regs to one that uses regs; that is what ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
		       unsigned long addr)
{
	WARN_ON(1);
	ftrace_expected = NULL;
	return -EINVAL;
}

static unsigned long ftrace_update_func;

static int update_ftrace_func(unsigned long ip, void *new)
{
	unsigned char old[MCOUNT_INSN_SIZE];
	int ret;

	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);

	ftrace_update_func = ip;
	/* Make sure the breakpoints see the ftrace_update_func update */
	smp_wmb();

	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ret = ftrace_modify_code(ip, old, new);

	atomic_dec(&modifying_ftrace_code);

	return ret;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char *new;
	int ret;

	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);

	/* Also update the regs callback function */
	if (!ret) {
		ip = (unsigned long)(&ftrace_regs_call);
		new = ftrace_call_replace(ip, (unsigned long)func);
		ret = update_ftrace_func(ip, new);
	}

	return ret;
}

static int is_ftrace_caller(unsigned long ip)
{
	if (ip == ftrace_update_func)
		return 1;

	return 0;
}

/*
 * A breakpoint was added to the code address we are about to
 * modify, and this is the handler that will just skip over it.
 * We are either changing a nop into a trace call, or a trace
 * call to a nop. While the change is taking place, we treat
 * it just like it was a nop.
 */
int ftrace_int3_handler(struct pt_regs *regs)
{
	unsigned long ip;

	if (WARN_ON_ONCE(!regs))
		return 0;

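	/* int3 is a single byte; the trap leaves regs->ip one past it */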
	ip = regs->ip - 1;
	if (!ftrace_location(ip) && !is_ftrace_caller(ip))
		return 0;

	regs->ip += MCOUNT_INSN_SIZE - 1;

	return 1;
}

static int ftrace_write(unsigned long ip, const char *val, int size)
{
	ip = text_ip_addr(ip);

	if (probe_kernel_write((void *)ip, val, size))
		return -EPERM;

	return 0;
}

static int add_break(unsigned long ip, const char *old)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;

	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	ftrace_expected = old;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

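	/* Only the first byte is written; the other four bytes stay intact */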
	return ftrace_write(ip, &brk, 1);
}

static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);

	return add_break(rec->ip, old);
}

static int add_brk_on_nop(struct dyn_ftrace *rec)
{
	unsigned const char *old;

	old = ftrace_nop_replace();

	return add_break(rec->ip, old);
}

static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ftrace_addr = ftrace_get_addr_curr(rec);

	ret = ftrace_test_record(rec, enable);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_brk_on_nop(rec);

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop, or changing a call's target */
		return add_brk_on_call(rec, ftrace_addr);
	}
	return 0;
}

/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If it matches a nop
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
	unsigned char ins[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;
	const unsigned char *nop;
	unsigned long ftrace_addr;
	unsigned long ip = rec->ip;

	/* If we fail the read, just give up */
	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* If this does not have a breakpoint, we are done */
	if (ins[0] != brk)
		return 0;

	nop = ftrace_nop_replace();

	/*
	 * If the last 4 bytes of the instruction do not match
	 * a nop, then we assume that this is a call to ftrace_addr.
	 */
	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
		/*
		 * For extra paranoia, we check if the breakpoint is on
		 * a call that would actually jump to the ftrace_addr.
		 * If not, don't touch the breakpoint, as we may just
		 * create a disaster.
		 */
		ftrace_addr = ftrace_get_addr_new(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
			goto update;

		/* Check both ftrace_addr and ftrace_old_addr */
		ftrace_addr = ftrace_get_addr_curr(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		ftrace_expected = nop;

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
			return -EINVAL;
	}

 update:
	return ftrace_write(ip, nop, 1);
}

static int add_update_code(unsigned long ip, unsigned const char *new)
{
	/* skip breakpoint */
	ip++;
	new++;
	return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}

static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);
	return add_update_code(ip, new);
}

static int add_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();
	return add_update_code(ip, new);
}

static int add_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call, or updating a call's target */
		return add_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_update_nop(rec);
	}

	return 0;
}

static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);

	return ftrace_write(ip, new, 1);
}

static int finish_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();

	return ftrace_write(ip, new, 1);
}

static int finish_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_update_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call, or updating a call's target */
		return finish_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return finish_update_nop(rec);
	}

	return 0;
}

static void do_sync_core(void *data)
{
	sync_core();
}

static void run_sync(void)
{
	int enable_irqs;

	/* No need to sync if there's only one CPU */
	if (num_online_cpus() == 1)
		return;

	enable_irqs = irqs_disabled();

	/* We may be called with interrupts disabled (on bootup). */
	if (enable_irqs)
		local_irq_enable();
	on_each_cpu(do_sync_core, NULL, 1);
	if (enable_irqs)
		local_irq_disable();
}

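/*
 * The code is modified in three passes, with all CPUs re-synced
 * (sync_core() via IPI in run_sync()) after each one:
 *
 *  1) add_breakpoints(): put an int3 on the first byte of each site
 *  2) add_update():      write the last four bytes of the new instruction
 *  3) finish_update():   replace the int3 with the new first byte
 *
 * This way no CPU can ever execute a half-modified instruction.
 */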
void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *report = "adding breakpoints";
	int count = 0;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_breakpoints(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "updating code";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "removing breakpoints";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = finish_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	return;

 remove_breakpoints:
	pr_warn("Failed on %s (%d):\n", report, count);
	ftrace_bug(ret, rec);
	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);
		/*
		 * Breakpoints can only be handled while this function
		 * is in progress; the system cannot work with them
		 * left behind.
		 */
		if (remove_breakpoint(rec))
			BUG();
	}
	run_sync();
}

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret)
		goto fail_update;

	run_sync();

	ret = ftrace_write(ip, new_code, 1);
	/*
	 * The breakpoint is handled only while this function is in
	 * progress. The system cannot work if we fail to remove it.
	 */
	BUG_ON(ret);
 out:
	run_sync();
	return ret;

 fail_update:
	/* Here too the system cannot work with the breakpoint left behind */
	if (ftrace_write(ip, old_code, 1))
		BUG();
	goto out;
}

void arch_ftrace_update_code(int command)
{
	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ftrace_modify_all_code(command);

	atomic_dec(&modifying_ftrace_code);
}

int __init ftrace_dyn_arch_init(void)
{
	return 0;
}

#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	/* A jmp, not a call (the union field named e8 holds 0xe9 here) */
	calc.e8		= 0xe9;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * ftrace's external locks serialize access to the static variable.
	 */
	return calc.code;
}
#endif

/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64

#ifdef CONFIG_MODULES
#include <linux/moduleloader.h>
/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
	return module_alloc(size);
}
static inline void tramp_free(void *tramp, int size)
{
	int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;

	set_memory_nx((unsigned long)tramp, npages);
	set_memory_rw((unsigned long)tramp, npages);
	module_memfree(tramp);
}
#else
/* Trampolines can only be created if modules are supported */
static inline void *alloc_tramp(unsigned long size)
{
	return NULL;
}
static inline void tramp_free(void *tramp, int size) { }
#endif

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

/* movq function_trace_op(%rip), %rdx */
/* 0x48 0x8b 0x15 <offset-to-function_trace_op (4 bytes)> */
#define OP_REF_SIZE	7
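/* OP_REF_SIZE = 3 opcode bytes (48 8b 15) + a 4-byte RIP-relative offset */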

/*
 * The ftrace_ops is passed to the function callback. Since the
 * trampoline only services a single ftrace_ops, we can pass in
 * that ops directly.
 *
 * The ftrace_op_code_union is used to create a pointer to the
 * ftrace_ops that will be passed to the callback function.
 */
union ftrace_op_code_union {
	char code[OP_REF_SIZE];
	struct {
		char op[3];
		int offset;
	} __attribute__((packed));
};

#define RET_SIZE		1

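/*
 * Resulting trampoline layout (see create_trampoline() below):
 *
 *   trampoline ................. copy of ftrace_caller (size bytes)
 *   trampoline + size .......... ret(q) copied from ftrace_stub (RET_SIZE)
 *   trampoline + size
 *              + RET_SIZE ...... the ftrace_ops pointer (sizeof(void *))
 */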
static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long op_offset;
	unsigned long offset;
	unsigned long size;
	unsigned long retq;
	unsigned long *ptr;
	void *trampoline;
	void *ip;
	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
	union ftrace_op_code_union op_ptr;
	int ret;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		start_offset = (unsigned long)ftrace_regs_caller;
		end_offset = (unsigned long)ftrace_regs_caller_end;
		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		end_offset = (unsigned long)ftrace_epilogue;
		op_offset = (unsigned long)ftrace_caller_op_ptr;
	}

	size = end_offset - start_offset;

	/*
	 * Allocate enough space to store the ftrace_caller code,
	 * the ret, as well as the address of the ftrace_ops this
	 * trampoline is used for.
	 */
	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
	if (!trampoline)
		return 0;

	*tramp_size = size + RET_SIZE + sizeof(void *);

	/* Copy ftrace_caller onto the trampoline memory */
	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
	if (WARN_ON(ret < 0))
		goto fail;

	ip = trampoline + size;

	/* The trampoline ends with ret(q) */
	retq = (unsigned long)ftrace_stub;
	ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
	if (WARN_ON(ret < 0))
		goto fail;

	/*
	 * The address of the ftrace_ops that is used for this trampoline
	 * is stored at the end of the trampoline. This will be used to
	 * load the third parameter for the callback. Basically, that
	 * location at the end of the trampoline takes the place of
	 * the global function_trace_op variable.
	 */

	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
	*ptr = (unsigned long)ops;

	op_offset -= start_offset;
	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

	/* Are we pointing to the reference? */
	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
		goto fail;

	/* Load the contents of ptr into the callback parameter */
	offset = (unsigned long)ptr;
	offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;

	op_ptr.offset = offset;

	/* put in the new offset to the ftrace_ops */
	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

	/* The ALLOC_TRAMP flag lets us know that we created it */
	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

	return (unsigned long)trampoline;
fail:
	tramp_free(trampoline, *tramp_size);
	return 0;
}

static unsigned long calc_trampoline_call_offset(bool save_regs)
{
	unsigned long start_offset;
	unsigned long call_offset;

	if (save_regs) {
		start_offset = (unsigned long)ftrace_regs_caller;
		call_offset = (unsigned long)ftrace_regs_call;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		call_offset = (unsigned long)ftrace_call;
	}

	return call_offset - start_offset;
}
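/*
 * Since the trampoline is a byte-for-byte copy of its caller, the
 * patched call site sits at the same offset from the trampoline's
 * start as ftrace_call (or ftrace_regs_call) does from the original.
 */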

void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
	ftrace_func_t func;
	unsigned char *new;
	unsigned long offset;
	unsigned long ip;
	unsigned int size;
	int ret, npages;

	if (ops->trampoline) {
		/*
		 * The ftrace_ops caller may set up its own trampoline.
		 * In such a case, this code must not modify it.
		 */
		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
			return;
		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
		set_memory_rw(ops->trampoline, npages);
	} else {
		ops->trampoline = create_trampoline(ops, &size);
		if (!ops->trampoline)
			return;
		ops->trampoline_size = size;
		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	}

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	ip = ops->trampoline + offset;

	func = ftrace_ops_get_func(ops);

	/* Do a safe modify in case the trampoline is executing */
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);
	set_memory_ro(ops->trampoline, npages);

	/* The update should never fail */
	WARN_ON(ret);
}

/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
	union ftrace_code_union calc;
	int ret;

	ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
	if (WARN_ON_ONCE(ret < 0))
		return NULL;

	/* Make sure this is a call */
	if (WARN_ON_ONCE(calc.e8 != 0xe8)) {
		pr_warn("Expected e8, got %x\n", calc.e8);
		return NULL;
	}

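	/* Call target = address of the next instruction + signed rel32 */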
	return ptr + MCOUNT_INSN_SIZE + calc.offset;
}

void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer);

/*
 * If the ops->trampoline was not allocated, then it probably
 * has a static trampoline func, or is the ftrace caller itself.
 */
static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;
	bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
	void *ptr;

	if (ops && ops->trampoline) {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
		/*
		 * The function graph tracer is the only user we know
		 * of that sets a static trampoline.
		 */
		if (ops->trampoline == FTRACE_GRAPH_ADDR)
			return (void *)prepare_ftrace_return;
#endif
		return NULL;
	}

	offset = calc_trampoline_call_offset(save_regs);

	if (save_regs)
		ptr = (void *)FTRACE_REGS_ADDR + offset;
	else
		ptr = (void *)FTRACE_ADDR + offset;

	return addr_from_call(ptr);
}

void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;

	/* If we didn't allocate this trampoline, consider it static */
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return static_tramp_func(ops, rec);

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	return addr_from_call((void *)ops->trampoline + offset);
}

void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	tramp_free((void *)ops->trampoline, ops->trampoline_size);
	ops->trampoline = 0;
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip, void *func)
{
	unsigned char *new;

	new = ftrace_jmp_replace(ip, (unsigned long)func);

	return update_ftrace_func(ip, new);
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_stub);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in the current thread info.
 */
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	/*
	 * When resuming from suspend-to-ram, this function can be indirectly
	 * called from early CPU startup code while the CPU is in real mode,
	 * which would fail miserably.  Make sure the stack pointer is a
	 * virtual address.
	 *
	 * This check isn't as accurate as virt_addr_valid(), but it should be
	 * good enough for this purpose, and it's fast.
	 */
	if (unlikely((long)__builtin_frame_address(0) >= 0))
		return;

	if (unlikely(ftrace_graph_is_dead()))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * ignore such protection.
	 */
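	/*
	 * The asm below reads the saved return address (*parent) into
	 * 'old', then writes return_hooker over it on the stack. A fault
	 * in either access is redirected to the fixup code, which sets
	 * 'faulted' instead of crashing.
	 */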
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	if (function_graph_enter(old, self_addr, frame_pointer, parent))
		*parent = old;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
