linux/kernel/trace/fgraph.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Infrastructure to hook into function calls and returns.
 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
 * Mostly borrowed from function tracer which
 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
 *
 * Highly modified by Steven Rostedt (VMware).
 */
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/slab.h>

#include <trace/events/sched.h>

#include "ftrace_internal.h"

#ifdef CONFIG_DYNAMIC_FTRACE
#define ASSIGN_OPS_HASH(opsname, val) \
        .func_hash              = val, \
        .local_hash.regex_lock  = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
#else
#define ASSIGN_OPS_HASH(opsname, val)
#endif

static bool kill_ftrace_graph;
int ftrace_graph_active;

/* Enabled by default (can be cleared by function_graph tracer flags) */
static bool fgraph_sleep_time = true;

/**
 * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
 *
 * ftrace_graph_stop() is called when a severe error is detected in
 * the function graph tracing. This function is called by the critical
 * paths of function graph to keep those paths from doing any more harm.
 */
bool ftrace_graph_is_dead(void)
{
        return kill_ftrace_graph;
}

/**
 * ftrace_graph_stop - set to permanently disable function graph tracing
 *
 * In case of an error in function graph tracing, this is called
 * to try to keep function graph tracing from causing any more harm.
 * Usually this is pretty severe and this is called to try to at least
 * get a warning out to the user.
 */
void ftrace_graph_stop(void)
{
        kill_ftrace_graph = true;
}

/* Add a function return address to the trace stack on thread info. */
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
                         unsigned long frame_pointer, unsigned long *retp)
{
        unsigned long long calltime;
        int index;

        if (unlikely(ftrace_graph_is_dead()))
                return -EBUSY;

        if (!current->ret_stack)
                return -EBUSY;

        /*
         * We must make sure the ret_stack is tested before we read
         * anything else.
         */
        smp_rmb();

        /* The return trace stack is full */
        if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
                atomic_inc(&current->trace_overrun);
                return -EBUSY;
        }

        calltime = trace_clock_local();

        index = ++current->curr_ret_stack;
        barrier();
        current->ret_stack[index].ret = ret;
        current->ret_stack[index].func = func;
        current->ret_stack[index].calltime = calltime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
        current->ret_stack[index].fp = frame_pointer;
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
        current->ret_stack[index].retp = retp;
#endif
        return 0;
}

int function_graph_enter(unsigned long ret, unsigned long func,
                         unsigned long frame_pointer, unsigned long *retp)
{
        struct ftrace_graph_ent trace;

        trace.func = func;
        trace.depth = ++current->curr_ret_depth;

        if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
                goto out;

        /* Only trace if the calling function expects to */
        if (!ftrace_graph_entry(&trace))
                goto out_ret;

        return 0;
 out_ret:
        current->curr_ret_stack--;
 out:
        current->curr_ret_depth--;
        return -EBUSY;
}
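/*
 * A rough sketch of how an arch entry hook is expected to use this (the
 * function name and signature below are illustrative, not copied from any
 * particular arch): the hook reads the original return address from its
 * slot and only redirects it to return_to_handler when
 * function_graph_enter() returns 0.
 *
 *        void prepare_ftrace_return(unsigned long *parent, unsigned long self,
 *                                   unsigned long frame_pointer)
 *        {
 *                unsigned long old = *parent;
 *
 *                if (!function_graph_enter(old, self, frame_pointer, parent))
 *                        *parent = (unsigned long)&return_to_handler;
 *        }
 */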

/* Retrieve a function return address from the trace stack on thread info. */
static void
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
                        unsigned long frame_pointer)
{
        int index;

        index = current->curr_ret_stack;

        if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
                ftrace_graph_stop();
                WARN_ON(1);
                /* Might as well panic, otherwise we have nowhere to go */
                *ret = (unsigned long)panic;
                return;
        }

#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
        /*
         * The arch may choose to record the frame pointer used
         * and check it here to make sure that it is what we expect it
         * to be. If gcc does not set the place holder of the return
         * address in the frame pointer, and does a copy instead, then
         * the function graph trace will fail. This test detects this
         * case.
         *
         * Currently, x86_32 with optimize for size (-Os) makes the latest
         * gcc do the above.
         *
         * Note, -mfentry does not use frame pointers, and this test
         * is not needed if CC_USING_FENTRY is set.
         */
        if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
                ftrace_graph_stop();
                WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
                     "  from func %ps return to %lx\n",
                     current->ret_stack[index].fp,
                     frame_pointer,
                     (void *)current->ret_stack[index].func,
                     current->ret_stack[index].ret);
                *ret = (unsigned long)panic;
                return;
        }
#endif

        *ret = current->ret_stack[index].ret;
        trace->func = current->ret_stack[index].func;
        trace->calltime = current->ret_stack[index].calltime;
        trace->overrun = atomic_read(&current->trace_overrun);
        trace->depth = current->curr_ret_depth--;
        /*
         * We still want to trace interrupts coming in if
         * max_depth is set to 1. Make sure the decrement is
         * seen before ftrace_graph_return.
         */
        barrier();
}

/*
 * Hibernation protection.
 * The state of the current task is too unstable during
 * suspend/restore to disk. We want to protect against that.
 */
static int
ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
                                                        void *unused)
{
        switch (state) {
        case PM_HIBERNATION_PREPARE:
                pause_graph_tracing();
                break;

        case PM_POST_HIBERNATION:
                unpause_graph_tracing();
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block ftrace_suspend_notifier = {
        .notifier_call = ftrace_suspend_notifier_call,
};

/*
 * Send the trace to the ring-buffer.
 * @return the original return address.
 */
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
{
        struct ftrace_graph_ret trace;
        unsigned long ret;

        ftrace_pop_return_trace(&trace, &ret, frame_pointer);
        trace.rettime = trace_clock_local();
        ftrace_graph_return(&trace);
        /*
         * ftrace_graph_return() may still access the current
         * ret_stack structure; we need to make sure the update of
         * curr_ret_stack happens after that.
         */
        barrier();
        current->curr_ret_stack--;

        if (unlikely(!ret)) {
                ftrace_graph_stop();
                WARN_ON(1);
                /* Might as well panic. What else to do? */
                ret = (unsigned long)panic;
        }

        return ret;
}

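/*
 * The arch counterpart of this is the return_to_handler trampoline (usually
 * assembly), which traced functions now "return" to. A loose, C-like sketch
 * of what that trampoline does (illustrative only, not arch code):
 *
 *        save the return-value registers;
 *        original = ftrace_return_to_handler(frame_pointer);
 *        restore the return-value registers;
 *        jump to original;
 */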
/**
 * ftrace_graph_get_ret_stack - return an entry of the shadow stack
 * @task: The task to read the shadow stack from
 * @idx: Index down the shadow stack
 *
 * Return the ret_stack entry on the shadow stack of @task at
 * call-graph position @idx, starting with zero. If @idx is zero, it
 * will return the last saved ret_stack entry. If it is greater than
 * zero, it will return the corresponding ret_stack entry for that
 * depth of saved return addresses.
 */
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
{
        idx = task->curr_ret_stack - idx;

        if (idx >= 0 && idx <= task->curr_ret_stack)
                return &task->ret_stack[idx];

        return NULL;
}
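/*
 * A minimal usage sketch (illustrative, not part of this file): callers such
 * as arch stack tracers can walk every saved return address of a task by
 * increasing @idx until NULL is returned.
 *
 *        struct ftrace_ret_stack *ret_stack;
 *        int i = 0;
 *
 *        while ((ret_stack = ftrace_graph_get_ret_stack(current, i++)))
 *                pr_info("saved return: %pS\n", (void *)ret_stack->ret);
 */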

/**
 * ftrace_graph_ret_addr - convert a potentially modified stack return address
 *                         to its original value
 *
 * This function can be called by stack unwinding code to convert a found stack
 * return address ('ret') to its original value, in case the function graph
 * tracer has modified it to be 'return_to_handler'.  If the address hasn't
 * been modified, the unchanged value of 'ret' is returned.
 *
 * 'idx' is a state variable which should be initialized by the caller to zero
 * before the first call.
 *
 * 'retp' is a pointer to the return address on the stack.  It's ignored if
 * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
 */
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
                                    unsigned long ret, unsigned long *retp)
{
        int index = task->curr_ret_stack;
        int i;

        if (ret != (unsigned long)return_to_handler)
                return ret;

        if (index < 0)
                return ret;

        for (i = 0; i <= index; i++)
                if (task->ret_stack[i].retp == retp)
                        return task->ret_stack[i].ret;

        return ret;
}
#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
                                    unsigned long ret, unsigned long *retp)
{
        int task_idx;

        if (ret != (unsigned long)return_to_handler)
                return ret;

        task_idx = task->curr_ret_stack;

        if (!task->ret_stack || task_idx < *idx)
                return ret;

        task_idx -= *idx;
        (*idx)++;

        return task->ret_stack[task_idx].ret;
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
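/*
 * A hedged usage sketch for an unwinder (unwind_next_frame(), "state" and its
 * fields are hypothetical here): keep one index per stack walk, initialized
 * to zero, and filter every candidate return address through
 * ftrace_graph_ret_addr() before reporting it.
 *
 *        int graph_idx = 0;
 *
 *        while (unwind_next_frame(&state)) {
 *                unsigned long addr = state.ret_addr;
 *
 *                addr = ftrace_graph_ret_addr(task, &graph_idx, addr,
 *                                             state.ret_addr_p);
 *                // report addr
 *        }
 */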

static struct ftrace_ops graph_ops = {
        .func                   = ftrace_stub,
        .flags                  = FTRACE_OPS_FL_RECURSION_SAFE |
                                   FTRACE_OPS_FL_INITIALIZED |
                                   FTRACE_OPS_FL_PID |
                                   FTRACE_OPS_FL_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
        .trampoline             = FTRACE_GRAPH_TRAMP_ADDR,
        /* trampoline_size is only needed for dynamically allocated tramps */
#endif
        ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
};

void ftrace_graph_sleep_time_control(bool enable)
{
        fgraph_sleep_time = enable;
}

int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
        return 0;
}

/* The callbacks that hook a function */
trace_func_graph_ret_t ftrace_graph_return =
                        (trace_func_graph_ret_t)ftrace_stub;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;

/* Try to assign a return stack array to FTRACE_RETSTACK_ALLOC_SIZE tasks. */
static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
{
        int i;
        int ret = 0;
        int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
        struct task_struct *g, *t;

        for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
                ret_stack_list[i] =
                        kmalloc_array(FTRACE_RETFUNC_DEPTH,
                                      sizeof(struct ftrace_ret_stack),
                                      GFP_KERNEL);
                if (!ret_stack_list[i]) {
                        start = 0;
                        end = i;
                        ret = -ENOMEM;
                        goto free;
                }
        }

        read_lock(&tasklist_lock);
        do_each_thread(g, t) {
                if (start == end) {
                        ret = -EAGAIN;
                        goto unlock;
                }

                if (t->ret_stack == NULL) {
                        atomic_set(&t->tracing_graph_pause, 0);
                        atomic_set(&t->trace_overrun, 0);
                        t->curr_ret_stack = -1;
                        t->curr_ret_depth = -1;
                        /* Make sure the tasks see the -1 first: */
                        smp_wmb();
                        t->ret_stack = ret_stack_list[start++];
                }
        } while_each_thread(g, t);

unlock:
        read_unlock(&tasklist_lock);
free:
        for (i = start; i < end; i++)
                kfree(ret_stack_list[i]);
        return ret;
}

static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
                        struct task_struct *prev, struct task_struct *next)
{
        unsigned long long timestamp;
        int index;

        /*
         * Does the user want to count the time a function was asleep?
         * If so, do not update the time stamps.
         */
        if (fgraph_sleep_time)
                return;

        timestamp = trace_clock_local();

        prev->ftrace_timestamp = timestamp;

        /* only process tasks that we timestamped */
        if (!next->ftrace_timestamp)
                return;

        /*
         * Update all the counters in next to make up for the
         * time next was sleeping.
         */
        timestamp -= next->ftrace_timestamp;

        for (index = next->curr_ret_stack; index >= 0; index--)
                next->ret_stack[index].calltime += timestamp;
}
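/*
 * Worked example (illustrative): if "next" was scheduled out for 3 ms with
 * two entries pending on its shadow stack, both calltime values are pushed
 * 3 ms forward here, so the durations later computed as rettime - calltime
 * do not include the time the task spent off the CPU.
 */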

static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
{
        if (!ftrace_ops_test(&global_ops, trace->func, NULL))
                return 0;
        return __ftrace_graph_entry(trace);
}

/*
 * The function graph tracer should only trace the functions defined
 * by set_ftrace_filter and set_ftrace_notrace. If another function
 * tracer ops is registered, the graph tracer requires testing the
 * function against the global ops, and not just trace any function
 * that any ftrace_ops has registered.
 */
void update_function_graph_func(void)
{
        struct ftrace_ops *op;
        bool do_test = false;

        /*
         * The graph and global ops share the same set of functions
         * to test. If any other ops is on the list, then
         * the graph tracing needs to test if it's the function
         * it should call.
         */
        do_for_each_ftrace_op(op, ftrace_ops_list) {
                if (op != &global_ops && op != &graph_ops &&
                    op != &ftrace_list_end) {
                        do_test = true;
                        /* in double loop, break out with goto */
                        goto out;
                }
        } while_for_each_ftrace_op(op);
 out:
        if (do_test)
                ftrace_graph_entry = ftrace_graph_entry_test;
        else
                ftrace_graph_entry = __ftrace_graph_entry;
}

static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);

static void
graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
{
        atomic_set(&t->tracing_graph_pause, 0);
        atomic_set(&t->trace_overrun, 0);
        t->ftrace_timestamp = 0;
        /* make curr_ret_stack visible before we add the ret_stack */
        smp_wmb();
        t->ret_stack = ret_stack;
}

/*
 * Allocate a return stack for the idle task. May be the first
 * time through, or it may be done by CPU hotplug online.
 */
void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
{
        t->curr_ret_stack = -1;
        t->curr_ret_depth = -1;
        /*
         * The idle task has no parent, it either has its own
         * stack or no stack at all.
         */
        if (t->ret_stack)
                WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));

        if (ftrace_graph_active) {
                struct ftrace_ret_stack *ret_stack;

                ret_stack = per_cpu(idle_ret_stack, cpu);
                if (!ret_stack) {
                        ret_stack =
                                kmalloc_array(FTRACE_RETFUNC_DEPTH,
                                              sizeof(struct ftrace_ret_stack),
                                              GFP_KERNEL);
                        if (!ret_stack)
                                return;
                        per_cpu(idle_ret_stack, cpu) = ret_stack;
                }
                graph_init_task(t, ret_stack);
        }
}

/* Allocate a return stack for a newly created task */
void ftrace_graph_init_task(struct task_struct *t)
{
        /* Make sure we do not use the parent ret_stack */
        t->ret_stack = NULL;
        t->curr_ret_stack = -1;
        t->curr_ret_depth = -1;

        if (ftrace_graph_active) {
                struct ftrace_ret_stack *ret_stack;

                ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
                                          sizeof(struct ftrace_ret_stack),
                                          GFP_KERNEL);
                if (!ret_stack)
                        return;
                graph_init_task(t, ret_stack);
        }
}

void ftrace_graph_exit_task(struct task_struct *t)
{
        struct ftrace_ret_stack *ret_stack = t->ret_stack;

        t->ret_stack = NULL;
        /* NULL must become visible to IRQs before we free it: */
        barrier();

        kfree(ret_stack);
}

/* Allocate a return stack for each task */
static int start_graph_tracing(void)
{
        struct ftrace_ret_stack **ret_stack_list;
        int ret, cpu;

        ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
                                       sizeof(struct ftrace_ret_stack *),
                                       GFP_KERNEL);

        if (!ret_stack_list)
                return -ENOMEM;

        /* The cpu_boot init_task->ret_stack will never be freed */
        for_each_online_cpu(cpu) {
                if (!idle_task(cpu)->ret_stack)
                        ftrace_graph_init_idle_task(idle_task(cpu), cpu);
        }

        do {
                ret = alloc_retstack_tasklist(ret_stack_list);
        } while (ret == -EAGAIN);

        if (!ret) {
                ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
                if (ret)
                        pr_info("ftrace_graph: Couldn't activate tracepoint"
                                " probe to kernel_sched_switch\n");
        }

        kfree(ret_stack_list);
        return ret;
}

int register_ftrace_graph(struct fgraph_ops *gops)
{
        int ret = 0;

        mutex_lock(&ftrace_lock);

        /* we currently allow only one tracer registered at a time */
        if (ftrace_graph_active) {
                ret = -EBUSY;
                goto out;
        }

        register_pm_notifier(&ftrace_suspend_notifier);

        ftrace_graph_active++;
        ret = start_graph_tracing();
        if (ret) {
                ftrace_graph_active--;
                goto out;
        }

        ftrace_graph_return = gops->retfunc;

        /*
         * Update the indirect function (__ftrace_graph_entry) to the
         * entryfunc, and set the function that actually gets called
         * (ftrace_graph_entry) to the entry_test first. Then call
         * update_function_graph_func() to determine whether the
         * entryfunc should be called directly or not.
         */
        __ftrace_graph_entry = gops->entryfunc;
        ftrace_graph_entry = ftrace_graph_entry_test;
        update_function_graph_func();

        ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
out:
        mutex_unlock(&ftrace_lock);
        return ret;
}
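/*
 * A minimal registration sketch (illustrative; the my_* names are made up).
 * A graph tracer supplies one entry callback and one return callback through
 * struct fgraph_ops and hands them to register_ftrace_graph():
 *
 *        static int my_entry(struct ftrace_graph_ent *trace)
 *        {
 *                return 1;        // nonzero means: record this function
 *        }
 *
 *        static void my_return(struct ftrace_graph_ret *trace)
 *        {
 *                // trace->rettime - trace->calltime is the traced duration
 *        }
 *
 *        static struct fgraph_ops my_gops = {
 *                .entryfunc      = my_entry,
 *                .retfunc        = my_return,
 *        };
 *
 *        // register_ftrace_graph(&my_gops);
 *        // ...
 *        // unregister_ftrace_graph(&my_gops);
 */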

void unregister_ftrace_graph(struct fgraph_ops *gops)
{
        mutex_lock(&ftrace_lock);

        if (unlikely(!ftrace_graph_active))
                goto out;

        ftrace_graph_active--;
        ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
        ftrace_graph_entry = ftrace_graph_entry_stub;
        __ftrace_graph_entry = ftrace_graph_entry_stub;
        ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
        unregister_pm_notifier(&ftrace_suspend_notifier);
        unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);

 out:
        mutex_unlock(&ftrace_lock);
}