linux/kernel/trace/trace.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * ring buffer based function tracer
   4 *
   5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   7 *
   8 * Originally taken from the RT patch by:
   9 *    Arnaldo Carvalho de Melo <acme@redhat.com>
  10 *
  11 * Based on code from the latency_tracer, that is:
  12 *  Copyright (C) 2004-2006 Ingo Molnar
  13 *  Copyright (C) 2004 Nadia Yvette Chambers
  14 */
  15#include <linux/ring_buffer.h>
  16#include <generated/utsrelease.h>
  17#include <linux/stacktrace.h>
  18#include <linux/writeback.h>
  19#include <linux/kallsyms.h>
  20#include <linux/security.h>
  21#include <linux/seq_file.h>
  22#include <linux/notifier.h>
  23#include <linux/irqflags.h>
  24#include <linux/debugfs.h>
  25#include <linux/tracefs.h>
  26#include <linux/pagemap.h>
  27#include <linux/hardirq.h>
  28#include <linux/linkage.h>
  29#include <linux/uaccess.h>
  30#include <linux/vmalloc.h>
  31#include <linux/ftrace.h>
  32#include <linux/module.h>
  33#include <linux/percpu.h>
  34#include <linux/splice.h>
  35#include <linux/kdebug.h>
  36#include <linux/string.h>
  37#include <linux/mount.h>
  38#include <linux/rwsem.h>
  39#include <linux/slab.h>
  40#include <linux/ctype.h>
  41#include <linux/init.h>
  42#include <linux/panic_notifier.h>
  43#include <linux/poll.h>
  44#include <linux/nmi.h>
  45#include <linux/fs.h>
  46#include <linux/trace.h>
  47#include <linux/sched/clock.h>
  48#include <linux/sched/rt.h>
  49#include <linux/fsnotify.h>
  50#include <linux/irq_work.h>
  51#include <linux/workqueue.h>
  52
  53#include "trace.h"
  54#include "trace_output.h"
  55
  56/*
  57 * On boot up, the ring buffer is set to the minimum size, so that
  58 * we do not waste memory on systems that are not using tracing.
  59 */
  60bool ring_buffer_expanded;
  61
  62/*
  63 * We need to change this state when a selftest is running.
   64 * A selftest will look into the ring buffer to count the
   65 * entries inserted during the selftest, although some concurrent
   66 * insertions into the ring buffer, such as trace_printk, could occur
   67 * at the same time, giving false positive or negative results.
  68 */
  69static bool __read_mostly tracing_selftest_running;
  70
  71/*
  72 * If boot-time tracing including tracers/events via kernel cmdline
  73 * is running, we do not want to run SELFTEST.
  74 */
  75bool __read_mostly tracing_selftest_disabled;
  76
  77#ifdef CONFIG_FTRACE_STARTUP_TEST
  78void __init disable_tracing_selftest(const char *reason)
  79{
  80        if (!tracing_selftest_disabled) {
  81                tracing_selftest_disabled = true;
  82                pr_info("Ftrace startup test is disabled due to %s\n", reason);
  83        }
  84}
  85#endif
  86
  87/* Pipe tracepoints to printk */
  88struct trace_iterator *tracepoint_print_iter;
  89int tracepoint_printk;
  90static bool tracepoint_printk_stop_on_boot __initdata;
  91static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  92
  93/* For tracers that don't implement custom flags */
  94static struct tracer_opt dummy_tracer_opt[] = {
  95        { }
  96};
  97
  98static int
  99dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 100{
 101        return 0;
 102}
 103
 104/*
 105 * To prevent the comm cache from being overwritten when no
 106 * tracing is active, only save the comm when a trace event
  107 * occurs.
 108 */
 109static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 110
 111/*
 112 * Kill all tracing for good (never come back).
  113 * It is initialized to 1 but will be set to zero if the initialization
  114 * of the tracer is successful. That is the only place that ever sets
  115 * it back to zero.
 116 */
 117static int tracing_disabled = 1;
 118
 119cpumask_var_t __read_mostly     tracing_buffer_mask;
 120
 121/*
 122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 123 *
  124 * If there is an oops (or kernel panic) and ftrace_dump_on_oops
  125 * is set, then ftrace_dump is called. This will output the contents
  126 * of the ftrace buffers to the console.  This is very useful for
  127 * capturing traces that lead to crashes and outputting them to a
  128 * serial console.
  129 *
  130 * It is off by default, but you can enable it either by specifying
  131 * "ftrace_dump_on_oops" on the kernel command line, or by setting
  132 * /proc/sys/kernel/ftrace_dump_on_oops
  133 * Set it to 1 to dump the buffers of all CPUs.
  134 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
 135 */
 136
 137enum ftrace_dump_mode ftrace_dump_on_oops;
 138
 139/* When set, tracing will stop when a WARN*() is hit */
 140int __disable_trace_on_warning;
 141
 142#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 143/* Map of enums to their values, for "eval_map" file */
 144struct trace_eval_map_head {
 145        struct module                   *mod;
 146        unsigned long                   length;
 147};
 148
 149union trace_eval_map_item;
 150
 151struct trace_eval_map_tail {
 152        /*
 153         * "end" is first and points to NULL as it must be different
 154         * than "mod" or "eval_string"
 155         */
 156        union trace_eval_map_item       *next;
 157        const char                      *end;   /* points to NULL */
 158};
 159
 160static DEFINE_MUTEX(trace_eval_mutex);
 161
 162/*
 163 * The trace_eval_maps are saved in an array with two extra elements,
 164 * one at the beginning, and one at the end. The beginning item contains
 165 * the count of the saved maps (head.length), and the module they
 166 * belong to if not built in (head.mod). The ending item contains a
 167 * pointer to the next array of saved eval_map items.
 168 */
 169union trace_eval_map_item {
 170        struct trace_eval_map           map;
 171        struct trace_eval_map_head      head;
 172        struct trace_eval_map_tail      tail;
 173};
 174
 175static union trace_eval_map_item *trace_eval_maps;
 176#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 177
 178int tracing_set_tracer(struct trace_array *tr, const char *buf);
 179static void ftrace_trace_userstack(struct trace_array *tr,
 180                                   struct trace_buffer *buffer,
 181                                   unsigned int trace_ctx);
 182
 183#define MAX_TRACER_SIZE         100
 184static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 185static char *default_bootup_tracer;
 186
 187static bool allocate_snapshot;
 188
 189static int __init set_cmdline_ftrace(char *str)
 190{
 191        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 192        default_bootup_tracer = bootup_tracer_buf;
 193        /* We are using ftrace early, expand it */
 194        ring_buffer_expanded = true;
 195        return 1;
 196}
 197__setup("ftrace=", set_cmdline_ftrace);
 198
 199static int __init set_ftrace_dump_on_oops(char *str)
 200{
 201        if (*str++ != '=' || !*str || !strcmp("1", str)) {
 202                ftrace_dump_on_oops = DUMP_ALL;
 203                return 1;
 204        }
 205
 206        if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
 207                ftrace_dump_on_oops = DUMP_ORIG;
 208                return 1;
 209        }
 210
 211        return 0;
 212}
 213__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 214
 215static int __init stop_trace_on_warning(char *str)
 216{
 217        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 218                __disable_trace_on_warning = 1;
 219        return 1;
 220}
 221__setup("traceoff_on_warning", stop_trace_on_warning);
 222
 223static int __init boot_alloc_snapshot(char *str)
 224{
 225        allocate_snapshot = true;
 226        /* We also need the main ring buffer expanded */
 227        ring_buffer_expanded = true;
 228        return 1;
 229}
 230__setup("alloc_snapshot", boot_alloc_snapshot);
 231
 232
 233static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 234
 235static int __init set_trace_boot_options(char *str)
 236{
 237        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 238        return 0;
 239}
 240__setup("trace_options=", set_trace_boot_options);
 241
 242static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 243static char *trace_boot_clock __initdata;
 244
 245static int __init set_trace_boot_clock(char *str)
 246{
 247        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 248        trace_boot_clock = trace_boot_clock_buf;
 249        return 0;
 250}
 251__setup("trace_clock=", set_trace_boot_clock);
 252
 253static int __init set_tracepoint_printk(char *str)
 254{
 255        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 256                tracepoint_printk = 1;
 257        return 1;
 258}
 259__setup("tp_printk", set_tracepoint_printk);
 260
 261static int __init set_tracepoint_printk_stop(char *str)
 262{
 263        tracepoint_printk_stop_on_boot = true;
 264        return 1;
 265}
 266__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
 267
 268unsigned long long ns2usecs(u64 nsec)
 269{
 270        nsec += 500;
 271        do_div(nsec, 1000);
 272        return nsec;
 273}
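/*
 * For example, ns2usecs() rounds to the nearest microsecond:
 * ns2usecs(1499) == (1499 + 500) / 1000 == 1, and ns2usecs(1500) == 2.
 */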
 274
 275static void
 276trace_process_export(struct trace_export *export,
 277               struct ring_buffer_event *event, int flag)
 278{
 279        struct trace_entry *entry;
 280        unsigned int size = 0;
 281
 282        if (export->flags & flag) {
 283                entry = ring_buffer_event_data(event);
 284                size = ring_buffer_event_length(event);
 285                export->write(export, entry, size);
 286        }
 287}
 288
 289static DEFINE_MUTEX(ftrace_export_lock);
 290
 291static struct trace_export __rcu *ftrace_exports_list __read_mostly;
 292
 293static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
 294static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
 295static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
 296
 297static inline void ftrace_exports_enable(struct trace_export *export)
 298{
 299        if (export->flags & TRACE_EXPORT_FUNCTION)
 300                static_branch_inc(&trace_function_exports_enabled);
 301
 302        if (export->flags & TRACE_EXPORT_EVENT)
 303                static_branch_inc(&trace_event_exports_enabled);
 304
 305        if (export->flags & TRACE_EXPORT_MARKER)
 306                static_branch_inc(&trace_marker_exports_enabled);
 307}
 308
 309static inline void ftrace_exports_disable(struct trace_export *export)
 310{
 311        if (export->flags & TRACE_EXPORT_FUNCTION)
 312                static_branch_dec(&trace_function_exports_enabled);
 313
 314        if (export->flags & TRACE_EXPORT_EVENT)
 315                static_branch_dec(&trace_event_exports_enabled);
 316
 317        if (export->flags & TRACE_EXPORT_MARKER)
 318                static_branch_dec(&trace_marker_exports_enabled);
 319}
 320
 321static void ftrace_exports(struct ring_buffer_event *event, int flag)
 322{
 323        struct trace_export *export;
 324
 325        preempt_disable_notrace();
 326
 327        export = rcu_dereference_raw_check(ftrace_exports_list);
 328        while (export) {
 329                trace_process_export(export, event, flag);
 330                export = rcu_dereference_raw_check(export->next);
 331        }
 332
 333        preempt_enable_notrace();
 334}
 335
 336static inline void
 337add_trace_export(struct trace_export **list, struct trace_export *export)
 338{
 339        rcu_assign_pointer(export->next, *list);
 340        /*
  341         * We are adding export to the list, but another
  342         * CPU might be walking that list. We need to make sure
  343         * the export->next pointer is valid before another CPU sees
  344         * the export pointer included in the list.
 345         */
 346        rcu_assign_pointer(*list, export);
 347}
 348
 349static inline int
 350rm_trace_export(struct trace_export **list, struct trace_export *export)
 351{
 352        struct trace_export **p;
 353
 354        for (p = list; *p != NULL; p = &(*p)->next)
 355                if (*p == export)
 356                        break;
 357
 358        if (*p != export)
 359                return -1;
 360
 361        rcu_assign_pointer(*p, (*p)->next);
 362
 363        return 0;
 364}
 365
 366static inline void
 367add_ftrace_export(struct trace_export **list, struct trace_export *export)
 368{
 369        ftrace_exports_enable(export);
 370
 371        add_trace_export(list, export);
 372}
 373
 374static inline int
 375rm_ftrace_export(struct trace_export **list, struct trace_export *export)
 376{
 377        int ret;
 378
 379        ret = rm_trace_export(list, export);
 380        ftrace_exports_disable(export);
 381
 382        return ret;
 383}
 384
 385int register_ftrace_export(struct trace_export *export)
 386{
 387        if (WARN_ON_ONCE(!export->write))
 388                return -1;
 389
 390        mutex_lock(&ftrace_export_lock);
 391
 392        add_ftrace_export(&ftrace_exports_list, export);
 393
 394        mutex_unlock(&ftrace_export_lock);
 395
 396        return 0;
 397}
 398EXPORT_SYMBOL_GPL(register_ftrace_export);
 399
 400int unregister_ftrace_export(struct trace_export *export)
 401{
 402        int ret;
 403
 404        mutex_lock(&ftrace_export_lock);
 405
 406        ret = rm_ftrace_export(&ftrace_exports_list, export);
 407
 408        mutex_unlock(&ftrace_export_lock);
 409
 410        return ret;
 411}
 412EXPORT_SYMBOL_GPL(unregister_ftrace_export);
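/*
 * Minimal export client (a sketch; "my_export" and "my_write" are
 * hypothetical, and the struct trace_export layout is assumed from
 * <linux/trace.h>):
 *
 *	// called for every exported entry; may forward it anywhere,
 *	// e.g. to a firmware or network log
 *	static void my_write(struct trace_export *export, const void *entry,
 *			     unsigned int len)
 *	{
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_write,
 *		.flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */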
 413
 414/* trace_flags holds trace_options default values */
 415#define TRACE_DEFAULT_FLAGS                                             \
 416        (FUNCTION_DEFAULT_FLAGS |                                       \
 417         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 418         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 419         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 420         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
 421         TRACE_ITER_HASH_PTR)
 422
 423/* trace_options that are only supported by global_trace */
 424#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 425               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 426
 427/* trace_flags that are default zero for instances */
 428#define ZEROED_TRACE_FLAGS \
 429        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 430
 431/*
 432 * The global_trace is the descriptor that holds the top-level tracing
 433 * buffers for the live tracing.
 434 */
 435static struct trace_array global_trace = {
 436        .trace_flags = TRACE_DEFAULT_FLAGS,
 437};
 438
 439LIST_HEAD(ftrace_trace_arrays);
 440
 441int trace_array_get(struct trace_array *this_tr)
 442{
 443        struct trace_array *tr;
 444        int ret = -ENODEV;
 445
 446        mutex_lock(&trace_types_lock);
 447        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 448                if (tr == this_tr) {
 449                        tr->ref++;
 450                        ret = 0;
 451                        break;
 452                }
 453        }
 454        mutex_unlock(&trace_types_lock);
 455
 456        return ret;
 457}
 458
 459static void __trace_array_put(struct trace_array *this_tr)
 460{
 461        WARN_ON(!this_tr->ref);
 462        this_tr->ref--;
 463}
 464
 465/**
 466 * trace_array_put - Decrement the reference counter for this trace array.
 467 * @this_tr : pointer to the trace array
 468 *
 469 * NOTE: Use this when we no longer need the trace array returned by
 470 * trace_array_get_by_name(). This ensures the trace array can be later
 471 * destroyed.
 472 *
 473 */
 474void trace_array_put(struct trace_array *this_tr)
 475{
 476        if (!this_tr)
 477                return;
 478
 479        mutex_lock(&trace_types_lock);
 480        __trace_array_put(this_tr);
 481        mutex_unlock(&trace_types_lock);
 482}
 483EXPORT_SYMBOL_GPL(trace_array_put);
 484
 485int tracing_check_open_get_tr(struct trace_array *tr)
 486{
 487        int ret;
 488
 489        ret = security_locked_down(LOCKDOWN_TRACEFS);
 490        if (ret)
 491                return ret;
 492
 493        if (tracing_disabled)
 494                return -ENODEV;
 495
 496        if (tr && trace_array_get(tr) < 0)
 497                return -ENODEV;
 498
 499        return 0;
 500}
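/*
 * Typical use in a tracefs open() handler (a sketch; "my_open" is
 * hypothetical, the pattern mirrors the file_operations later in this
 * file):
 *
 *	static int my_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		...
 *		return 0;
 *	}
 *
 * The matching trace_array_put(tr) belongs in the ->release() handler.
 */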
 501
 502int call_filter_check_discard(struct trace_event_call *call, void *rec,
 503                              struct trace_buffer *buffer,
 504                              struct ring_buffer_event *event)
 505{
 506        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 507            !filter_match_preds(call->filter, rec)) {
 508                __trace_event_discard_commit(buffer, event);
 509                return 1;
 510        }
 511
 512        return 0;
 513}
 514
 515void trace_free_pid_list(struct trace_pid_list *pid_list)
 516{
 517        vfree(pid_list->pids);
 518        kfree(pid_list);
 519}
 520
 521/**
 522 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 523 * @filtered_pids: The list of pids to check
 524 * @search_pid: The PID to find in @filtered_pids
 525 *
 526 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 527 */
 528bool
 529trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 530{
 531        /*
 532         * If pid_max changed after filtered_pids was created, we
 533         * by default ignore all pids greater than the previous pid_max.
 534         */
 535        if (search_pid >= filtered_pids->pid_max)
 536                return false;
 537
 538        return test_bit(search_pid, filtered_pids->pids);
 539}
 540
 541/**
 542 * trace_ignore_this_task - should a task be ignored for tracing
 543 * @filtered_pids: The list of pids to check
 544 * @filtered_no_pids: The list of pids not to be traced
 545 * @task: The task that should be ignored if not filtered
 546 *
 547 * Checks if @task should be traced or not from @filtered_pids.
 548 * Returns true if @task should *NOT* be traced.
 549 * Returns false if @task should be traced.
 550 */
 551bool
 552trace_ignore_this_task(struct trace_pid_list *filtered_pids,
 553                       struct trace_pid_list *filtered_no_pids,
 554                       struct task_struct *task)
 555{
 556        /*
 557         * If filtered_no_pids is not empty, and the task's pid is listed
 558         * in filtered_no_pids, then return true.
 559         * Otherwise, if filtered_pids is empty, that means we can
 560         * trace all tasks. If it has content, then only trace pids
 561         * within filtered_pids.
 562         */
 563
 564        return (filtered_pids &&
 565                !trace_find_filtered_pid(filtered_pids, task->pid)) ||
 566                (filtered_no_pids &&
 567                 trace_find_filtered_pid(filtered_no_pids, task->pid));
 568}
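/*
 * For example, with filtered_pids = { 1, 2 } and filtered_no_pids = { 3 }:
 * pid 2 is traced (returns false), pid 3 is ignored (listed in
 * filtered_no_pids), and pid 4 is ignored (not listed in filtered_pids).
 */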
 569
 570/**
 571 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 572 * @pid_list: The list to modify
 573 * @self: The current task for fork or NULL for exit
 574 * @task: The task to add or remove
 575 *
 576 * If adding a task, if @self is defined, the task is only added if @self
 577 * is also included in @pid_list. This happens on fork and tasks should
 578 * only be added when the parent is listed. If @self is NULL, then the
 579 * @task pid will be removed from the list, which would happen on exit
 580 * of a task.
 581 */
 582void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 583                                  struct task_struct *self,
 584                                  struct task_struct *task)
 585{
 586        if (!pid_list)
 587                return;
 588
 589        /* For forks, we only add if the forking task is listed */
 590        if (self) {
 591                if (!trace_find_filtered_pid(pid_list, self->pid))
 592                        return;
 593        }
 594
 595        /* Sorry, but we don't support pid_max changing after setting */
 596        if (task->pid >= pid_list->pid_max)
 597                return;
 598
 599        /* "self" is set for forks, and NULL for exits */
 600        if (self)
 601                set_bit(task->pid, pid_list->pids);
 602        else
 603                clear_bit(task->pid, pid_list->pids);
 604}
 605
 606/**
 607 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 608 * @pid_list: The pid list to show
  609 * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
 610 * @pos: The position of the file
 611 *
 612 * This is used by the seq_file "next" operation to iterate the pids
 613 * listed in a trace_pid_list structure.
 614 *
 615 * Returns the pid+1 as we want to display pid of zero, but NULL would
 616 * stop the iteration.
 617 */
 618void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 619{
 620        unsigned long pid = (unsigned long)v;
 621
 622        (*pos)++;
 623
 624        /* pid already is +1 of the actual previous bit */
 625        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 626
 627        /* Return pid + 1 to allow zero to be represented */
 628        if (pid < pid_list->pid_max)
 629                return (void *)(pid + 1);
 630
 631        return NULL;
 632}
 633
 634/**
 635 * trace_pid_start - Used for seq_file to start reading pid lists
 636 * @pid_list: The pid list to show
 637 * @pos: The position of the file
 638 *
 639 * This is used by seq_file "start" operation to start the iteration
 640 * of listing pids.
 641 *
 642 * Returns the pid+1 as we want to display pid of zero, but NULL would
 643 * stop the iteration.
 644 */
 645void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 646{
 647        unsigned long pid;
 648        loff_t l = 0;
 649
 650        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 651        if (pid >= pid_list->pid_max)
 652                return NULL;
 653
 654        /* Return pid + 1 so that zero can be the exit value */
 655        for (pid++; pid && l < *pos;
 656             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 657                ;
 658        return (void *)pid;
 659}
 660
 661/**
 662 * trace_pid_show - show the current pid in seq_file processing
 663 * @m: The seq_file structure to write into
 664 * @v: A void pointer of the pid (+1) value to display
 665 *
 666 * Can be directly used by seq_file operations to display the current
 667 * pid value.
 668 */
 669int trace_pid_show(struct seq_file *m, void *v)
 670{
 671        unsigned long pid = (unsigned long)v - 1;
 672
 673        seq_printf(m, "%lu\n", pid);
 674        return 0;
 675}
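/*
 * These helpers are meant to back a seq_file interface (a sketch; the
 * my_pid_* wrappers and my_pid_seq_ops are hypothetical and would fetch
 * the relevant pid_list before delegating to the helpers above):
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_start,	// calls trace_pid_start(pid_list, pos)
 *		.next	= my_pid_next,	// calls trace_pid_next(pid_list, v, pos)
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */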
 676
 677/* 128 should be much more than enough */
 678#define PID_BUF_SIZE            127
 679
 680int trace_pid_write(struct trace_pid_list *filtered_pids,
 681                    struct trace_pid_list **new_pid_list,
 682                    const char __user *ubuf, size_t cnt)
 683{
 684        struct trace_pid_list *pid_list;
 685        struct trace_parser parser;
 686        unsigned long val;
 687        int nr_pids = 0;
 688        ssize_t read = 0;
 689        ssize_t ret = 0;
 690        loff_t pos;
 691        pid_t pid;
 692
 693        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 694                return -ENOMEM;
 695
 696        /*
  697         * Always recreate a new array. The write is an all or nothing
  698         * operation: a new array is always created when the user adds
  699         * new pids. If the operation fails, then the current list is
  700         * not modified.
 701         */
 702        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 703        if (!pid_list) {
 704                trace_parser_put(&parser);
 705                return -ENOMEM;
 706        }
 707
 708        pid_list->pid_max = READ_ONCE(pid_max);
 709
 710        /* Only truncating will shrink pid_max */
 711        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 712                pid_list->pid_max = filtered_pids->pid_max;
 713
 714        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 715        if (!pid_list->pids) {
 716                trace_parser_put(&parser);
 717                kfree(pid_list);
 718                return -ENOMEM;
 719        }
 720
 721        if (filtered_pids) {
 722                /* copy the current bits to the new max */
 723                for_each_set_bit(pid, filtered_pids->pids,
 724                                 filtered_pids->pid_max) {
 725                        set_bit(pid, pid_list->pids);
 726                        nr_pids++;
 727                }
 728        }
 729
 730        while (cnt > 0) {
 731
 732                pos = 0;
 733
 734                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 735                if (ret < 0 || !trace_parser_loaded(&parser))
 736                        break;
 737
 738                read += ret;
 739                ubuf += ret;
 740                cnt -= ret;
 741
 742                ret = -EINVAL;
 743                if (kstrtoul(parser.buffer, 0, &val))
 744                        break;
 745                if (val >= pid_list->pid_max)
 746                        break;
 747
 748                pid = (pid_t)val;
 749
 750                set_bit(pid, pid_list->pids);
 751                nr_pids++;
 752
 753                trace_parser_clear(&parser);
 754                ret = 0;
 755        }
 756        trace_parser_put(&parser);
 757
 758        if (ret < 0) {
 759                trace_free_pid_list(pid_list);
 760                return ret;
 761        }
 762
 763        if (!nr_pids) {
 764                /* Cleared the list of pids */
 765                trace_free_pid_list(pid_list);
 766                read = ret;
 767                pid_list = NULL;
 768        }
 769
 770        *new_pid_list = pid_list;
 771
 772        return read;
 773}
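/*
 * A caller typically swaps the new list in under RCU (a sketch modeled on
 * the pid-filter write handlers elsewhere in the tracing code;
 * "tr->my_pid_list" is a hypothetical field):
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->my_pid_list, pid_list);
 *	if (filtered_pids) {
 *		synchronize_rcu();
 *		trace_free_pid_list(filtered_pids);
 *	}
 */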
 774
 775static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
 776{
 777        u64 ts;
 778
 779        /* Early boot up does not have a buffer yet */
 780        if (!buf->buffer)
 781                return trace_clock_local();
 782
 783        ts = ring_buffer_time_stamp(buf->buffer);
 784        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 785
 786        return ts;
 787}
 788
 789u64 ftrace_now(int cpu)
 790{
 791        return buffer_ftrace_now(&global_trace.array_buffer, cpu);
 792}
 793
 794/**
 795 * tracing_is_enabled - Show if global_trace has been enabled
 796 *
 797 * Shows if the global trace has been enabled or not. It uses the
 798 * mirror flag "buffer_disabled" to be used in fast paths such as for
 799 * the irqsoff tracer. But it may be inaccurate due to races. If you
 800 * need to know the accurate state, use tracing_is_on() which is a little
 801 * slower, but accurate.
 802 */
 803int tracing_is_enabled(void)
 804{
 805        /*
 806         * For quick access (irqsoff uses this in fast path), just
 807         * return the mirror variable of the state of the ring buffer.
 808         * It's a little racy, but we don't really care.
 809         */
 810        smp_rmb();
 811        return !global_trace.buffer_disabled;
 812}
 813
 814/*
 815 * trace_buf_size is the size in bytes that is allocated
 816 * for a buffer. Note, the number of bytes is always rounded
 817 * to page size.
 818 *
 819 * This number is purposely set to a low number of 16384.
  820 * If the dump on oops happens, it is much appreciated
  821 * not to have to wait for all that output. Anyway, this is
  822 * configurable both at boot time and at run time.
 823 */
 824#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 825
 826static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 827
 828/* trace_types holds a link list of available tracers. */
 829static struct tracer            *trace_types __read_mostly;
 830
 831/*
 832 * trace_types_lock is used to protect the trace_types list.
 833 */
 834DEFINE_MUTEX(trace_types_lock);
 835
 836/*
 837 * serialize the access of the ring buffer
 838 *
  839 * The ring buffer serializes readers, but that is only low-level protection.
  840 * The validity of the events (returned by ring_buffer_peek(), etc.)
  841 * is not protected by the ring buffer.
  842 *
  843 * The content of events may become garbage if we allow other processes to
  844 * consume these events concurrently:
  845 *   A) the page of the consumed events may become a normal page
  846 *      (not a reader page) in the ring buffer, and this page will be
  847 *      rewritten by the events producer.
  848 *   B) the page of the consumed events may become a page for splice_read,
  849 *      and this page will be returned to the system.
  850 *
  851 * These primitives allow multiple processes to access different cpu ring
  852 * buffers concurrently.
  853 *
  854 * These primitives don't distinguish read-only and read-consume access.
  855 * Multiple read-only accesses are also serialized.
 856 */
 857
 858#ifdef CONFIG_SMP
 859static DECLARE_RWSEM(all_cpu_access_lock);
 860static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 861
 862static inline void trace_access_lock(int cpu)
 863{
 864        if (cpu == RING_BUFFER_ALL_CPUS) {
 865                /* gain it for accessing the whole ring buffer. */
 866                down_write(&all_cpu_access_lock);
 867        } else {
 868                /* gain it for accessing a cpu ring buffer. */
 869
 870                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 871                down_read(&all_cpu_access_lock);
 872
 873                /* Secondly block other access to this @cpu ring buffer. */
 874                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 875        }
 876}
 877
 878static inline void trace_access_unlock(int cpu)
 879{
 880        if (cpu == RING_BUFFER_ALL_CPUS) {
 881                up_write(&all_cpu_access_lock);
 882        } else {
 883                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 884                up_read(&all_cpu_access_lock);
 885        }
 886}
 887
 888static inline void trace_access_lock_init(void)
 889{
 890        int cpu;
 891
 892        for_each_possible_cpu(cpu)
 893                mutex_init(&per_cpu(cpu_access_lock, cpu));
 894}
 895
 896#else
 897
 898static DEFINE_MUTEX(access_lock);
 899
 900static inline void trace_access_lock(int cpu)
 901{
 902        (void)cpu;
 903        mutex_lock(&access_lock);
 904}
 905
 906static inline void trace_access_unlock(int cpu)
 907{
 908        (void)cpu;
 909        mutex_unlock(&access_lock);
 910}
 911
 912static inline void trace_access_lock_init(void)
 913{
 914}
 915
 916#endif
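/*
 * Readers are expected to bracket their buffer consumption with these
 * helpers (a sketch; the middle line stands in for ring_buffer_peek()/
 * consume-style access):
 *
 *	trace_access_lock(cpu);
 *	...read or consume events of the @cpu ring buffer...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock exclusively for all
 * CPUs.
 */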
 917
 918#ifdef CONFIG_STACKTRACE
 919static void __ftrace_trace_stack(struct trace_buffer *buffer,
 920                                 unsigned int trace_ctx,
 921                                 int skip, struct pt_regs *regs);
 922static inline void ftrace_trace_stack(struct trace_array *tr,
 923                                      struct trace_buffer *buffer,
 924                                      unsigned int trace_ctx,
 925                                      int skip, struct pt_regs *regs);
 926
 927#else
 928static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
 929                                        unsigned int trace_ctx,
 930                                        int skip, struct pt_regs *regs)
 931{
 932}
 933static inline void ftrace_trace_stack(struct trace_array *tr,
 934                                      struct trace_buffer *buffer,
 935                                      unsigned long trace_ctx,
 936                                      int skip, struct pt_regs *regs)
 937{
 938}
 939
 940#endif
 941
 942static __always_inline void
 943trace_event_setup(struct ring_buffer_event *event,
 944                  int type, unsigned int trace_ctx)
 945{
 946        struct trace_entry *ent = ring_buffer_event_data(event);
 947
 948        tracing_generic_entry_update(ent, type, trace_ctx);
 949}
 950
 951static __always_inline struct ring_buffer_event *
 952__trace_buffer_lock_reserve(struct trace_buffer *buffer,
 953                          int type,
 954                          unsigned long len,
 955                          unsigned int trace_ctx)
 956{
 957        struct ring_buffer_event *event;
 958
 959        event = ring_buffer_lock_reserve(buffer, len);
 960        if (event != NULL)
 961                trace_event_setup(event, type, trace_ctx);
 962
 963        return event;
 964}
 965
 966void tracer_tracing_on(struct trace_array *tr)
 967{
 968        if (tr->array_buffer.buffer)
 969                ring_buffer_record_on(tr->array_buffer.buffer);
 970        /*
 971         * This flag is looked at when buffers haven't been allocated
  972         * yet, or by some tracers (like irqsoff) that just want to
  973         * know if the ring buffer has been disabled, but it can handle
  974         * races where it gets disabled while we still do a record.
 975         * As the check is in the fast path of the tracers, it is more
 976         * important to be fast than accurate.
 977         */
 978        tr->buffer_disabled = 0;
 979        /* Make the flag seen by readers */
 980        smp_wmb();
 981}
 982
 983/**
 984 * tracing_on - enable tracing buffers
 985 *
 986 * This function enables tracing buffers that may have been
 987 * disabled with tracing_off.
 988 */
 989void tracing_on(void)
 990{
 991        tracer_tracing_on(&global_trace);
 992}
 993EXPORT_SYMBOL_GPL(tracing_on);
 994
 995
 996static __always_inline void
 997__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
 998{
 999        __this_cpu_write(trace_taskinfo_save, true);
1000
1001        /* If this is the temp buffer, we need to commit fully */
1002        if (this_cpu_read(trace_buffered_event) == event) {
1003                /* Length is in event->array[0] */
1004                ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                /* Release the temp buffer */
1006                this_cpu_dec(trace_buffered_event_cnt);
1007        } else
1008                ring_buffer_unlock_commit(buffer, event);
1009}
1010
1011/**
1012 * __trace_puts - write a constant string into the trace buffer.
1013 * @ip:    The address of the caller
1014 * @str:   The constant string to write
1015 * @size:  The size of the string.
1016 */
1017int __trace_puts(unsigned long ip, const char *str, int size)
1018{
1019        struct ring_buffer_event *event;
1020        struct trace_buffer *buffer;
1021        struct print_entry *entry;
1022        unsigned int trace_ctx;
1023        int alloc;
1024
1025        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                return 0;
1027
1028        if (unlikely(tracing_selftest_running || tracing_disabled))
1029                return 0;
1030
1031        alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033        trace_ctx = tracing_gen_ctx();
1034        buffer = global_trace.array_buffer.buffer;
1035        ring_buffer_nest_start(buffer);
1036        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                            trace_ctx);
1038        if (!event) {
1039                size = 0;
1040                goto out;
1041        }
1042
1043        entry = ring_buffer_event_data(event);
1044        entry->ip = ip;
1045
1046        memcpy(&entry->buf, str, size);
1047
1048        /* Add a newline if necessary */
1049        if (entry->buf[size - 1] != '\n') {
1050                entry->buf[size] = '\n';
1051                entry->buf[size + 1] = '\0';
1052        } else
1053                entry->buf[size] = '\0';
1054
1055        __buffer_unlock_commit(buffer, event);
1056        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057 out:
1058        ring_buffer_nest_end(buffer);
1059        return size;
1060}
1061EXPORT_SYMBOL_GPL(__trace_puts);
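/*
 * __trace_puts() is normally reached through the trace_puts() macro,
 * which supplies the caller's address and string length, e.g.:
 *
 *	trace_puts("reached the fast path\n");
 *
 * (A sketch; trace_puts() is defined alongside trace_printk() and may
 * route compile-time constant strings to __trace_bputs() below instead.)
 */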
1062
1063/**
1064 * __trace_bputs - write the pointer to a constant string into trace buffer
1065 * @ip:    The address of the caller
1066 * @str:   The constant string to write to the buffer to
1067 */
1068int __trace_bputs(unsigned long ip, const char *str)
1069{
1070        struct ring_buffer_event *event;
1071        struct trace_buffer *buffer;
1072        struct bputs_entry *entry;
1073        unsigned int trace_ctx;
1074        int size = sizeof(struct bputs_entry);
1075        int ret = 0;
1076
1077        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                return 0;
1079
1080        if (unlikely(tracing_selftest_running || tracing_disabled))
1081                return 0;
1082
1083        trace_ctx = tracing_gen_ctx();
1084        buffer = global_trace.array_buffer.buffer;
1085
1086        ring_buffer_nest_start(buffer);
1087        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                            trace_ctx);
1089        if (!event)
1090                goto out;
1091
1092        entry = ring_buffer_event_data(event);
1093        entry->ip                       = ip;
1094        entry->str                      = str;
1095
1096        __buffer_unlock_commit(buffer, event);
1097        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099        ret = 1;
1100 out:
1101        ring_buffer_nest_end(buffer);
1102        return ret;
1103}
1104EXPORT_SYMBOL_GPL(__trace_bputs);
1105
1106#ifdef CONFIG_TRACER_SNAPSHOT
1107static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                           void *cond_data)
1109{
1110        struct tracer *tracer = tr->current_trace;
1111        unsigned long flags;
1112
1113        if (in_nmi()) {
1114                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                return;
1117        }
1118
1119        if (!tr->allocated_snapshot) {
1120                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                internal_trace_puts("*** stopping trace here!   ***\n");
1122                tracing_off();
1123                return;
1124        }
1125
 1126        /* Note, the snapshot cannot be used while the tracer itself uses it */
1127        if (tracer->use_max_tr) {
1128                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                return;
1131        }
1132
1133        local_irq_save(flags);
1134        update_max_tr(tr, current, smp_processor_id(), cond_data);
1135        local_irq_restore(flags);
1136}
1137
1138void tracing_snapshot_instance(struct trace_array *tr)
1139{
1140        tracing_snapshot_instance_cond(tr, NULL);
1141}
1142
1143/**
1144 * tracing_snapshot - take a snapshot of the current buffer.
1145 *
1146 * This causes a swap between the snapshot buffer and the current live
1147 * tracing buffer. You can use this to take snapshots of the live
1148 * trace when some condition is triggered, but continue to trace.
1149 *
1150 * Note, make sure to allocate the snapshot with either
1151 * a tracing_snapshot_alloc(), or by doing it manually
1152 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153 *
1154 * If the snapshot buffer is not allocated, it will stop tracing.
1155 * Basically making a permanent snapshot.
1156 */
1157void tracing_snapshot(void)
1158{
1159        struct trace_array *tr = &global_trace;
1160
1161        tracing_snapshot_instance(tr);
1162}
1163EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
1165/**
1166 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167 * @tr:         The tracing instance to snapshot
1168 * @cond_data:  The data to be tested conditionally, and possibly saved
1169 *
1170 * This is the same as tracing_snapshot() except that the snapshot is
1171 * conditional - the snapshot will only happen if the
1172 * cond_snapshot.update() implementation receiving the cond_data
1173 * returns true, which means that the trace array's cond_snapshot
1174 * update() operation used the cond_data to determine whether the
1175 * snapshot should be taken, and if it was, presumably saved it along
1176 * with the snapshot.
1177 */
1178void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179{
1180        tracing_snapshot_instance_cond(tr, cond_data);
1181}
1182EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184/**
1185 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186 * @tr:         The tracing instance
1187 *
1188 * When the user enables a conditional snapshot using
1189 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190 * with the snapshot.  This accessor is used to retrieve it.
1191 *
1192 * Should not be called from cond_snapshot.update(), since it takes
1193 * the tr->max_lock lock, which the code calling
1194 * cond_snapshot.update() has already done.
1195 *
1196 * Returns the cond_data associated with the trace array's snapshot.
1197 */
1198void *tracing_cond_snapshot_data(struct trace_array *tr)
1199{
1200        void *cond_data = NULL;
1201
1202        arch_spin_lock(&tr->max_lock);
1203
1204        if (tr->cond_snapshot)
1205                cond_data = tr->cond_snapshot->cond_data;
1206
1207        arch_spin_unlock(&tr->max_lock);
1208
1209        return cond_data;
1210}
1211EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                        struct array_buffer *size_buf, int cpu_id);
1215static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218{
1219        int ret;
1220
1221        if (!tr->allocated_snapshot) {
1222
1223                /* allocate spare buffer */
1224                ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                if (ret < 0)
1227                        return ret;
1228
1229                tr->allocated_snapshot = true;
1230        }
1231
1232        return 0;
1233}
1234
1235static void free_snapshot(struct trace_array *tr)
1236{
1237        /*
 1238         * We don't free the ring buffer; instead, we resize it because
 1239         * the max_tr ring buffer has some state (e.g. ring->clock) and
 1240         * we want to preserve it.
1241         */
1242        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243        set_buffer_entries(&tr->max_buffer, 1);
1244        tracing_reset_online_cpus(&tr->max_buffer);
1245        tr->allocated_snapshot = false;
1246}
1247
1248/**
1249 * tracing_alloc_snapshot - allocate snapshot buffer.
1250 *
1251 * This only allocates the snapshot buffer if it isn't already
1252 * allocated - it doesn't also take a snapshot.
1253 *
1254 * This is meant to be used in cases where the snapshot buffer needs
1255 * to be set up for events that can't sleep but need to be able to
1256 * trigger a snapshot.
1257 */
1258int tracing_alloc_snapshot(void)
1259{
1260        struct trace_array *tr = &global_trace;
1261        int ret;
1262
1263        ret = tracing_alloc_snapshot_instance(tr);
1264        WARN_ON(ret < 0);
1265
1266        return ret;
1267}
1268EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270/**
1271 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272 *
1273 * This is similar to tracing_snapshot(), but it will allocate the
1274 * snapshot buffer if it isn't already allocated. Use this only
1275 * where it is safe to sleep, as the allocation may sleep.
1276 *
1277 * This causes a swap between the snapshot buffer and the current live
1278 * tracing buffer. You can use this to take snapshots of the live
1279 * trace when some condition is triggered, but continue to trace.
1280 */
1281void tracing_snapshot_alloc(void)
1282{
1283        int ret;
1284
1285        ret = tracing_alloc_snapshot();
1286        if (ret < 0)
1287                return;
1288
1289        tracing_snapshot();
1290}
1291EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
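/*
 * Typical in-kernel use (a sketch; "interesting_event" is hypothetical):
 *
 *	tracing_snapshot_alloc();	// may sleep; do it once at init time
 *	...
 *	if (interesting_event)
 *		tracing_snapshot();	// does not sleep
 *
 * From user space the equivalent is:
 *	echo 1 > /sys/kernel/tracing/snapshot
 * and the saved trace is then read back from the same "snapshot" file.
 */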
1292
1293/**
1294 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295 * @tr:         The tracing instance
1296 * @cond_data:  User data to associate with the snapshot
1297 * @update:     Implementation of the cond_snapshot update function
1298 *
1299 * Check whether the conditional snapshot for the given instance has
1300 * already been enabled, or if the current tracer is already using a
1301 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302 * save the cond_data and update function inside.
1303 *
1304 * Returns 0 if successful, error otherwise.
1305 */
1306int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                 cond_update_fn_t update)
1308{
1309        struct cond_snapshot *cond_snapshot;
1310        int ret = 0;
1311
1312        cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313        if (!cond_snapshot)
1314                return -ENOMEM;
1315
1316        cond_snapshot->cond_data = cond_data;
1317        cond_snapshot->update = update;
1318
1319        mutex_lock(&trace_types_lock);
1320
1321        ret = tracing_alloc_snapshot_instance(tr);
1322        if (ret)
1323                goto fail_unlock;
1324
1325        if (tr->current_trace->use_max_tr) {
1326                ret = -EBUSY;
1327                goto fail_unlock;
1328        }
1329
1330        /*
1331         * The cond_snapshot can only change to NULL without the
1332         * trace_types_lock. We don't care if we race with it going
1333         * to NULL, but we want to make sure that it's not set to
1334         * something other than NULL when we get here, which we can
1335         * do safely with only holding the trace_types_lock and not
1336         * having to take the max_lock.
1337         */
1338        if (tr->cond_snapshot) {
1339                ret = -EBUSY;
1340                goto fail_unlock;
1341        }
1342
1343        arch_spin_lock(&tr->max_lock);
1344        tr->cond_snapshot = cond_snapshot;
1345        arch_spin_unlock(&tr->max_lock);
1346
1347        mutex_unlock(&trace_types_lock);
1348
1349        return ret;
1350
1351 fail_unlock:
1352        mutex_unlock(&trace_types_lock);
1353        kfree(cond_snapshot);
1354        return ret;
1355}
1356EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
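/*
 * Sketch of a conditional-snapshot user (hypothetical names; assumes the
 * cond_update_fn_t prototype of bool (*)(struct trace_array *, void *)):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_ctx *ctx = cond_data;
 *
 *		return ctx->errors > ctx->threshold;	// snapshot only then
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_ctx, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_ctx);	// swaps buffers if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */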
1357
1358/**
1359 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360 * @tr:         The tracing instance
1361 *
1362 * Check whether the conditional snapshot for the given instance is
1363 * enabled; if so, free the cond_snapshot associated with it,
1364 * otherwise return -EINVAL.
1365 *
1366 * Returns 0 if successful, error otherwise.
1367 */
1368int tracing_snapshot_cond_disable(struct trace_array *tr)
1369{
1370        int ret = 0;
1371
1372        arch_spin_lock(&tr->max_lock);
1373
1374        if (!tr->cond_snapshot)
1375                ret = -EINVAL;
1376        else {
1377                kfree(tr->cond_snapshot);
1378                tr->cond_snapshot = NULL;
1379        }
1380
1381        arch_spin_unlock(&tr->max_lock);
1382
1383        return ret;
1384}
1385EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386#else
1387void tracing_snapshot(void)
1388{
1389        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390}
1391EXPORT_SYMBOL_GPL(tracing_snapshot);
1392void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393{
1394        WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395}
1396EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397int tracing_alloc_snapshot(void)
1398{
1399        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400        return -ENODEV;
1401}
1402EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403void tracing_snapshot_alloc(void)
1404{
1405        /* Give warning */
1406        tracing_snapshot();
1407}
1408EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409void *tracing_cond_snapshot_data(struct trace_array *tr)
1410{
1411        return NULL;
1412}
1413EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415{
1416        return -ENODEV;
1417}
1418EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419int tracing_snapshot_cond_disable(struct trace_array *tr)
1420{
1421        return false;
1422}
1423EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424#endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426void tracer_tracing_off(struct trace_array *tr)
1427{
1428        if (tr->array_buffer.buffer)
1429                ring_buffer_record_off(tr->array_buffer.buffer);
1430        /*
1431         * This flag is looked at when buffers haven't been allocated
 1432         * yet, or by some tracers (like irqsoff) that just want to
 1433         * know if the ring buffer has been disabled, but it can handle
 1434         * races where it gets disabled while we still do a record.
1435         * As the check is in the fast path of the tracers, it is more
1436         * important to be fast than accurate.
1437         */
1438        tr->buffer_disabled = 1;
1439        /* Make the flag seen by readers */
1440        smp_wmb();
1441}
1442
1443/**
1444 * tracing_off - turn off tracing buffers
1445 *
1446 * This function stops the tracing buffers from recording data.
1447 * It does not disable any overhead the tracers themselves may
1448 * be causing. This function simply causes all recording to
1449 * the ring buffers to fail.
1450 */
1451void tracing_off(void)
1452{
1453        tracer_tracing_off(&global_trace);
1454}
1455EXPORT_SYMBOL_GPL(tracing_off);
1456
1457void disable_trace_on_warning(void)
1458{
1459        if (__disable_trace_on_warning) {
1460                trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                        "Disabling tracing due to warning\n");
1462                tracing_off();
1463        }
1464}
1465
1466/**
1467 * tracer_tracing_is_on - show real state of ring buffer enabled
1468 * @tr : the trace array to know if ring buffer is enabled
1469 *
1470 * Shows real state of the ring buffer if it is enabled or not.
1471 */
1472bool tracer_tracing_is_on(struct trace_array *tr)
1473{
1474        if (tr->array_buffer.buffer)
1475                return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476        return !tr->buffer_disabled;
1477}
1478
1479/**
1480 * tracing_is_on - show state of ring buffers enabled
1481 */
1482int tracing_is_on(void)
1483{
1484        return tracer_tracing_is_on(&global_trace);
1485}
1486EXPORT_SYMBOL_GPL(tracing_is_on);
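/*
 * A common debugging pattern is to freeze the ring buffer the moment a bad
 * condition is spotted, so the events leading up to it are preserved
 * (a sketch; "bad_state" is hypothetical):
 *
 *	if (bad_state) {
 *		trace_printk("stopping trace: bad state hit\n");
 *		tracing_off();
 *	}
 *
 * The buffer can be inspected afterwards and recording re-enabled with
 * tracing_on(), or from user space via /sys/kernel/tracing/tracing_on.
 */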
1487
1488static int __init set_buf_size(char *str)
1489{
1490        unsigned long buf_size;
1491
1492        if (!str)
1493                return 0;
1494        buf_size = memparse(str, &str);
1495        /* nr_entries can not be zero */
1496        if (buf_size == 0)
1497                return 0;
1498        trace_buf_size = buf_size;
1499        return 1;
1500}
1501__setup("trace_buf_size=", set_buf_size);
1502
1503static int __init set_tracing_thresh(char *str)
1504{
1505        unsigned long threshold;
1506        int ret;
1507
1508        if (!str)
1509                return 0;
1510        ret = kstrtoul(str, 0, &threshold);
1511        if (ret < 0)
1512                return 0;
1513        tracing_thresh = threshold * 1000;
1514        return 1;
1515}
1516__setup("tracing_thresh=", set_tracing_thresh);
1517
1518unsigned long nsecs_to_usecs(unsigned long nsecs)
1519{
1520        return nsecs / 1000;
1521}
1522
1523/*
1524 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527 * of strings in the order that the evals (enum) were defined.
1528 */
1529#undef C
1530#define C(a, b) b
1531
1532/* These must match the bit positions in trace_iterator_flags */
1533static const char *trace_options[] = {
1534        TRACE_FLAGS
1535        NULL
1536};
1537
1538static struct {
1539        u64 (*func)(void);
1540        const char *name;
1541        int in_ns;              /* is this clock in nanoseconds? */
1542} trace_clocks[] = {
1543        { trace_clock_local,            "local",        1 },
1544        { trace_clock_global,           "global",       1 },
1545        { trace_clock_counter,          "counter",      0 },
1546        { trace_clock_jiffies,          "uptime",       0 },
1547        { trace_clock,                  "perf",         1 },
1548        { ktime_get_mono_fast_ns,       "mono",         1 },
1549        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550        { ktime_get_boot_fast_ns,       "boot",         1 },
1551        ARCH_TRACE_CLOCKS
1552};
1553
1554bool trace_clock_in_ns(struct trace_array *tr)
1555{
1556        if (trace_clocks[tr->clock_id].in_ns)
1557                return true;
1558
1559        return false;
1560}
1561
1562/*
1563 * trace_parser_get_init - gets the buffer for trace parser
1564 */
1565int trace_parser_get_init(struct trace_parser *parser, int size)
1566{
1567        memset(parser, 0, sizeof(*parser));
1568
1569        parser->buffer = kmalloc(size, GFP_KERNEL);
1570        if (!parser->buffer)
1571                return 1;
1572
1573        parser->size = size;
1574        return 0;
1575}
1576
1577/*
1578 * trace_parser_put - frees the buffer for trace parser
1579 */
1580void trace_parser_put(struct trace_parser *parser)
1581{
1582        kfree(parser->buffer);
1583        parser->buffer = NULL;
1584}
1585
1586/*
1587 * trace_get_user - reads the user input string separated by  space
1588 * (matched by isspace(ch))
1589 *
1590 * For each string found the 'struct trace_parser' is updated,
1591 * and the function returns.
1592 *
1593 * Returns number of bytes read.
1594 *
1595 * See kernel/trace/trace.h for 'struct trace_parser' details.
1596 */
1597int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598        size_t cnt, loff_t *ppos)
1599{
1600        char ch;
1601        size_t read = 0;
1602        ssize_t ret;
1603
1604        if (!*ppos)
1605                trace_parser_clear(parser);
1606
1607        ret = get_user(ch, ubuf++);
1608        if (ret)
1609                goto out;
1610
1611        read++;
1612        cnt--;
1613
1614        /*
 1615         * If the parser did not finish with the last write,
 1616         * continue reading the user input without skipping spaces.
1617         */
1618        if (!parser->cont) {
1619                /* skip white space */
1620                while (cnt && isspace(ch)) {
1621                        ret = get_user(ch, ubuf++);
1622                        if (ret)
1623                                goto out;
1624                        read++;
1625                        cnt--;
1626                }
1627
1628                parser->idx = 0;
1629
1630                /* only spaces were written */
1631                if (isspace(ch) || !ch) {
1632                        *ppos += read;
1633                        ret = read;
1634                        goto out;
1635                }
1636        }
1637
1638        /* read the non-space input */
1639        while (cnt && !isspace(ch) && ch) {
1640                if (parser->idx < parser->size - 1)
1641                        parser->buffer[parser->idx++] = ch;
1642                else {
1643                        ret = -EINVAL;
1644                        goto out;
1645                }
1646                ret = get_user(ch, ubuf++);
1647                if (ret)
1648                        goto out;
1649                read++;
1650                cnt--;
1651        }
1652
1653        /* We either got finished input or we have to wait for another call. */
1654        if (isspace(ch) || !ch) {
1655                parser->buffer[parser->idx] = 0;
1656                parser->cont = false;
1657        } else if (parser->idx < parser->size - 1) {
1658                parser->cont = true;
1659                parser->buffer[parser->idx++] = ch;
1660                /* Make sure the parsed string always terminates with '\0'. */
1661                parser->buffer[parser->idx] = 0;
1662        } else {
1663                ret = -EINVAL;
1664                goto out;
1665        }
1666
1667        *ppos += read;
1668        ret = read;
1669
1670out:
1671        return ret;
1672}
1673
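/*
 * An illustrative sketch of how the parser helpers above are typically
 * strung together by a set-style write() handler. The example_write()
 * name and the apply_token() helper are hypothetical.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t ret;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (ret >= 0 && trace_parser_loaded(&parser))
 *			apply_token(parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return ret;
 *	}
 */
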
1674/* TODO add a seq_buf_to_buffer() */
1675static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676{
1677        int len;
1678
1679        if (trace_seq_used(s) <= s->seq.readpos)
1680                return -EBUSY;
1681
1682        len = trace_seq_used(s) - s->seq.readpos;
1683        if (cnt > len)
1684                cnt = len;
1685        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687        s->seq.readpos += cnt;
1688        return cnt;
1689}
1690
1691unsigned long __read_mostly     tracing_thresh;
1692static const struct file_operations tracing_max_lat_fops;
1693
1694#ifdef LATENCY_FS_NOTIFY
1695
1696static struct workqueue_struct *fsnotify_wq;
1697
1698static void latency_fsnotify_workfn(struct work_struct *work)
1699{
1700        struct trace_array *tr = container_of(work, struct trace_array,
1701                                              fsnotify_work);
1702        fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703}
1704
1705static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706{
1707        struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                              fsnotify_irqwork);
1709        queue_work(fsnotify_wq, &tr->fsnotify_work);
1710}
1711
1712static void trace_create_maxlat_file(struct trace_array *tr,
1713                                     struct dentry *d_tracer)
1714{
1715        INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716        init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717        tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                              d_tracer, &tr->max_latency,
1719                                              &tracing_max_lat_fops);
1720}
1721
1722__init static int latency_fsnotify_init(void)
1723{
1724        fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726        if (!fsnotify_wq) {
1727                pr_err("Unable to allocate tr_max_lat_wq\n");
1728                return -ENOMEM;
1729        }
1730        return 0;
1731}
1732
1733late_initcall_sync(latency_fsnotify_init);
1734
1735void latency_fsnotify(struct trace_array *tr)
1736{
1737        if (!fsnotify_wq)
1738                return;
1739        /*
1740         * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741         * possible that we are called from __schedule() or do_idle(), which
1742         * could cause a deadlock.
1743         */
1744        irq_work_queue(&tr->fsnotify_irqwork);
1745}
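
/*
 * The notification is deferred twice so that fsnotify_inode() never
 * runs directly from scheduler or idle context:
 *
 *	latency_fsnotify()
 *	  -> irq_work_queue(&tr->fsnotify_irqwork)
 *	    -> latency_fsnotify_workfn_irq()
 *	      -> queue_work(fsnotify_wq, &tr->fsnotify_work)
 *	        -> latency_fsnotify_workfn()
 *	          -> fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY)
 */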
1746
1747#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1748        || defined(CONFIG_OSNOISE_TRACER)
1749
1750#define trace_create_maxlat_file(tr, d_tracer)                          \
1751        trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1752                          &tr->max_latency, &tracing_max_lat_fops)
1753
1754#else
1755#define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1756#endif
1757
1758#ifdef CONFIG_TRACER_MAX_TRACE
1759/*
1760 * Copy the new maximum trace into the separate maximum-trace
1761 * structure. (This way the maximum trace is permanently saved
1762 * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1763 */
1764static void
1765__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1766{
1767        struct array_buffer *trace_buf = &tr->array_buffer;
1768        struct array_buffer *max_buf = &tr->max_buffer;
1769        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1770        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1771
1772        max_buf->cpu = cpu;
1773        max_buf->time_start = data->preempt_timestamp;
1774
1775        max_data->saved_latency = tr->max_latency;
1776        max_data->critical_start = data->critical_start;
1777        max_data->critical_end = data->critical_end;
1778
1779        strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1780        max_data->pid = tsk->pid;
1781        /*
1782         * If tsk == current, then use current_uid(), as that does not use
1783         * RCU. The irq tracer can be called out of RCU scope.
1784         */
1785        if (tsk == current)
1786                max_data->uid = current_uid();
1787        else
1788                max_data->uid = task_uid(tsk);
1789
1790        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1791        max_data->policy = tsk->policy;
1792        max_data->rt_priority = tsk->rt_priority;
1793
1794        /* record this task's comm */
1795        tracing_record_cmdline(tsk);
1796        latency_fsnotify(tr);
1797}
1798
1799/**
1800 * update_max_tr - snapshot all CPU buffers of @tr into its max buffer
1801 * @tr: trace array to snapshot
1802 * @tsk: the task with the latency
1803 * @cpu: The cpu that initiated the trace.
1804 * @cond_data: User data associated with a conditional snapshot
1805 *
1806 * Flip the buffers between the @tr and the max_tr and record information
1807 * about which task was the cause of this latency.
1808 */
1809void
1810update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1811              void *cond_data)
1812{
1813        if (tr->stop_count)
1814                return;
1815
1816        WARN_ON_ONCE(!irqs_disabled());
1817
1818        if (!tr->allocated_snapshot) {
1819                /* Only the nop tracer should hit this when disabling */
1820                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1821                return;
1822        }
1823
1824        arch_spin_lock(&tr->max_lock);
1825
1826        /* Inherit the recordable setting from array_buffer */
1827        if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1828                ring_buffer_record_on(tr->max_buffer.buffer);
1829        else
1830                ring_buffer_record_off(tr->max_buffer.buffer);
1831
1832#ifdef CONFIG_TRACER_SNAPSHOT
1833        if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1834                goto out_unlock;
1835#endif
1836        swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1837
1838        __update_max_tr(tr, tsk, cpu);
1839
1840 out_unlock:
1841        arch_spin_unlock(&tr->max_lock);
1842}
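
/*
 * An illustrative sketch of the pattern a latency tracer follows when
 * it observes a new maximum; probe_new_max() and its bookkeeping are
 * hypothetical. Note that update_max_tr() expects to be called with
 * interrupts disabled.
 *
 *	static void probe_new_max(struct trace_array *tr, unsigned long delta,
 *				  int cpu)
 *	{
 *		if (delta <= tr->max_latency)
 *			return;
 *
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, cpu, NULL);
 *	}
 */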
1843
1844/**
1845 * update_max_tr_single - only copy one trace over, and reset the rest
1846 * @tr: trace array to copy from
1847 * @tsk: task with the latency
1848 * @cpu: the cpu of the buffer to copy.
1849 *
1850 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1851 */
1852void
1853update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1854{
1855        int ret;
1856
1857        if (tr->stop_count)
1858                return;
1859
1860        WARN_ON_ONCE(!irqs_disabled());
1861        if (!tr->allocated_snapshot) {
1862                /* Only the nop tracer should hit this when disabling */
1863                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1864                return;
1865        }
1866
1867        arch_spin_lock(&tr->max_lock);
1868
1869        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1870
1871        if (ret == -EBUSY) {
1872                /*
1873                 * We failed to swap the buffer due to a commit taking
1874                 * place on this CPU. We fail to record, but we reset
1875                 * the max trace buffer (no one writes directly to it)
1876                 * and flag that it failed.
1877                 */
1878                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1879                        "Failed to swap buffers due to commit in progress\n");
1880        }
1881
1882        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1883
1884        __update_max_tr(tr, tsk, cpu);
1885        arch_spin_unlock(&tr->max_lock);
1886}
1887#endif /* CONFIG_TRACER_MAX_TRACE */
1888
1889static int wait_on_pipe(struct trace_iterator *iter, int full)
1890{
1891        /* Iterators are static; they should be either filled or empty */
1892        if (trace_buffer_iter(iter, iter->cpu_file))
1893                return 0;
1894
1895        return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1896                                full);
1897}
1898
1899#ifdef CONFIG_FTRACE_STARTUP_TEST
1900static bool selftests_can_run;
1901
1902struct trace_selftests {
1903        struct list_head                list;
1904        struct tracer                   *type;
1905};
1906
1907static LIST_HEAD(postponed_selftests);
1908
1909static int save_selftest(struct tracer *type)
1910{
1911        struct trace_selftests *selftest;
1912
1913        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1914        if (!selftest)
1915                return -ENOMEM;
1916
1917        selftest->type = type;
1918        list_add(&selftest->list, &postponed_selftests);
1919        return 0;
1920}
1921
1922static int run_tracer_selftest(struct tracer *type)
1923{
1924        struct trace_array *tr = &global_trace;
1925        struct tracer *saved_tracer = tr->current_trace;
1926        int ret;
1927
1928        if (!type->selftest || tracing_selftest_disabled)
1929                return 0;
1930
1931        /*
1932         * If a tracer registers early in boot up (before scheduling is
1933         * initialized and such), then do not run its selftests yet.
1934         * Instead, run them a little later in the boot process.
1935         */
1936        if (!selftests_can_run)
1937                return save_selftest(type);
1938
1939        if (!tracing_is_on()) {
1940                pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1941                        type->name);
1942                return 0;
1943        }
1944
1945        /*
1946         * Run a selftest on this tracer.
1947         * Here we reset the trace buffer, and set the current
1948         * tracer to be this tracer. The tracer can then run some
1949         * internal tracing to verify that everything is in order.
1950         * If we fail, we do not register this tracer.
1951         */
1952        tracing_reset_online_cpus(&tr->array_buffer);
1953
1954        tr->current_trace = type;
1955
1956#ifdef CONFIG_TRACER_MAX_TRACE
1957        if (type->use_max_tr) {
1958                /* If we expanded the buffers, make sure the max is expanded too */
1959                if (ring_buffer_expanded)
1960                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1961                                           RING_BUFFER_ALL_CPUS);
1962                tr->allocated_snapshot = true;
1963        }
1964#endif
1965
1966        /* the test is responsible for initializing and enabling */
1967        pr_info("Testing tracer %s: ", type->name);
1968        ret = type->selftest(type, tr);
1969        /* the test is responsible for resetting too */
1970        tr->current_trace = saved_tracer;
1971        if (ret) {
1972                printk(KERN_CONT "FAILED!\n");
1973                /* Add the warning after printing 'FAILED' */
1974                WARN_ON(1);
1975                return -1;
1976        }
1977        /* Only reset on passing, to avoid touching corrupted buffers */
1978        tracing_reset_online_cpus(&tr->array_buffer);
1979
1980#ifdef CONFIG_TRACER_MAX_TRACE
1981        if (type->use_max_tr) {
1982                tr->allocated_snapshot = false;
1983
1984                /* Shrink the max buffer again */
1985                if (ring_buffer_expanded)
1986                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1987                                           RING_BUFFER_ALL_CPUS);
1988        }
1989#endif
1990
1991        printk(KERN_CONT "PASSED\n");
1992        return 0;
1993}
1994
1995static __init int init_trace_selftests(void)
1996{
1997        struct trace_selftests *p, *n;
1998        struct tracer *t, **last;
1999        int ret;
2000
2001        selftests_can_run = true;
2002
2003        mutex_lock(&trace_types_lock);
2004
2005        if (list_empty(&postponed_selftests))
2006                goto out;
2007
2008        pr_info("Running postponed tracer tests:\n");
2009
2010        tracing_selftest_running = true;
2011        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2012                /* This loop can take minutes when sanitizers are enabled, so
2013                 * let's make sure we allow RCU processing.
2014                 */
2015                cond_resched();
2016                ret = run_tracer_selftest(p->type);
2017                /* If the test fails, then warn and remove from available_tracers */
2018                if (ret < 0) {
2019                        WARN(1, "tracer: %s failed selftest, disabling\n",
2020                             p->type->name);
2021                        last = &trace_types;
2022                        for (t = trace_types; t; t = t->next) {
2023                                if (t == p->type) {
2024                                        *last = t->next;
2025                                        break;
2026                                }
2027                                last = &t->next;
2028                        }
2029                }
2030                list_del(&p->list);
2031                kfree(p);
2032        }
2033        tracing_selftest_running = false;
2034
2035 out:
2036        mutex_unlock(&trace_types_lock);
2037
2038        return 0;
2039}
2040core_initcall(init_trace_selftests);
2041#else
2042static inline int run_tracer_selftest(struct tracer *type)
2043{
2044        return 0;
2045}
2046#endif /* CONFIG_FTRACE_STARTUP_TEST */
2047
2048static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2049
2050static void __init apply_trace_boot_options(void);
2051
2052/**
2053 * register_tracer - register a tracer with the ftrace system.
2054 * @type: the plugin for the tracer
2055 *
2056 * Register a new plugin tracer.
2057 */
2058int __init register_tracer(struct tracer *type)
2059{
2060        struct tracer *t;
2061        int ret = 0;
2062
2063        if (!type->name) {
2064                pr_info("Tracer must have a name\n");
2065                return -1;
2066        }
2067
2068        if (strlen(type->name) >= MAX_TRACER_SIZE) {
2069                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2070                return -1;
2071        }
2072
2073        if (security_locked_down(LOCKDOWN_TRACEFS)) {
2074                pr_warn("Can not register tracer %s due to lockdown\n",
2075                           type->name);
2076                return -EPERM;
2077        }
2078
2079        mutex_lock(&trace_types_lock);
2080
2081        tracing_selftest_running = true;
2082
2083        for (t = trace_types; t; t = t->next) {
2084                if (strcmp(type->name, t->name) == 0) {
2085                        /* already found */
2086                        pr_info("Tracer %s already registered\n",
2087                                type->name);
2088                        ret = -1;
2089                        goto out;
2090                }
2091        }
2092
2093        if (!type->set_flag)
2094                type->set_flag = &dummy_set_flag;
2095        if (!type->flags) {
2096                /* allocate a dummy tracer_flags */
2097                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2098                if (!type->flags) {
2099                        ret = -ENOMEM;
2100                        goto out;
2101                }
2102                type->flags->val = 0;
2103                type->flags->opts = dummy_tracer_opt;
2104        } else
2105                if (!type->flags->opts)
2106                        type->flags->opts = dummy_tracer_opt;
2107
2108        /* store the tracer for __set_tracer_option */
2109        type->flags->trace = type;
2110
2111        ret = run_tracer_selftest(type);
2112        if (ret < 0)
2113                goto out;
2114
2115        type->next = trace_types;
2116        trace_types = type;
2117        add_tracer_options(&global_trace, type);
2118
2119 out:
2120        tracing_selftest_running = false;
2121        mutex_unlock(&trace_types_lock);
2122
2123        if (ret || !default_bootup_tracer)
2124                goto out_unlock;
2125
2126        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2127                goto out_unlock;
2128
2129        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2130        /* Do we want this tracer to start on bootup? */
2131        tracing_set_tracer(&global_trace, type->name);
2132        default_bootup_tracer = NULL;
2133
2134        apply_trace_boot_options();
2135
2136        /* Disable other selftests, since running this tracer will break them. */
2137        disable_tracing_selftest("running a tracer");
2138
2139 out_unlock:
2140        return ret;
2141}
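
/*
 * An illustrative sketch of the minimum a tracer plugin provides before
 * calling register_tracer() from an initcall; the "example" tracer and
 * its callbacks are hypothetical.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */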
2142
2143static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2144{
2145        struct trace_buffer *buffer = buf->buffer;
2146
2147        if (!buffer)
2148                return;
2149
2150        ring_buffer_record_disable(buffer);
2151
2152        /* Make sure all commits have finished */
2153        synchronize_rcu();
2154        ring_buffer_reset_cpu(buffer, cpu);
2155
2156        ring_buffer_record_enable(buffer);
2157}
2158
2159void tracing_reset_online_cpus(struct array_buffer *buf)
2160{
2161        struct trace_buffer *buffer = buf->buffer;
2162
2163        if (!buffer)
2164                return;
2165
2166        ring_buffer_record_disable(buffer);
2167
2168        /* Make sure all commits have finished */
2169        synchronize_rcu();
2170
2171        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2172
2173        ring_buffer_reset_online_cpus(buffer);
2174
2175        ring_buffer_record_enable(buffer);
2176}
2177
2178/* Must have trace_types_lock held */
2179void tracing_reset_all_online_cpus(void)
2180{
2181        struct trace_array *tr;
2182
2183        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2184                if (!tr->clear_trace)
2185                        continue;
2186                tr->clear_trace = false;
2187                tracing_reset_online_cpus(&tr->array_buffer);
2188#ifdef CONFIG_TRACER_MAX_TRACE
2189                tracing_reset_online_cpus(&tr->max_buffer);
2190#endif
2191        }
2192}
2193
2194/*
2195 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2196 * is the tgid last observed corresponding to pid=i.
2197 */
2198static int *tgid_map;
2199
2200/* The maximum valid index into tgid_map. */
2201static size_t tgid_map_max;
2202
2203#define SAVED_CMDLINES_DEFAULT 128
2204#define NO_CMDLINE_MAP UINT_MAX
2205static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2206struct saved_cmdlines_buffer {
2207        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2208        unsigned *map_cmdline_to_pid;
2209        unsigned cmdline_num;
2210        int cmdline_idx;
2211        char *saved_cmdlines;
2212};
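
/*
 * The cmdline cache is a fixed-size, two-way mapping:
 *
 *	map_pid_to_cmdline[pid & (PID_MAX_DEFAULT - 1)]	-> slot index
 *	map_cmdline_to_pid[slot]			-> owning pid
 *	saved_cmdlines[slot * TASK_COMM_LEN]		-> saved comm
 *
 * trace_save_cmdline() below fills a slot, recycling slots round-robin,
 * and __trace_find_cmdline() only trusts a slot when the reverse
 * mapping still names the pid being looked up.
 */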
2213static struct saved_cmdlines_buffer *savedcmd;
2214
2215static inline char *get_saved_cmdlines(int idx)
2216{
2217        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2218}
2219
2220static inline void set_cmdline(int idx, const char *cmdline)
2221{
2222        strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2223}
2224
2225static int allocate_cmdlines_buffer(unsigned int val,
2226                                    struct saved_cmdlines_buffer *s)
2227{
2228        s->map_cmdline_to_pid = kmalloc_array(val,
2229                                              sizeof(*s->map_cmdline_to_pid),
2230                                              GFP_KERNEL);
2231        if (!s->map_cmdline_to_pid)
2232                return -ENOMEM;
2233
2234        s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2235        if (!s->saved_cmdlines) {
2236                kfree(s->map_cmdline_to_pid);
2237                return -ENOMEM;
2238        }
2239
2240        s->cmdline_idx = 0;
2241        s->cmdline_num = val;
2242        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2243               sizeof(s->map_pid_to_cmdline));
2244        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2245               val * sizeof(*s->map_cmdline_to_pid));
2246
2247        return 0;
2248}
2249
2250static int trace_create_savedcmd(void)
2251{
2252        int ret;
2253
2254        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2255        if (!savedcmd)
2256                return -ENOMEM;
2257
2258        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2259        if (ret < 0) {
2260                kfree(savedcmd);
2261                savedcmd = NULL;
2262                return -ENOMEM;
2263        }
2264
2265        return 0;
2266}
2267
2268int is_tracing_stopped(void)
2269{
2270        return global_trace.stop_count;
2271}
2272
2273/**
2274 * tracing_start - quick start of the tracer
2275 *
2276 * If tracing is enabled but was stopped by tracing_stop,
2277 * this will start the tracer back up.
2278 */
2279void tracing_start(void)
2280{
2281        struct trace_buffer *buffer;
2282        unsigned long flags;
2283
2284        if (tracing_disabled)
2285                return;
2286
2287        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2288        if (--global_trace.stop_count) {
2289                if (global_trace.stop_count < 0) {
2290                        /* Someone screwed up their debugging */
2291                        WARN_ON_ONCE(1);
2292                        global_trace.stop_count = 0;
2293                }
2294                goto out;
2295        }
2296
2297        /* Prevent the buffers from switching */
2298        arch_spin_lock(&global_trace.max_lock);
2299
2300        buffer = global_trace.array_buffer.buffer;
2301        if (buffer)
2302                ring_buffer_record_enable(buffer);
2303
2304#ifdef CONFIG_TRACER_MAX_TRACE
2305        buffer = global_trace.max_buffer.buffer;
2306        if (buffer)
2307                ring_buffer_record_enable(buffer);
2308#endif
2309
2310        arch_spin_unlock(&global_trace.max_lock);
2311
2312 out:
2313        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2314}
2315
2316static void tracing_start_tr(struct trace_array *tr)
2317{
2318        struct trace_buffer *buffer;
2319        unsigned long flags;
2320
2321        if (tracing_disabled)
2322                return;
2323
2324        /* If global, we need to also start the max tracer */
2325        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2326                return tracing_start();
2327
2328        raw_spin_lock_irqsave(&tr->start_lock, flags);
2329
2330        if (--tr->stop_count) {
2331                if (tr->stop_count < 0) {
2332                        /* Someone screwed up their debugging */
2333                        WARN_ON_ONCE(1);
2334                        tr->stop_count = 0;
2335                }
2336                goto out;
2337        }
2338
2339        buffer = tr->array_buffer.buffer;
2340        if (buffer)
2341                ring_buffer_record_enable(buffer);
2342
2343 out:
2344        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2345}
2346
2347/**
2348 * tracing_stop - quick stop of the tracer
2349 *
2350 * Light weight way to stop tracing. Use in conjunction with
2351 * tracing_start.
2352 */
2353void tracing_stop(void)
2354{
2355        struct trace_buffer *buffer;
2356        unsigned long flags;
2357
2358        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2359        if (global_trace.stop_count++)
2360                goto out;
2361
2362        /* Prevent the buffers from switching */
2363        arch_spin_lock(&global_trace.max_lock);
2364
2365        buffer = global_trace.array_buffer.buffer;
2366        if (buffer)
2367                ring_buffer_record_disable(buffer);
2368
2369#ifdef CONFIG_TRACER_MAX_TRACE
2370        buffer = global_trace.max_buffer.buffer;
2371        if (buffer)
2372                ring_buffer_record_disable(buffer);
2373#endif
2374
2375        arch_spin_unlock(&global_trace.max_lock);
2376
2377 out:
2378        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2379}
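
/*
 * An illustrative sketch: tracing_stop() and tracing_start() nest by
 * way of stop_count, so a debugging path can bracket itself without
 * caring whether someone else has already stopped tracing.
 *
 *	tracing_stop();
 *	... inspect or dump the buffers ...
 *	tracing_start();
 */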
2380
2381static void tracing_stop_tr(struct trace_array *tr)
2382{
2383        struct trace_buffer *buffer;
2384        unsigned long flags;
2385
2386        /* If global, we need to also stop the max tracer */
2387        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2388                return tracing_stop();
2389
2390        raw_spin_lock_irqsave(&tr->start_lock, flags);
2391        if (tr->stop_count++)
2392                goto out;
2393
2394        buffer = tr->array_buffer.buffer;
2395        if (buffer)
2396                ring_buffer_record_disable(buffer);
2397
2398 out:
2399        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2400}
2401
2402static int trace_save_cmdline(struct task_struct *tsk)
2403{
2404        unsigned tpid, idx;
2405
2406        /* treat recording of idle task as a success */
2407        if (!tsk->pid)
2408                return 1;
2409
2410        tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2411
2412        /*
2413         * It's not the end of the world if we don't get
2414         * the lock, but we also don't want to spin
2415         * nor do we want to disable interrupts,
2416         * so if we miss here, then better luck next time.
2417         */
2418        if (!arch_spin_trylock(&trace_cmdline_lock))
2419                return 0;
2420
2421        idx = savedcmd->map_pid_to_cmdline[tpid];
2422        if (idx == NO_CMDLINE_MAP) {
2423                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2424
2425                savedcmd->map_pid_to_cmdline[tpid] = idx;
2426                savedcmd->cmdline_idx = idx;
2427        }
2428
2429        savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2430        set_cmdline(idx, tsk->comm);
2431
2432        arch_spin_unlock(&trace_cmdline_lock);
2433
2434        return 1;
2435}
2436
2437static void __trace_find_cmdline(int pid, char comm[])
2438{
2439        unsigned map;
2440        int tpid;
2441
2442        if (!pid) {
2443                strcpy(comm, "<idle>");
2444                return;
2445        }
2446
2447        if (WARN_ON_ONCE(pid < 0)) {
2448                strcpy(comm, "<XXX>");
2449                return;
2450        }
2451
2452        tpid = pid & (PID_MAX_DEFAULT - 1);
2453        map = savedcmd->map_pid_to_cmdline[tpid];
2454        if (map != NO_CMDLINE_MAP) {
2455                tpid = savedcmd->map_cmdline_to_pid[map];
2456                if (tpid == pid) {
2457                        strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2458                        return;
2459                }
2460        }
2461        strcpy(comm, "<...>");
2462}
2463
2464void trace_find_cmdline(int pid, char comm[])
2465{
2466        preempt_disable();
2467        arch_spin_lock(&trace_cmdline_lock);
2468
2469        __trace_find_cmdline(pid, comm);
2470
2471        arch_spin_unlock(&trace_cmdline_lock);
2472        preempt_enable();
2473}
2474
2475static int *trace_find_tgid_ptr(int pid)
2476{
2477        /*
2478         * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2479         * if we observe a non-NULL tgid_map then we also observe the correct
2480         * tgid_map_max.
2481         */
2482        int *map = smp_load_acquire(&tgid_map);
2483
2484        if (unlikely(!map || pid > tgid_map_max))
2485                return NULL;
2486
2487        return &map[pid];
2488}
2489
2490int trace_find_tgid(int pid)
2491{
2492        int *ptr = trace_find_tgid_ptr(pid);
2493
2494        return ptr ? *ptr : 0;
2495}
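
/*
 * An illustrative sketch of the publishing side that the
 * smp_load_acquire() in trace_find_tgid_ptr() pairs with. The details
 * live in set_tracer_flag(); the point is only the ordering: set the
 * bound and allocate the array before publishing the pointer with
 * release semantics, so a reader that sees the map also sees
 * tgid_map_max.
 *
 *	tgid_map_max = <maximum pid to track>;
 *	map = kvcalloc(tgid_map_max + 1, sizeof(*map), GFP_KERNEL);
 *	if (map)
 *		smp_store_release(&tgid_map, map);
 */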
2496
2497static int trace_save_tgid(struct task_struct *tsk)
2498{
2499        int *ptr;
2500
2501        /* treat recording of idle task as a success */
2502        if (!tsk->pid)
2503                return 1;
2504
2505        ptr = trace_find_tgid_ptr(tsk->pid);
2506        if (!ptr)
2507                return 0;
2508
2509        *ptr = tsk->tgid;
2510        return 1;
2511}
2512
2513static bool tracing_record_taskinfo_skip(int flags)
2514{
2515        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2516                return true;
2517        if (!__this_cpu_read(trace_taskinfo_save))
2518                return true;
2519        return false;
2520}
2521
2522/**
2523 * tracing_record_taskinfo - record the task info of a task
2524 *
2525 * @task:  task to record
2526 * @flags: TRACE_RECORD_CMDLINE for recording comm
2527 *         TRACE_RECORD_TGID for recording tgid
2528 */
2529void tracing_record_taskinfo(struct task_struct *task, int flags)
2530{
2531        bool done;
2532
2533        if (tracing_record_taskinfo_skip(flags))
2534                return;
2535
2536        /*
2537         * Record as much task information as possible. If some fail, continue
2538         * to try to record the others.
2539         */
2540        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2541        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2542
2543        /* If recording any information failed, retry again soon. */
2544        if (!done)
2545                return;
2546
2547        __this_cpu_write(trace_taskinfo_save, false);
2548}
2549
2550/**
2551 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2552 *
2553 * @prev: previous task during sched_switch
2554 * @next: next task during sched_switch
2555 * @flags: TRACE_RECORD_CMDLINE for recording comm
2556 *         TRACE_RECORD_TGID for recording tgid
2557 */
2558void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2559                                          struct task_struct *next, int flags)
2560{
2561        bool done;
2562
2563        if (tracing_record_taskinfo_skip(flags))
2564                return;
2565
2566        /*
2567         * Record as much task information as possible. If some fail, continue
2568         * to try to record the others.
2569         */
2570        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2571        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2572        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2573        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2574
2575        /* If recording any information failed, retry again soon. */
2576        if (!done)
2577                return;
2578
2579        __this_cpu_write(trace_taskinfo_save, false);
2580}
2581
2582/* Helpers to record a specific task information */
2583void tracing_record_cmdline(struct task_struct *task)
2584{
2585        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2586}
2587
2588void tracing_record_tgid(struct task_struct *task)
2589{
2590        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2591}
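
/*
 * An illustrative sketch of how the sched_switch wiring (in
 * trace_sched_switch.c) feeds these helpers; the flag derivation shown
 * here is a simplification.
 *
 *	int flags = 0;
 *
 *	if (tr->trace_flags & TRACE_ITER_RECORD_CMD)
 *		flags |= TRACE_RECORD_CMDLINE;
 *	if (tr->trace_flags & TRACE_ITER_RECORD_TGID)
 *		flags |= TRACE_RECORD_TGID;
 *
 *	tracing_record_taskinfo_sched_switch(prev, next, flags);
 */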
2592
2593/*
2594 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2595 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2596 * simplifies those functions and keeps them in sync.
2597 */
2598enum print_line_t trace_handle_return(struct trace_seq *s)
2599{
2600        return trace_seq_has_overflowed(s) ?
2601                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2602}
2603EXPORT_SYMBOL_GPL(trace_handle_return);
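
/*
 * An illustrative sketch of a trace_event output callback ending with
 * trace_handle_return(); the example_print() name is hypothetical.
 *
 *	static enum print_line_t example_print(struct trace_iterator *iter,
 *					       int flags,
 *					       struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */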
2604
2605static unsigned short migration_disable_value(void)
2606{
2607#if defined(CONFIG_SMP)
2608        return current->migration_disabled;
2609#else
2610        return 0;
2611#endif
2612}
2613
2614unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2615{
2616        unsigned int trace_flags = irqs_status;
2617        unsigned int pc;
2618
2619        pc = preempt_count();
2620
2621        if (pc & NMI_MASK)
2622                trace_flags |= TRACE_FLAG_NMI;
2623        if (pc & HARDIRQ_MASK)
2624                trace_flags |= TRACE_FLAG_HARDIRQ;
2625        if (in_serving_softirq())
2626                trace_flags |= TRACE_FLAG_SOFTIRQ;
2627
2628        if (tif_need_resched())
2629                trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630        if (test_preempt_need_resched())
2631                trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632        return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633                (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634}
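
/*
 * Layout of the packed value returned above, for illustration:
 * bits 0-3 hold the (capped) preempt count, bits 4-7 the (capped)
 * migration-disable depth, and bits 16 and up the TRACE_FLAG_* bits.
 *
 *	preempt_depth = trace_ctx & 0xf;
 *	migrate_depth = (trace_ctx >> 4) & 0xf;
 *	trace_flags   = trace_ctx >> 16;
 */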
2635
2636struct ring_buffer_event *
2637trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638                          int type,
2639                          unsigned long len,
2640                          unsigned int trace_ctx)
2641{
2642        return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643}
2644
2645DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647static int trace_buffered_event_ref;
2648
2649/**
2650 * trace_buffered_event_enable - enable buffering events
2651 *
2652 * When events are being filtered, it is quicker to write the event
2653 * data into a temporary buffer when there is a likely chance that
2654 * it will not be committed. Discarding an event from the ring buffer
2655 * is not as fast as committing it, and is much slower than copying
2656 * the data on a successful commit.
2657 *
2658 * When an event is to be filtered, allocate per-CPU buffers to write
2659 * the event data into. If the event is filtered and discarded, it is
2660 * simply dropped; otherwise, the entire data is committed to the
2661 * ring buffer in one shot.
2662 */
2663void trace_buffered_event_enable(void)
2664{
2665        struct ring_buffer_event *event;
2666        struct page *page;
2667        int cpu;
2668
2669        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670
2671        if (trace_buffered_event_ref++)
2672                return;
2673
2674        for_each_tracing_cpu(cpu) {
2675                page = alloc_pages_node(cpu_to_node(cpu),
2676                                        GFP_KERNEL | __GFP_NORETRY, 0);
2677                if (!page)
2678                        goto failed;
2679
2680                event = page_address(page);
2681                memset(event, 0, sizeof(*event));
2682
2683                per_cpu(trace_buffered_event, cpu) = event;
2684
2685                preempt_disable();
2686                if (cpu == smp_processor_id() &&
2687                    __this_cpu_read(trace_buffered_event) !=
2688                    per_cpu(trace_buffered_event, cpu))
2689                        WARN_ON_ONCE(1);
2690                preempt_enable();
2691        }
2692
2693        return;
2694 failed:
2695        trace_buffered_event_disable();
2696}
2697
2698static void enable_trace_buffered_event(void *data)
2699{
2700        /* Probably not needed, but do it anyway */
2701        smp_rmb();
2702        this_cpu_dec(trace_buffered_event_cnt);
2703}
2704
2705static void disable_trace_buffered_event(void *data)
2706{
2707        this_cpu_inc(trace_buffered_event_cnt);
2708}
2709
2710/**
2711 * trace_buffered_event_disable - disable buffering events
2712 *
2713 * When a filter is removed, it is faster to not use the buffered
2714 * events, and to commit directly into the ring buffer. Free up
2715 * the temp buffers when there are no more users. This requires
2716 * special synchronization with current events.
2717 */
2718void trace_buffered_event_disable(void)
2719{
2720        int cpu;
2721
2722        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723
2724        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725                return;
2726
2727        if (--trace_buffered_event_ref)
2728                return;
2729
2730        preempt_disable();
2731        /* For each CPU, set the buffer as used. */
2732        smp_call_function_many(tracing_buffer_mask,
2733                               disable_trace_buffered_event, NULL, 1);
2734        preempt_enable();
2735
2736        /* Wait for all current users to finish */
2737        synchronize_rcu();
2738
2739        for_each_tracing_cpu(cpu) {
2740                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741                per_cpu(trace_buffered_event, cpu) = NULL;
2742        }
2743        /*
2744         * Make sure trace_buffered_event is NULL before clearing
2745         * trace_buffered_event_cnt.
2746         */
2747        smp_wmb();
2748
2749        preempt_disable();
2750        /* Do the work on each cpu */
2751        smp_call_function_many(tracing_buffer_mask,
2752                               enable_trace_buffered_event, NULL, 1);
2753        preempt_enable();
2754}
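
/*
 * An illustrative sketch of how callers pair the two functions above;
 * both must be called with event_mutex held (typically when a filter
 * is attached to, and later removed from, an event file).
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *
 *	... and when the filter is torn down:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */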
2755
2756static struct trace_buffer *temp_buffer;
2757
2758struct ring_buffer_event *
2759trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760                          struct trace_event_file *trace_file,
2761                          int type, unsigned long len,
2762                          unsigned int trace_ctx)
2763{
2764        struct ring_buffer_event *entry;
2765        struct trace_array *tr = trace_file->tr;
2766        int val;
2767
2768        *current_rb = tr->array_buffer.buffer;
2769
2770        if (!tr->no_filter_buffering_ref &&
2771            (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2772            (entry = this_cpu_read(trace_buffered_event))) {
2773                /*
2774                 * Filtering is on, so try to use the per cpu buffer first.
2775                 * This buffer will simulate a ring_buffer_event,
2776                 * where the type_len is zero and the array[0] will
2777                 * hold the full length.
2778                 * (see include/linux/ring_buffer.h for details on
2779                 *  how the ring_buffer_event is structured).
2780                 *
2781                 * Using a temp buffer during filtering and copying it
2782                 * on a matched filter is quicker than writing directly
2783                 * into the ring buffer and then discarding it when
2784                 * it doesn't match. That is because the discard
2785                 * requires several atomic operations to get right.
2786                 * Copying on a match and doing nothing on a failed match
2787                 * is still quicker than skipping the copy but then having
2788                 * to discard out of the ring buffer on a failed match.
2789                 */
2790                int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2791
2792                val = this_cpu_inc_return(trace_buffered_event_cnt);
2793
2794                /*
2795                 * Preemption is disabled, but interrupts and NMIs
2796                 * can still come in now. If that happens after
2797                 * the above increment, then it will have to go
2798                 * back to the old method of allocating the event
2799                 * on the ring buffer, and if the filter fails, it
2800                 * will have to call ring_buffer_discard_commit()
2801                 * to remove it.
2802                 *
2803                 * Need to also check the unlikely case that the
2804                 * length is bigger than the temp buffer size.
2805                 * If that happens, then the reserve is pretty much
2806                 * guaranteed to fail, as the ring buffer currently
2807                 * only allows events less than a page. But that may
2808                 * change in the future, so let the ring buffer reserve
2809                 * handle the failure in that case.
2810                 */
2811                if (val == 1 && likely(len <= max_len)) {
2812                        trace_event_setup(entry, type, trace_ctx);
2813                        entry->array[0] = len;
2814                        return entry;
2815                }
2816                this_cpu_dec(trace_buffered_event_cnt);
2817        }
2818
2819        entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2820                                            trace_ctx);
2821        /*
2822         * If tracing is off, but we have triggers enabled,
2823         * we still need to look at the event data. Use the temp_buffer
2824         * to store the trace event for the trigger to use. It's recursion
2825         * safe and will not be recorded anywhere.
2826         */
2827        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2828                *current_rb = temp_buffer;
2829                entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2830                                                    trace_ctx);
2831        }
2832        return entry;
2833}
2834EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2835
2836static DEFINE_SPINLOCK(tracepoint_iter_lock);
2837static DEFINE_MUTEX(tracepoint_printk_mutex);
2838
2839static void output_printk(struct trace_event_buffer *fbuffer)
2840{
2841        struct trace_event_call *event_call;
2842        struct trace_event_file *file;
2843        struct trace_event *event;
2844        unsigned long flags;
2845        struct trace_iterator *iter = tracepoint_print_iter;
2846
2847        /* We should never get here if iter is NULL */
2848        if (WARN_ON_ONCE(!iter))
2849                return;
2850
2851        event_call = fbuffer->trace_file->event_call;
2852        if (!event_call || !event_call->event.funcs ||
2853            !event_call->event.funcs->trace)
2854                return;
2855
2856        file = fbuffer->trace_file;
2857        if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2858            (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2859             !filter_match_preds(file->filter, fbuffer->entry)))
2860                return;
2861
2862        event = &fbuffer->trace_file->event_call->event;
2863
2864        spin_lock_irqsave(&tracepoint_iter_lock, flags);
2865        trace_seq_init(&iter->seq);
2866        iter->ent = fbuffer->entry;
2867        event_call->event.funcs->trace(iter, 0, event);
2868        trace_seq_putc(&iter->seq, 0);
2869        printk("%s", iter->seq.buffer);
2870
2871        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2872}
2873
2874int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2875                             void *buffer, size_t *lenp,
2876                             loff_t *ppos)
2877{
2878        int save_tracepoint_printk;
2879        int ret;
2880
2881        mutex_lock(&tracepoint_printk_mutex);
2882        save_tracepoint_printk = tracepoint_printk;
2883
2884        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2885
2886        /*
2887         * This will force an early exit, as tracepoint_printk
2888         * is always zero when tracepoint_print_iter is not allocated.
2889         */
2890        if (!tracepoint_print_iter)
2891                tracepoint_printk = 0;
2892
2893        if (save_tracepoint_printk == tracepoint_printk)
2894                goto out;
2895
2896        if (tracepoint_printk)
2897                static_key_enable(&tracepoint_printk_key.key);
2898        else
2899                static_key_disable(&tracepoint_printk_key.key);
2900
2901 out:
2902        mutex_unlock(&tracepoint_printk_mutex);
2903
2904        return ret;
2905}
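
/*
 * For illustration: once tracepoint_print_iter has been allocated (the
 * tp_printk boot option), this handler is what lets
 * /proc/sys/kernel/tracepoint_printk flip the static key at runtime.
 */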
2906
2907void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2908{
2909        enum event_trigger_type tt = ETT_NONE;
2910        struct trace_event_file *file = fbuffer->trace_file;
2911
2912        if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2913                        fbuffer->entry, &tt))
2914                goto discard;
2915
2916        if (static_key_false(&tracepoint_printk_key.key))
2917                output_printk(fbuffer);
2918
2919        if (static_branch_unlikely(&trace_event_exports_enabled))
2920                ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2921
2922        trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2923                        fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2924
2925discard:
2926        if (tt)
2927                event_triggers_post_call(file, tt);
2928
2929}
2930EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
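
/*
 * An illustrative sketch of the reserve/fill/commit shape that the
 * generated event probes follow through the trace_event_buffer_*
 * wrappers, which land in the two helpers above; "struct example_entry"
 * and its field are hypothetical.
 *
 *	struct trace_event_buffer fbuffer;
 *	struct example_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->value = 42;
 *	trace_event_buffer_commit(&fbuffer);
 */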
2931
2932/*
2933 * Skip 3:
2934 *
2935 *   trace_buffer_unlock_commit_regs()
2936 *   trace_event_buffer_commit()
2937 *   trace_event_raw_event_xxx()
2938 */
2939# define STACK_SKIP 3
2940
2941void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2942                                     struct trace_buffer *buffer,
2943                                     struct ring_buffer_event *event,
2944                                     unsigned int trace_ctx,
2945                                     struct pt_regs *regs)
2946{
2947        __buffer_unlock_commit(buffer, event);
2948
2949        /*
2950         * If regs is not set, then skip the functions counted by STACK_SKIP.
2951         * Note, we can still get here via blktrace, wakeup tracer
2952         * and mmiotrace, but that's ok if they lose a function or
2953         * two. They are not that meaningful.
2954         */
2955        ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2956        ftrace_trace_userstack(tr, buffer, trace_ctx);
2957}
2958
2959/*
2960 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2961 */
2962void
2963trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2964                                   struct ring_buffer_event *event)
2965{
2966        __buffer_unlock_commit(buffer, event);
2967}
2968
2969void
2970trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2971               parent_ip, unsigned int trace_ctx)
2972{
2973        struct trace_event_call *call = &event_function;
2974        struct trace_buffer *buffer = tr->array_buffer.buffer;
2975        struct ring_buffer_event *event;
2976        struct ftrace_entry *entry;
2977
2978        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2979                                            trace_ctx);
2980        if (!event)
2981                return;
2982        entry   = ring_buffer_event_data(event);
2983        entry->ip                       = ip;
2984        entry->parent_ip                = parent_ip;
2985
2986        if (!call_filter_check_discard(call, entry, buffer, event)) {
2987                if (static_branch_unlikely(&trace_function_exports_enabled))
2988                        ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2989                __buffer_unlock_commit(buffer, event);
2990        }
2991}
2992
2993#ifdef CONFIG_STACKTRACE
2994
2995/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2996#define FTRACE_KSTACK_NESTING   4
2997
2998#define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2999
3000struct ftrace_stack {
3001        unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3002};
3003
3004
3005struct ftrace_stacks {
3006        struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3007};
3008
3009static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3010static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3011
3012static void __ftrace_trace_stack(struct trace_buffer *buffer,
3013                                 unsigned int trace_ctx,
3014                                 int skip, struct pt_regs *regs)
3015{
3016        struct trace_event_call *call = &event_kernel_stack;
3017        struct ring_buffer_event *event;
3018        unsigned int size, nr_entries;
3019        struct ftrace_stack *fstack;
3020        struct stack_entry *entry;
3021        int stackidx;
3022
3023        /*
3024         * Add one, for this function and the call to stack_trace_save().
3025         * If regs is set, then these functions will not be in the way.
3026         */
3027#ifndef CONFIG_UNWINDER_ORC
3028        if (!regs)
3029                skip++;
3030#endif
3031
3032        preempt_disable_notrace();
3033
3034        stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3035
3036        /* This should never happen. If it does, yell once and skip */
3037        if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3038                goto out;
3039
3040        /*
3041         * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3042         * interrupt will either see the value pre increment or post
3043         * increment. If the interrupt happens pre increment it will have
3044         * restored the counter when it returns.  We just need a barrier to
3045         * keep gcc from moving things around.
3046         */
3047        barrier();
3048
3049        fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3050        size = ARRAY_SIZE(fstack->calls);
3051
3052        if (regs) {
3053                nr_entries = stack_trace_save_regs(regs, fstack->calls,
3054                                                   size, skip);
3055        } else {
3056                nr_entries = stack_trace_save(fstack->calls, size, skip);
3057        }
3058
3059        size = nr_entries * sizeof(unsigned long);
3060        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3061                                    (sizeof(*entry) - sizeof(entry->caller)) + size,
3062                                    trace_ctx);
3063        if (!event)
3064                goto out;
3065        entry = ring_buffer_event_data(event);
3066
3067        memcpy(&entry->caller, fstack->calls, size);
3068        entry->size = nr_entries;
3069
3070        if (!call_filter_check_discard(call, entry, buffer, event))
3071                __buffer_unlock_commit(buffer, event);
3072
3073 out:
3074        /* Again, don't let gcc optimize things here */
3075        barrier();
3076        __this_cpu_dec(ftrace_stack_reserve);
3077        preempt_enable_notrace();
3078
3079}
3080
3081static inline void ftrace_trace_stack(struct trace_array *tr,
3082                                      struct trace_buffer *buffer,
3083                                      unsigned int trace_ctx,
3084                                      int skip, struct pt_regs *regs)
3085{
3086        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3087                return;
3088
3089        __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3090}
3091
3092void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3093                   int skip)
3094{
3095        struct trace_buffer *buffer = tr->array_buffer.buffer;
3096
3097        if (rcu_is_watching()) {
3098                __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3099                return;
3100        }
3101
3102        /*
3103         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3104         * but if the above rcu_is_watching() failed, then the NMI
3105         * triggered someplace critical, and rcu_irq_enter() should
3106         * not be called from NMI.
3107         */
3108        if (unlikely(in_nmi()))
3109                return;
3110
3111        rcu_irq_enter_irqson();
3112        __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3113        rcu_irq_exit_irqson();
3114}
3115
3116/**
3117 * trace_dump_stack - record a stack back trace in the trace buffer
3118 * @skip: Number of functions to skip (helper handlers)
3119 */
3120void trace_dump_stack(int skip)
3121{
3122        if (tracing_disabled || tracing_selftest_running)
3123                return;
3124
3125#ifndef CONFIG_UNWINDER_ORC
3126        /* Skip 1 to skip this function. */
3127        skip++;
3128#endif
3129        __ftrace_trace_stack(global_trace.array_buffer.buffer,
3130                             tracing_gen_ctx(), skip, NULL);
3131}
3132EXPORT_SYMBOL_GPL(trace_dump_stack);
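
/*
 * For illustration: a debugging path can call trace_dump_stack(0) to
 * record its current kernel stack into the trace buffer instead of
 * printing it to the console.
 */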
3133
3134#ifdef CONFIG_USER_STACKTRACE_SUPPORT
3135static DEFINE_PER_CPU(int, user_stack_count);
3136
3137static void
3138ftrace_trace_userstack(struct trace_array *tr,
3139                       struct trace_buffer *buffer, unsigned int trace_ctx)
3140{
3141        struct trace_event_call *call = &event_user_stack;
3142        struct ring_buffer_event *event;
3143        struct userstack_entry *entry;
3144
3145        if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3146                return;
3147
3148        /*
3149         * NMIs cannot handle page faults, even with fixups.
3150         * Saving the user stack can (and often does) fault.
3151         */
3152        if (unlikely(in_nmi()))
3153                return;
3154
3155        /*
3156         * Prevent recursion, since the user stack tracing may
3157         * trigger other kernel events.
3158         */
3159        preempt_disable();
3160        if (__this_cpu_read(user_stack_count))
3161                goto out;
3162
3163        __this_cpu_inc(user_stack_count);
3164
3165        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3166                                            sizeof(*entry), trace_ctx);
3167        if (!event)
3168                goto out_drop_count;
3169        entry   = ring_buffer_event_data(event);
3170
3171        entry->tgid             = current->tgid;
3172        memset(&entry->caller, 0, sizeof(entry->caller));
3173
3174        stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3175        if (!call_filter_check_discard(call, entry, buffer, event))
3176                __buffer_unlock_commit(buffer, event);
3177
3178 out_drop_count:
3179        __this_cpu_dec(user_stack_count);
3180 out:
3181        preempt_enable();
3182}
3183#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3184static void ftrace_trace_userstack(struct trace_array *tr,
3185                                   struct trace_buffer *buffer,
3186                                   unsigned int trace_ctx)
3187{
3188}
3189#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3190
3191#endif /* CONFIG_STACKTRACE */
3192
3193static inline void
3194func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3195                          unsigned long long delta)
3196{
3197        entry->bottom_delta_ts = delta & U32_MAX;
3198        entry->top_delta_ts = (delta >> 32);
3199}
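
/*
 * The 64-bit delta is split across two 32-bit fields; the inverse, for
 * illustration, is:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */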
3200
3201void trace_last_func_repeats(struct trace_array *tr,
3202                             struct trace_func_repeats *last_info,
3203                             unsigned int trace_ctx)
3204{
3205        struct trace_buffer *buffer = tr->array_buffer.buffer;
3206        struct func_repeats_entry *entry;
3207        struct ring_buffer_event *event;
3208        u64 delta;
3209
3210        event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3211                                            sizeof(*entry), trace_ctx);
3212        if (!event)
3213                return;
3214
3215        delta = ring_buffer_event_time_stamp(buffer, event) -
3216                last_info->ts_last_call;
3217
3218        entry = ring_buffer_event_data(event);
3219        entry->ip = last_info->ip;
3220        entry->parent_ip = last_info->parent_ip;
3221        entry->count = last_info->count;
3222        func_repeats_set_delta_ts(entry, delta);
3223
3224        __buffer_unlock_commit(buffer, event);
3225}
3226
3227/* created for use with alloc_percpu */
3228struct trace_buffer_struct {
3229        int nesting;
3230        char buffer[4][TRACE_BUF_SIZE];
3231};
3232
3233static struct trace_buffer_struct *trace_percpu_buffer;
3234
3235/*
3236 * This allows for lockless recording.  If we're nested too deeply, then
3237 * this returns NULL.
3238 */
3239static char *get_trace_buf(void)
3240{
3241        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3242
3243        if (!buffer || buffer->nesting >= 4)
3244                return NULL;
3245
3246        buffer->nesting++;
3247
3248        /* Interrupts must see nesting incremented before we use the buffer */
3249        barrier();
3250        return &buffer->buffer[buffer->nesting - 1][0];
3251}
3252
3253static void put_trace_buf(void)
3254{
3255        /* Don't let the decrement of nesting leak before this */
3256        barrier();
3257        this_cpu_dec(trace_percpu_buffer->nesting);
3258}
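
/*
 * An illustrative sketch of the pattern the trace_printk() paths below
 * follow around these helpers:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format the message into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */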
3259
3260static int alloc_percpu_trace_buffer(void)
3261{
3262        struct trace_buffer_struct *buffers;
3263
3264        if (trace_percpu_buffer)
3265                return 0;
3266
3267        buffers = alloc_percpu(struct trace_buffer_struct);
3268        if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3269                return -ENOMEM;
3270
3271        trace_percpu_buffer = buffers;
3272        return 0;
3273}
3274
3275static int buffers_allocated;
3276
3277void trace_printk_init_buffers(void)
3278{
3279        if (buffers_allocated)
3280                return;
3281
3282        if (alloc_percpu_trace_buffer())
3283                return;
3284
3285        /* trace_printk() is for debug use only. Don't use it in production. */
3286
3287        pr_warn("\n");
3288        pr_warn("**********************************************************\n");
3289        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3290        pr_warn("**                                                      **\n");
3291        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3292        pr_warn("**                                                      **\n");
3293        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3294        pr_warn("** unsafe for production use.                           **\n");
3295        pr_warn("**                                                      **\n");
3296        pr_warn("** If you see this message and you are not debugging    **\n");
3297        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3298        pr_warn("**                                                      **\n");
3299        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3300        pr_warn("**********************************************************\n");
3301
3302        /* Expand the buffers to set size */
3303        tracing_update_buffers();
3304
3305        buffers_allocated = 1;
3306
3307        /*
3308         * trace_printk_init_buffers() can be called by modules.
3309         * If that happens, we need to start cmdline recording
3310         * directly here. If global_trace.array_buffer.buffer is already
3311         * allocated at this point, then this was called by module code.
3312         */
3313        if (global_trace.array_buffer.buffer)
3314                tracing_start_cmdline_record();
3315}
3316EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3317
3318void trace_printk_start_comm(void)
3319{
3320        /* Start tracing comms if trace printk is set */
3321        if (!buffers_allocated)
3322                return;
3323        tracing_start_cmdline_record();
3324}
3325
3326static void trace_printk_start_stop_comm(int enabled)
3327{
3328        if (!buffers_allocated)
3329                return;
3330
3331        if (enabled)
3332                tracing_start_cmdline_record();
3333        else
3334                tracing_stop_cmdline_record();
3335}
3336
3337/**
3338 * trace_vbprintk - write binary msg to tracing buffer
3339 * @ip:    The address of the caller
3340 * @fmt:   The string format to write to the buffer
3341 * @args:  Arguments for @fmt
3342 */
3343int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3344{
3345        struct trace_event_call *call = &event_bprint;
3346        struct ring_buffer_event *event;
3347        struct trace_buffer *buffer;
3348        struct trace_array *tr = &global_trace;
3349        struct bprint_entry *entry;
3350        unsigned int trace_ctx;
3351        char *tbuffer;
3352        int len = 0, size;
3353
3354        if (unlikely(tracing_selftest_running || tracing_disabled))
3355                return 0;
3356
3357        /* Don't pollute graph traces with trace_vprintk internals */
3358        pause_graph_tracing();
3359
3360        trace_ctx = tracing_gen_ctx();
3361        preempt_disable_notrace();
3362
3363        tbuffer = get_trace_buf();
3364        if (!tbuffer) {
3365                len = 0;
3366                goto out_nobuffer;
3367        }
3368
3369        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3370
3371        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3372                goto out_put;
3373
3374        size = sizeof(*entry) + sizeof(u32) * len;
3375        buffer = tr->array_buffer.buffer;
3376        ring_buffer_nest_start(buffer);
3377        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3378                                            trace_ctx);
3379        if (!event)
3380                goto out;
3381        entry = ring_buffer_event_data(event);
3382        entry->ip                       = ip;
3383        entry->fmt                      = fmt;
3384
3385        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3386        if (!call_filter_check_discard(call, entry, buffer, event)) {
3387                __buffer_unlock_commit(buffer, event);
3388                ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3389        }
3390
3391out:
3392        ring_buffer_nest_end(buffer);
3393out_put:
3394        put_trace_buf();
3395
3396out_nobuffer:
3397        preempt_enable_notrace();
3398        unpause_graph_tracing();
3399
3400        return len;
3401}
3402EXPORT_SYMBOL_GPL(trace_vbprintk);
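
/*
 * Hedged sketch of the other half of the binary printk scheme (the real
 * decoding lives in the bprint output handler, not here): the event only
 * records the format pointer and the vbin_printf()-packed argument words,
 * and a reader expands them later with bstr_printf(). The helper name is
 * made up.
 */
#if 0	/* illustration only */
static void example_expand_bprint(struct bprint_entry *entry)
{
        char out[128];

        bstr_printf(out, sizeof(out), entry->fmt, entry->buf);
        /* "out" now holds the fully formatted string */
}
#endif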
3403
3404__printf(3, 0)
3405static int
3406__trace_array_vprintk(struct trace_buffer *buffer,
3407                      unsigned long ip, const char *fmt, va_list args)
3408{
3409        struct trace_event_call *call = &event_print;
3410        struct ring_buffer_event *event;
3411        int len = 0, size;
3412        struct print_entry *entry;
3413        unsigned int trace_ctx;
3414        char *tbuffer;
3415
3416        if (tracing_disabled || tracing_selftest_running)
3417                return 0;
3418
3419        /* Don't pollute graph traces with trace_vprintk internals */
3420        pause_graph_tracing();
3421
3422        trace_ctx = tracing_gen_ctx();
3423        preempt_disable_notrace();
3424
3425
3426        tbuffer = get_trace_buf();
3427        if (!tbuffer) {
3428                len = 0;
3429                goto out_nobuffer;
3430        }
3431
3432        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3433
3434        size = sizeof(*entry) + len + 1;
3435        ring_buffer_nest_start(buffer);
3436        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3437                                            trace_ctx);
3438        if (!event)
3439                goto out;
3440        entry = ring_buffer_event_data(event);
3441        entry->ip = ip;
3442
3443        memcpy(&entry->buf, tbuffer, len + 1);
3444        if (!call_filter_check_discard(call, entry, buffer, event)) {
3445                __buffer_unlock_commit(buffer, event);
3446                ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3447        }
3448
3449out:
3450        ring_buffer_nest_end(buffer);
3451        put_trace_buf();
3452
3453out_nobuffer:
3454        preempt_enable_notrace();
3455        unpause_graph_tracing();
3456
3457        return len;
3458}
3459
3460__printf(3, 0)
3461int trace_array_vprintk(struct trace_array *tr,
3462                        unsigned long ip, const char *fmt, va_list args)
3463{
3464        return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3465}
3466
3467/**
3468 * trace_array_printk - Print a message to a specific instance
3469 * @tr: The instance trace_array descriptor
3470 * @ip: The instruction pointer that this is called from.
3471 * @fmt: The format to print (printf format)
3472 *
3473 * If a subsystem sets up its own instance, they have the right to
3474 * printk strings into their tracing instance buffer using this
3475 * function. Note, this function will not write into the top level
3476 * buffer (use trace_printk() for that), as writing into the top level
3477 * buffer should only have events that can be individually disabled.
3478 * trace_printk() is only used for debugging a kernel, and should not
3479 * be ever incorporated in normal use.
3480 *
3481 * trace_array_printk() can be used, as it will not add noise to the
3482 * top level tracing buffer.
3483 *
3484 * Note, trace_array_init_printk() must be called on @tr before this
3485 * can be used.
3486 */
3487__printf(3, 0)
3488int trace_array_printk(struct trace_array *tr,
3489                       unsigned long ip, const char *fmt, ...)
3490{
3491        int ret;
3492        va_list ap;
3493
3494        if (!tr)
3495                return -ENOENT;
3496
3497        /* This is only allowed for created instances */
3498        if (tr == &global_trace)
3499                return 0;
3500
3501        if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3502                return 0;
3503
3504        va_start(ap, fmt);
3505        ret = trace_array_vprintk(tr, ip, fmt, ap);
3506        va_end(ap);
3507        return ret;
3508}
3509EXPORT_SYMBOL_GPL(trace_array_printk);
3510
3511/**
3512 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3513 * @tr: The trace array to initialize the buffers for
3514 *
3515 * As trace_array_printk() only writes into instances, calls to it are OK
3516 * to leave in the kernel (unlike trace_printk()). This needs to be called
3517 * before trace_array_printk() can be used on a trace_array.
3518 */
3519int trace_array_init_printk(struct trace_array *tr)
3520{
3521        if (!tr)
3522                return -ENOENT;
3523
3524        /* This is only allowed for created instances */
3525        if (tr == &global_trace)
3526                return -EINVAL;
3527
3528        return alloc_percpu_trace_buffer();
3529}
3530EXPORT_SYMBOL_GPL(trace_array_init_printk);
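
/*
 * Hedged usage sketch for a subsystem-private instance. The instance name
 * and helper are made up, and trace_array_get_by_name() is assumed to have
 * this kernel's single-argument form.
 */
#if 0	/* illustration only */
static int example_instance_printk(void)
{
        struct trace_array *tr;
        int ret;

        tr = trace_array_get_by_name("sample");
        if (!tr)
                return -ENOMEM;

        ret = trace_array_init_printk(tr);
        if (!ret)
                trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "sample");

        trace_array_put(tr);
        return ret;
}
#endif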
3531
3532__printf(3, 4)
3533int trace_array_printk_buf(struct trace_buffer *buffer,
3534                           unsigned long ip, const char *fmt, ...)
3535{
3536        int ret;
3537        va_list ap;
3538
3539        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3540                return 0;
3541
3542        va_start(ap, fmt);
3543        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3544        va_end(ap);
3545        return ret;
3546}
3547
3548__printf(2, 0)
3549int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3550{
3551        return trace_array_vprintk(&global_trace, ip, fmt, args);
3552}
3553EXPORT_SYMBOL_GPL(trace_vprintk);
3554
3555static void trace_iterator_increment(struct trace_iterator *iter)
3556{
3557        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3558
3559        iter->idx++;
3560        if (buf_iter)
3561                ring_buffer_iter_advance(buf_iter);
3562}
3563
3564static struct trace_entry *
3565peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3566                unsigned long *lost_events)
3567{
3568        struct ring_buffer_event *event;
3569        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3570
3571        if (buf_iter) {
3572                event = ring_buffer_iter_peek(buf_iter, ts);
3573                if (lost_events)
3574                        *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3575                                (unsigned long)-1 : 0;
3576        } else {
3577                event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3578                                         lost_events);
3579        }
3580
3581        if (event) {
3582                iter->ent_size = ring_buffer_event_length(event);
3583                return ring_buffer_event_data(event);
3584        }
3585        iter->ent_size = 0;
3586        return NULL;
3587}
3588
3589static struct trace_entry *
3590__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3591                  unsigned long *missing_events, u64 *ent_ts)
3592{
3593        struct trace_buffer *buffer = iter->array_buffer->buffer;
3594        struct trace_entry *ent, *next = NULL;
3595        unsigned long lost_events = 0, next_lost = 0;
3596        int cpu_file = iter->cpu_file;
3597        u64 next_ts = 0, ts;
3598        int next_cpu = -1;
3599        int next_size = 0;
3600        int cpu;
3601
3602        /*
3603         * If we are in a per_cpu trace file, don't bother iterating over
3604         * all CPUs; peek at the requested CPU directly.
3605         */
3606        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3607                if (ring_buffer_empty_cpu(buffer, cpu_file))
3608                        return NULL;
3609                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3610                if (ent_cpu)
3611                        *ent_cpu = cpu_file;
3612
3613                return ent;
3614        }
3615
3616        for_each_tracing_cpu(cpu) {
3617
3618                if (ring_buffer_empty_cpu(buffer, cpu))
3619                        continue;
3620
3621                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3622
3623                /*
3624                 * Pick the entry with the smallest timestamp:
3625                 */
3626                if (ent && (!next || ts < next_ts)) {
3627                        next = ent;
3628                        next_cpu = cpu;
3629                        next_ts = ts;
3630                        next_lost = lost_events;
3631                        next_size = iter->ent_size;
3632                }
3633        }
3634
3635        iter->ent_size = next_size;
3636
3637        if (ent_cpu)
3638                *ent_cpu = next_cpu;
3639
3640        if (ent_ts)
3641                *ent_ts = next_ts;
3642
3643        if (missing_events)
3644                *missing_events = next_lost;
3645
3646        return next;
3647}
3648
3649#define STATIC_FMT_BUF_SIZE     128
3650static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3651
3652static char *trace_iter_expand_format(struct trace_iterator *iter)
3653{
3654        char *tmp;
3655
3656        /*
3657         * iter->tr is NULL when used with tp_printk, which makes
3658         * this get called where it is not safe to call krealloc().
3659         */
3660        if (!iter->tr || iter->fmt == static_fmt_buf)
3661                return NULL;
3662
3663        tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3664                       GFP_KERNEL);
3665        if (tmp) {
3666                iter->fmt_size += STATIC_FMT_BUF_SIZE;
3667                iter->fmt = tmp;
3668        }
3669
3670        return tmp;
3671}
3672
3673/* Returns true if the string is safe to dereference from an event */
3674static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3675{
3676        unsigned long addr = (unsigned long)str;
3677        struct trace_event *trace_event;
3678        struct trace_event_call *event;
3679
3680        /* OK if part of the event data */
3681        if ((addr >= (unsigned long)iter->ent) &&
3682            (addr < (unsigned long)iter->ent + iter->ent_size))
3683                return true;
3684
3685        /* OK if part of the temp seq buffer */
3686        if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3687            (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3688                return true;
3689
3690        /* Core rodata cannot be freed */
3691        if (is_kernel_rodata(addr))
3692                return true;
3693
3694        if (trace_is_tracepoint_string(str))
3695                return true;
3696
3697        /*
3698         * Now this could be a module event, referencing core module
3699         * data, which is OK.
3700         */
3701        if (!iter->ent)
3702                return false;
3703
3704        trace_event = ftrace_find_event(iter->ent->type);
3705        if (!trace_event)
3706                return false;
3707
3708        event = container_of(trace_event, struct trace_event_call, event);
3709        if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3710                return false;
3711
3712        /* Would rather have rodata, but this will suffice */
3713        if (within_module_core(addr, event->module))
3714                return true;
3715
3716        return false;
3717}
3718
3719static const char *show_buffer(struct trace_seq *s)
3720{
3721        struct seq_buf *seq = &s->seq;
3722
3723        seq_buf_terminate(seq);
3724
3725        return seq->buffer;
3726}
3727
3728static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3729
3730static int test_can_verify_check(const char *fmt, ...)
3731{
3732        char buf[16];
3733        va_list ap;
3734        int ret;
3735
3736        /*
3737         * The verifier depends on vsnprintf() modifying the va_list passed
3738         * to it, i.e. on the va_list being passed by reference. Some
3739         * architectures (like x86_32) pass it by value, in which case
3740         * vsnprintf() does not advance the caller's copy and the verifier
3741         * would have to understand every value that vsnprintf() can consume.
3742         * Below, vsnprintf() eats the first argument (0); the verifier stays
3743         * enabled only if the following va_arg() then yields the second (1).
3744         */
3745        va_start(ap, fmt);
3746        vsnprintf(buf, 16, "%d", ap);
3747        ret = va_arg(ap, int);
3748        va_end(ap);
3749
3750        return ret;
3751}
3752
3753static void test_can_verify(void)
3754{
3755        if (!test_can_verify_check("%d %d", 0, 1)) {
3756                pr_info("trace event string verifier disabled\n");
3757                static_branch_inc(&trace_no_verify);
3758        }
3759}
3760
3761/**
3762 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3763 * @iter: The iterator that holds the seq buffer and the event being printed
3764 * @fmt: The format used to print the event
3765 * @ap: The va_list holding the data to print from @fmt.
3766 *
3767 * This writes the data into the @iter->seq buffer using the data from
3768 * @fmt and @ap. If the format has a %s, then the source of the string
3769 * is examined to make sure it is safe to print, otherwise it will
3770 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3771 * pointer.
3772 */
3773void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3774                         va_list ap)
3775{
3776        const char *p = fmt;
3777        const char *str;
3778        int i, j;
3779
3780        if (WARN_ON_ONCE(!fmt))
3781                return;
3782
3783        if (static_branch_unlikely(&trace_no_verify))
3784                goto print;
3785
3786        /* Don't bother checking when doing a ftrace_dump() */
3787        if (iter->fmt == static_fmt_buf)
3788                goto print;
3789
3790        while (*p) {
3791                bool star = false;
3792                int len = 0;
3793
3794                j = 0;
3795
3796                /* We only care about %s and variants */
3797                for (i = 0; p[i]; i++) {
3798                        if (i + 1 >= iter->fmt_size) {
3799                                /*
3800                                 * If we can't expand the copy buffer,
3801                                 * just print it.
3802                                 */
3803                                if (!trace_iter_expand_format(iter))
3804                                        goto print;
3805                        }
3806
3807                        if (p[i] == '\\' && p[i+1]) {
3808                                i++;
3809                                continue;
3810                        }
3811                        if (p[i] == '%') {
3812                                /* Need to test cases like %08.*s */
3813                                for (j = 1; p[i+j]; j++) {
3814                                        if (isdigit(p[i+j]) ||
3815                                            p[i+j] == '.')
3816                                                continue;
3817                                        if (p[i+j] == '*') {
3818                                                star = true;
3819                                                continue;
3820                                        }
3821                                        break;
3822                                }
3823                                if (p[i+j] == 's')
3824                                        break;
3825                                star = false;
3826                        }
3827                        j = 0;
3828                }
3829                /* If no %s found then just print normally */
3830                if (!p[i])
3831                        break;
3832
3833                /* Copy up to the %s, and print that */
3834                strncpy(iter->fmt, p, i);
3835                iter->fmt[i] = '\0';
3836                trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3837
3838                if (star)
3839                        len = va_arg(ap, int);
3840
3841                /* The ap now points to the string data of the %s */
3842                str = va_arg(ap, const char *);
3843
3844                /*
3845                 * If you hit this warning, it is likely that the
3846                 * trace event in question used %s on a string that
3847                 * was saved at the time of the event, but may not be
3848                 * around when the trace is read. Use __string(),
3849                 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3850                 * instead. See samples/trace_events/trace-events-sample.h
3851                 * for reference.
3852                 */
3853                if (WARN_ONCE(!trace_safe_str(iter, str),
3854                              "fmt: '%s' current_buffer: '%s'",
3855                              fmt, show_buffer(&iter->seq))) {
3856                        int ret;
3857
3858                        /* Try to safely read the string */
3859                        if (star) {
3860                                if (len + 1 > iter->fmt_size)
3861                                        len = iter->fmt_size - 1;
3862                                if (len < 0)
3863                                        len = 0;
3864                                ret = copy_from_kernel_nofault(iter->fmt, str, len);
3865                                iter->fmt[len] = 0;
3866                                star = false;
3867                        } else {
3868                                ret = strncpy_from_kernel_nofault(iter->fmt, str,
3869                                                                  iter->fmt_size);
3870                        }
3871                        if (ret < 0)
3872                                trace_seq_printf(&iter->seq, "(0x%px)", str);
3873                        else
3874                                trace_seq_printf(&iter->seq, "(0x%px:%s)",
3875                                                 str, iter->fmt);
3876                        str = "[UNSAFE-MEMORY]";
3877                        strcpy(iter->fmt, "%s");
3878                } else {
3879                        strncpy(iter->fmt, p + i, j + 1);
3880                        iter->fmt[j+1] = '\0';
3881                }
3882                if (star)
3883                        trace_seq_printf(&iter->seq, iter->fmt, len, str);
3884                else
3885                        trace_seq_printf(&iter->seq, iter->fmt, str);
3886
3887                p += i + j + 1;
3888        }
3889 print:
3890        if (*p)
3891                trace_seq_vprintf(&iter->seq, p, ap);
3892}
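
/*
 * Hedged sketch of the pattern the warning above steers people towards
 * (the event below is a made-up sample, not one of this kernel's events):
 * copy the string into the event itself with __string()/__assign_str(),
 * then print it with __get_str(), so the %s argument always points into
 * the ring buffer entry. TRACE_EVENT() definitions normally live in
 * trace header files, not here.
 */
#if 0	/* illustration only */
TRACE_EVENT(sample_open,

        TP_PROTO(const char *filename),

        TP_ARGS(filename),

        TP_STRUCT__entry(
                __string(name, filename)
        ),

        TP_fast_assign(
                __assign_str(name, filename);
        ),

        TP_printk("name=%s", __get_str(name))
);
#endif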
3893
3894const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3895{
3896        const char *p, *new_fmt;
3897        char *q;
3898
3899        if (WARN_ON_ONCE(!fmt))
3900                return fmt;
3901
3902        if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3903                return fmt;
3904
3905        p = fmt;
3906        new_fmt = q = iter->fmt;
3907        while (*p) {
3908                if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3909                        if (!trace_iter_expand_format(iter))
3910                                return fmt;
3911
3912                        q += iter->fmt - new_fmt;
3913                        new_fmt = iter->fmt;
3914                }
3915
3916                *q++ = *p++;
3917
3918                /* Replace %p with %px */
3919                if (p[-1] == '%') {
3920                        if (p[0] == '%') {
3921                                *q++ = *p++;
3922                        } else if (p[0] == 'p' && !isalnum(p[1])) {
3923                                *q++ = *p++;
3924                                *q++ = 'x';
3925                        }
3926                }
3927        }
3928        *q = '\0';
3929
3930        return new_fmt;
3931}
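
/*
 * Illustrative only: with TRACE_ITER_HASH_PTR cleared, a format such as
 *
 *      "dev=%p pct=100%% sym=%pS"
 *
 * comes back from trace_event_format() as
 *
 *      "dev=%px pct=100%% sym=%pS"
 *
 * i.e. only a bare %p gains the 'x'; "%%" and extended %p<fmt> specifiers
 * (such as %pS) are copied through unchanged.
 */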
3932
3933#define STATIC_TEMP_BUF_SIZE    128
3934static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3935
3936/* Find the next real entry, without updating the iterator itself */
3937struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3938                                          int *ent_cpu, u64 *ent_ts)
3939{
3940        /* __find_next_entry will reset ent_size */
3941        int ent_size = iter->ent_size;
3942        struct trace_entry *entry;
3943
3944        /*
3945         * If called from ftrace_dump(), then the iter->temp buffer
3946         * will be the static_temp_buf and not created from kmalloc.
3947         * If the entry size is greater than the buffer, we cannot
3948         * save it. Just return NULL in that case. This is only used
3949         * to add markers when two consecutive events' time stamps
3950         * have a large delta. See trace_print_lat_context().
3951         */
3952        if (iter->temp == static_temp_buf &&
3953            STATIC_TEMP_BUF_SIZE < ent_size)
3954                return NULL;
3955
3956        /*
3957         * __find_next_entry() may call peek_next_entry(), which may call
3958         * ring_buffer_peek(), which can make the contents of iter->ent
3959         * undefined. Copy iter->ent now.
3960         */
3961        if (iter->ent && iter->ent != iter->temp) {
3962                if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3963                    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3964                        void *temp;
3965                        temp = kmalloc(iter->ent_size, GFP_KERNEL);
3966                        if (!temp)
3967                                return NULL;
3968                        kfree(iter->temp);
3969                        iter->temp = temp;
3970                        iter->temp_size = iter->ent_size;
3971                }
3972                memcpy(iter->temp, iter->ent, iter->ent_size);
3973                iter->ent = iter->temp;
3974        }
3975        entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3976        /* Put back the original ent_size */
3977        iter->ent_size = ent_size;
3978
3979        return entry;
3980}
3981
3982/* Find the next real entry, and increment the iterator to the next entry */
3983void *trace_find_next_entry_inc(struct trace_iterator *iter)
3984{
3985        iter->ent = __find_next_entry(iter, &iter->cpu,
3986                                      &iter->lost_events, &iter->ts);
3987
3988        if (iter->ent)
3989                trace_iterator_increment(iter);
3990
3991        return iter->ent ? iter : NULL;
3992}
3993
3994static void trace_consume(struct trace_iterator *iter)
3995{
3996        ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3997                            &iter->lost_events);
3998}
3999
4000static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4001{
4002        struct trace_iterator *iter = m->private;
4003        int i = (int)*pos;
4004        void *ent;
4005
4006        WARN_ON_ONCE(iter->leftover);
4007
4008        (*pos)++;
4009
4010        /* can't go backwards */
4011        if (iter->idx > i)
4012                return NULL;
4013
4014        if (iter->idx < 0)
4015                ent = trace_find_next_entry_inc(iter);
4016        else
4017                ent = iter;
4018
4019        while (ent && iter->idx < i)
4020                ent = trace_find_next_entry_inc(iter);
4021
4022        iter->pos = *pos;
4023
4024        return ent;
4025}
4026
4027void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4028{
4029        struct ring_buffer_iter *buf_iter;
4030        unsigned long entries = 0;
4031        u64 ts;
4032
4033        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4034
4035        buf_iter = trace_buffer_iter(iter, cpu);
4036        if (!buf_iter)
4037                return;
4038
4039        ring_buffer_iter_reset(buf_iter);
4040
4041        /*
4042         * With the max latency tracers, it is possible that a reset never
4043         * took place on a cpu. This shows up as a timestamp that precedes
4044         * the start of the buffer.
4045         */
4046        while (ring_buffer_iter_peek(buf_iter, &ts)) {
4047                if (ts >= iter->array_buffer->time_start)
4048                        break;
4049                entries++;
4050                ring_buffer_iter_advance(buf_iter);
4051        }
4052
4053        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4054}
4055
4056/*
4057 * The current tracer is copied to avoid taking a global lock
4058 * around the whole read.
4059 */
4060static void *s_start(struct seq_file *m, loff_t *pos)
4061{
4062        struct trace_iterator *iter = m->private;
4063        struct trace_array *tr = iter->tr;
4064        int cpu_file = iter->cpu_file;
4065        void *p = NULL;
4066        loff_t l = 0;
4067        int cpu;
4068
4069        /*
4070         * Copy the tracer to avoid holding a global lock the whole time.
4071         * iter->trace is a copy of current_trace; comparing the name
4072         * pointers is enough (no strcmp() needed), as iter->trace->name
4073         * will point to the same string as current_trace->name.
4074         */
4075        mutex_lock(&trace_types_lock);
4076        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4077                *iter->trace = *tr->current_trace;
4078        mutex_unlock(&trace_types_lock);
4079
4080#ifdef CONFIG_TRACER_MAX_TRACE
4081        if (iter->snapshot && iter->trace->use_max_tr)
4082                return ERR_PTR(-EBUSY);
4083#endif
4084
4085        if (*pos != iter->pos) {
4086                iter->ent = NULL;
4087                iter->cpu = 0;
4088                iter->idx = -1;
4089
4090                if (cpu_file == RING_BUFFER_ALL_CPUS) {
4091                        for_each_tracing_cpu(cpu)
4092                                tracing_iter_reset(iter, cpu);
4093                } else
4094                        tracing_iter_reset(iter, cpu_file);
4095
4096                iter->leftover = 0;
4097                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4098                        ;
4099
4100        } else {
4101                /*
4102                 * If we overflowed the seq_file before, then we want
4103                 * to just reuse the trace_seq buffer again.
4104                 */
4105                if (iter->leftover)
4106                        p = iter;
4107                else {
4108                        l = *pos - 1;
4109                        p = s_next(m, p, &l);
4110                }
4111        }
4112
4113        trace_event_read_lock();
4114        trace_access_lock(cpu_file);
4115        return p;
4116}
4117
4118static void s_stop(struct seq_file *m, void *p)
4119{
4120        struct trace_iterator *iter = m->private;
4121
4122#ifdef CONFIG_TRACER_MAX_TRACE
4123        if (iter->snapshot && iter->trace->use_max_tr)
4124                return;
4125#endif
4126
4127        trace_access_unlock(iter->cpu_file);
4128        trace_event_read_unlock();
4129}
4130
4131static void
4132get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4133                      unsigned long *entries, int cpu)
4134{
4135        unsigned long count;
4136
4137        count = ring_buffer_entries_cpu(buf->buffer, cpu);
4138        /*
4139         * If this buffer has skipped entries, then we still hold all
4140         * the entries for the trace, and we only need to ignore the
4141         * ones recorded before the buffer's start time stamp.
4142         */
4143        if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4144                count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4145                /* total is the same as the entries */
4146                *total = count;
4147        } else
4148                *total = count +
4149                        ring_buffer_overrun_cpu(buf->buffer, cpu);
4150        *entries = count;
4151}
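
/*
 * Worked example (made-up numbers): if a cpu buffer currently holds 900
 * entries and 100 older entries were overwritten (the overrun count),
 * then *entries = 900 while *total = 1000, i.e. everything ever written.
 * When skipped_entries is set, the overrun is ignored and total == entries.
 */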
4152
4153static void
4154get_total_entries(struct array_buffer *buf,
4155                  unsigned long *total, unsigned long *entries)
4156{
4157        unsigned long t, e;
4158        int cpu;
4159
4160        *total = 0;
4161        *entries = 0;
4162
4163        for_each_tracing_cpu(cpu) {
4164                get_total_entries_cpu(buf, &t, &e, cpu);
4165                *total += t;
4166                *entries += e;
4167        }
4168}
4169
4170unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4171{
4172        unsigned long total, entries;
4173
4174        if (!tr)
4175                tr = &global_trace;
4176
4177        get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4178
4179        return entries;
4180}
4181
4182unsigned long trace_total_entries(struct trace_array *tr)
4183{
4184        unsigned long total, entries;
4185
4186        if (!tr)
4187                tr = &global_trace;
4188
4189        get_total_entries(&tr->array_buffer, &total, &entries);
4190
4191        return entries;
4192}
4193
4194static void print_lat_help_header(struct seq_file *m)
4195{
4196        seq_puts(m, "#                    _------=> CPU#            \n"
4197                    "#                   / _-----=> irqs-off        \n"
4198                    "#                  | / _----=> need-resched    \n"
4199                    "#                  || / _---=> hardirq/softirq \n"
4200                    "#                  ||| / _--=> preempt-depth   \n"
4201                    "#                  |||| / _-=> migrate-disable \n"
4202                    "#                  ||||| /     delay           \n"
4203                    "#  cmd     pid     |||||| time  |   caller     \n"
4204                    "#     \\   /        ||||||  \\    |    /       \n");
4205}
4206
4207static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4208{
4209        unsigned long total;
4210        unsigned long entries;
4211
4212        get_total_entries(buf, &total, &entries);
4213        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4214                   entries, total, num_online_cpus());
4215        seq_puts(m, "#\n");
4216}
4217
4218static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4219                                   unsigned int flags)
4220{
4221        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4222
4223        print_event_info(buf, m);
4224
4225        seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4226        seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4227}
4228
4229static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4230                                       unsigned int flags)
4231{
4232        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4233        const char *space = "            ";
4234        int prec = tgid ? 12 : 2;
4235
4236        print_event_info(buf, m);
4237
4238        seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4239        seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4240        seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4241        seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4242        seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4243        seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4244        seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4245        seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4246}
4247
4248void
4249print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4250{
4251        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4252        struct array_buffer *buf = iter->array_buffer;
4253        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4254        struct tracer *type = iter->trace;
4255        unsigned long entries;
4256        unsigned long total;
4257        const char *name = "preemption";
4258
4259        name = type->name;
4260
4261        get_total_entries(buf, &total, &entries);
4262
4263        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4264                   name, UTS_RELEASE);
4265        seq_puts(m, "# -----------------------------------"
4266                 "---------------------------------\n");
4267        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4268                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4269                   nsecs_to_usecs(data->saved_latency),
4270                   entries,
4271                   total,
4272                   buf->cpu,
4273#if defined(CONFIG_PREEMPT_NONE)
4274                   "server",
4275#elif defined(CONFIG_PREEMPT_VOLUNTARY)
4276                   "desktop",
4277#elif defined(CONFIG_PREEMPT)
4278                   "preempt",
4279#elif defined(CONFIG_PREEMPT_RT)
4280                   "preempt_rt",
4281#else
4282                   "unknown",
4283#endif
4284                   /* These are reserved for later use */
4285                   0, 0, 0, 0);
4286#ifdef CONFIG_SMP
4287        seq_printf(m, " #P:%d)\n", num_online_cpus());
4288#else
4289        seq_puts(m, ")\n");
4290#endif
4291        seq_puts(m, "#    -----------------\n");
4292        seq_printf(m, "#    | task: %.16s-%d "
4293                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4294                   data->comm, data->pid,
4295                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4296                   data->policy, data->rt_priority);
4297        seq_puts(m, "#    -----------------\n");
4298
4299        if (data->critical_start) {
4300                seq_puts(m, "#  => started at: ");
4301                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4302                trace_print_seq(m, &iter->seq);
4303                seq_puts(m, "\n#  => ended at:   ");
4304                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4305                trace_print_seq(m, &iter->seq);
4306                seq_puts(m, "\n#\n");
4307        }
4308
4309        seq_puts(m, "#\n");
4310}
4311
4312static void test_cpu_buff_start(struct trace_iterator *iter)
4313{
4314        struct trace_seq *s = &iter->seq;
4315        struct trace_array *tr = iter->tr;
4316
4317        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4318                return;
4319
4320        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4321                return;
4322
4323        if (cpumask_available(iter->started) &&
4324            cpumask_test_cpu(iter->cpu, iter->started))
4325                return;
4326
4327        if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4328                return;
4329
4330        if (cpumask_available(iter->started))
4331                cpumask_set_cpu(iter->cpu, iter->started);
4332
4333        /* Don't print the started CPU buffer annotation for the first entry of the trace */
4334        if (iter->idx > 1)
4335                trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4336                                iter->cpu);
4337}
4338
4339static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4340{
4341        struct trace_array *tr = iter->tr;
4342        struct trace_seq *s = &iter->seq;
4343        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4344        struct trace_entry *entry;
4345        struct trace_event *event;
4346
4347        entry = iter->ent;
4348
4349        test_cpu_buff_start(iter);
4350
4351        event = ftrace_find_event(entry->type);
4352
4353        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4354                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4355                        trace_print_lat_context(iter);
4356                else
4357                        trace_print_context(iter);
4358        }
4359
4360        if (trace_seq_has_overflowed(s))
4361                return TRACE_TYPE_PARTIAL_LINE;
4362
4363        if (event)
4364                return event->funcs->trace(iter, sym_flags, event);
4365
4366        trace_seq_printf(s, "Unknown type %d\n", entry->type);
4367
4368        return trace_handle_return(s);
4369}
4370
4371static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4372{
4373        struct trace_array *tr = iter->tr;
4374        struct trace_seq *s = &iter->seq;
4375        struct trace_entry *entry;
4376        struct trace_event *event;
4377
4378        entry = iter->ent;
4379
4380        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4381                trace_seq_printf(s, "%d %d %llu ",
4382                                 entry->pid, iter->cpu, iter->ts);
4383
4384        if (trace_seq_has_overflowed(s))
4385                return TRACE_TYPE_PARTIAL_LINE;
4386
4387        event = ftrace_find_event(entry->type);
4388        if (event)
4389                return event->funcs->raw(iter, 0, event);
4390
4391        trace_seq_printf(s, "%d ?\n", entry->type);
4392
4393        return trace_handle_return(s);
4394}
4395
4396static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4397{
4398        struct trace_array *tr = iter->tr;
4399        struct trace_seq *s = &iter->seq;
4400        unsigned char newline = '\n';
4401        struct trace_entry *entry;
4402        struct trace_event *event;
4403
4404        entry = iter->ent;
4405
4406        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4407                SEQ_PUT_HEX_FIELD(s, entry->pid);
4408                SEQ_PUT_HEX_FIELD(s, iter->cpu);
4409                SEQ_PUT_HEX_FIELD(s, iter->ts);
4410                if (trace_seq_has_overflowed(s))
4411                        return TRACE_TYPE_PARTIAL_LINE;
4412        }
4413
4414        event = ftrace_find_event(entry->type);
4415        if (event) {
4416                enum print_line_t ret = event->funcs->hex(iter, 0, event);
4417                if (ret != TRACE_TYPE_HANDLED)
4418                        return ret;
4419        }
4420
4421        SEQ_PUT_FIELD(s, newline);
4422
4423        return trace_handle_return(s);
4424}
4425
4426static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4427{
4428        struct trace_array *tr = iter->tr;
4429        struct trace_seq *s = &iter->seq;
4430        struct trace_entry *entry;
4431        struct trace_event *event;
4432
4433        entry = iter->ent;
4434
4435        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436                SEQ_PUT_FIELD(s, entry->pid);
4437                SEQ_PUT_FIELD(s, iter->cpu);
4438                SEQ_PUT_FIELD(s, iter->ts);
4439                if (trace_seq_has_overflowed(s))
4440                        return TRACE_TYPE_PARTIAL_LINE;
4441        }
4442
4443        event = ftrace_find_event(entry->type);
4444        return event ? event->funcs->binary(iter, 0, event) :
4445                TRACE_TYPE_HANDLED;
4446}
4447
4448int trace_empty(struct trace_iterator *iter)
4449{
4450        struct ring_buffer_iter *buf_iter;
4451        int cpu;
4452
4453        /* If we are looking at one CPU buffer, only check that one */
4454        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4455                cpu = iter->cpu_file;
4456                buf_iter = trace_buffer_iter(iter, cpu);
4457                if (buf_iter) {
4458                        if (!ring_buffer_iter_empty(buf_iter))
4459                                return 0;
4460                } else {
4461                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4462                                return 0;
4463                }
4464                return 1;
4465        }
4466
4467        for_each_tracing_cpu(cpu) {
4468                buf_iter = trace_buffer_iter(iter, cpu);
4469                if (buf_iter) {
4470                        if (!ring_buffer_iter_empty(buf_iter))
4471                                return 0;
4472                } else {
4473                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4474                                return 0;
4475                }
4476        }
4477
4478        return 1;
4479}
4480
4481/*  Called with trace_event_read_lock() held. */
4482enum print_line_t print_trace_line(struct trace_iterator *iter)
4483{
4484        struct trace_array *tr = iter->tr;
4485        unsigned long trace_flags = tr->trace_flags;
4486        enum print_line_t ret;
4487
4488        if (iter->lost_events) {
4489                if (iter->lost_events == (unsigned long)-1)
4490                        trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4491                                         iter->cpu);
4492                else
4493                        trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4494                                         iter->cpu, iter->lost_events);
4495                if (trace_seq_has_overflowed(&iter->seq))
4496                        return TRACE_TYPE_PARTIAL_LINE;
4497        }
4498
4499        if (iter->trace && iter->trace->print_line) {
4500                ret = iter->trace->print_line(iter);
4501                if (ret != TRACE_TYPE_UNHANDLED)
4502                        return ret;
4503        }
4504
4505        if (iter->ent->type == TRACE_BPUTS &&
4506                        trace_flags & TRACE_ITER_PRINTK &&
4507                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4508                return trace_print_bputs_msg_only(iter);
4509
4510        if (iter->ent->type == TRACE_BPRINT &&
4511                        trace_flags & TRACE_ITER_PRINTK &&
4512                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4513                return trace_print_bprintk_msg_only(iter);
4514
4515        if (iter->ent->type == TRACE_PRINT &&
4516                        trace_flags & TRACE_ITER_PRINTK &&
4517                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4518                return trace_print_printk_msg_only(iter);
4519
4520        if (trace_flags & TRACE_ITER_BIN)
4521                return print_bin_fmt(iter);
4522
4523        if (trace_flags & TRACE_ITER_HEX)
4524                return print_hex_fmt(iter);
4525
4526        if (trace_flags & TRACE_ITER_RAW)
4527                return print_raw_fmt(iter);
4528
4529        return print_trace_fmt(iter);
4530}
4531
4532void trace_latency_header(struct seq_file *m)
4533{
4534        struct trace_iterator *iter = m->private;
4535        struct trace_array *tr = iter->tr;
4536
4537        /* print nothing if the buffers are empty */
4538        if (trace_empty(iter))
4539                return;
4540
4541        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4542                print_trace_header(m, iter);
4543
4544        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4545                print_lat_help_header(m);
4546}
4547
4548void trace_default_header(struct seq_file *m)
4549{
4550        struct trace_iterator *iter = m->private;
4551        struct trace_array *tr = iter->tr;
4552        unsigned long trace_flags = tr->trace_flags;
4553
4554        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4555                return;
4556
4557        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4558                /* print nothing if the buffers are empty */
4559                if (trace_empty(iter))
4560                        return;
4561                print_trace_header(m, iter);
4562                if (!(trace_flags & TRACE_ITER_VERBOSE))
4563                        print_lat_help_header(m);
4564        } else {
4565                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4566                        if (trace_flags & TRACE_ITER_IRQ_INFO)
4567                                print_func_help_header_irq(iter->array_buffer,
4568                                                           m, trace_flags);
4569                        else
4570                                print_func_help_header(iter->array_buffer, m,
4571                                                       trace_flags);
4572                }
4573        }
4574}
4575
4576static void test_ftrace_alive(struct seq_file *m)
4577{
4578        if (!ftrace_is_dead())
4579                return;
4580        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4581                    "#          MAY BE MISSING FUNCTION EVENTS\n");
4582}
4583
4584#ifdef CONFIG_TRACER_MAX_TRACE
4585static void show_snapshot_main_help(struct seq_file *m)
4586{
4587        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4588                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589                    "#                      Takes a snapshot of the main buffer.\n"
4590                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4591                    "#                      (Doesn't have to be '2'; works with any number that\n"
4592                    "#                       is not a '0' or '1')\n");
4593}
4594
4595static void show_snapshot_percpu_help(struct seq_file *m)
4596{
4597        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4598#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4599        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4600                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4601#else
4602        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4603                    "#                     Must use main snapshot file to allocate.\n");
4604#endif
4605        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4606                    "#                      (Doesn't have to be '2'; works with any number that\n"
4607                    "#                       is not a '0' or '1')\n");
4608}
4609
4610static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4611{
4612        if (iter->tr->allocated_snapshot)
4613                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4614        else
4615                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4616
4617        seq_puts(m, "# Snapshot commands:\n");
4618        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4619                show_snapshot_main_help(m);
4620        else
4621                show_snapshot_percpu_help(m);
4622}
4623#else
4624/* Should never be called */
4625static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4626#endif
4627
4628static int s_show(struct seq_file *m, void *v)
4629{
4630        struct trace_iterator *iter = v;
4631        int ret;
4632
4633        if (iter->ent == NULL) {
4634                if (iter->tr) {
4635                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
4636                        seq_puts(m, "#\n");
4637                        test_ftrace_alive(m);
4638                }
4639                if (iter->snapshot && trace_empty(iter))
4640                        print_snapshot_help(m, iter);
4641                else if (iter->trace && iter->trace->print_header)
4642                        iter->trace->print_header(m);
4643                else
4644                        trace_default_header(m);
4645
4646        } else if (iter->leftover) {
4647                /*
4648                 * If we filled the seq_file buffer earlier, we
4649                 * want to just show it now.
4650                 */
4651                ret = trace_print_seq(m, &iter->seq);
4652
4653                /* ret should this time be zero, but you never know */
4654                iter->leftover = ret;
4655
4656        } else {
4657                print_trace_line(iter);
4658                ret = trace_print_seq(m, &iter->seq);
4659                /*
4660                 * If we overflow the seq_file buffer, then it will
4661                 * ask us for this data again at start up.
4662                 * Use that instead.
4663                 *  ret is 0 if seq_file write succeeded.
4664                 *        -1 otherwise.
4665                 */
4666                iter->leftover = ret;
4667        }
4668
4669        return 0;
4670}
4671
4672/*
4673 * Should be used after trace_array_get(); trace_types_lock
4674 * ensures that i_cdev has already been initialized.
4675 */
4676static inline int tracing_get_cpu(struct inode *inode)
4677{
4678        if (inode->i_cdev) /* See trace_create_cpu_file() */
4679                return (long)inode->i_cdev - 1;
4680        return RING_BUFFER_ALL_CPUS;
4681}
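
/*
 * Hedged note (the creation side lives elsewhere in this file; details
 * assumed here): trace_create_cpu_file() is expected to store cpu + 1 in
 * inode->i_cdev, so a NULL i_cdev (i.e. 0) unambiguously means "not a
 * per-cpu file" and maps to RING_BUFFER_ALL_CPUS above.
 */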
4682
4683static const struct seq_operations tracer_seq_ops = {
4684        .start          = s_start,
4685        .next           = s_next,
4686        .stop           = s_stop,
4687        .show           = s_show,
4688};
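
/*
 * Summary note (standard seq_file semantics, not new behaviour): for each
 * read, seq_read() calls .start once, then alternates .show and .next
 * while records keep coming, and finishes with .stop, which here drops
 * the locks taken in s_start().
 */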
4689
4690static struct trace_iterator *
4691__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4692{
4693        struct trace_array *tr = inode->i_private;
4694        struct trace_iterator *iter;
4695        int cpu;
4696
4697        if (tracing_disabled)
4698                return ERR_PTR(-ENODEV);
4699
4700        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4701        if (!iter)
4702                return ERR_PTR(-ENOMEM);
4703
4704        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4705                                    GFP_KERNEL);
4706        if (!iter->buffer_iter)
4707                goto release;
4708
4709        /*
4710         * trace_find_next_entry() may need to save off iter->ent.
4711         * It will place it into the iter->temp buffer. As most
4712         * events are less than 128 bytes, allocate a buffer of that size.
4713         * If one is greater, then trace_find_next_entry() will
4714         * allocate a new buffer to adjust for the bigger iter->ent.
4715         * It's not critical if it fails to get allocated here.
4716         */
4717        iter->temp = kmalloc(128, GFP_KERNEL);
4718        if (iter->temp)
4719                iter->temp_size = 128;
4720
4721        /*
4722         * trace_event_printf() may need to modify the given format
4723         * string to replace %p with %px so that it shows the real address
4724         * instead of a hash value. However, that is only needed for event
4725         * tracing; other tracers may not need it. Defer the allocation
4726         * until it is needed.
4727         */
4728        iter->fmt = NULL;
4729        iter->fmt_size = 0;
4730
4731        /*
4732         * We make a copy of the current tracer to avoid concurrent
4733         * changes on it while we are reading.
4734         */
4735        mutex_lock(&trace_types_lock);
4736        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4737        if (!iter->trace)
4738                goto fail;
4739
4740        *iter->trace = *tr->current_trace;
4741
4742        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4743                goto fail;
4744
4745        iter->tr = tr;
4746
4747#ifdef CONFIG_TRACER_MAX_TRACE
4748        /* Currently only the top directory has a snapshot */
4749        if (tr->current_trace->print_max || snapshot)
4750                iter->array_buffer = &tr->max_buffer;
4751        else
4752#endif
4753                iter->array_buffer = &tr->array_buffer;
4754        iter->snapshot = snapshot;
4755        iter->pos = -1;
4756        iter->cpu_file = tracing_get_cpu(inode);
4757        mutex_init(&iter->mutex);
4758
4759        /* Notify the tracer early; before we stop tracing. */
4760        if (iter->trace->open)
4761                iter->trace->open(iter);
4762
4763        /* Annotate start of buffers if we had overruns */
4764        if (ring_buffer_overruns(iter->array_buffer->buffer))
4765                iter->iter_flags |= TRACE_FILE_ANNOTATE;
4766
4767        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4768        if (trace_clocks[tr->clock_id].in_ns)
4769                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4770
4771        /*
4772         * If pause-on-trace is enabled, then stop the trace while
4773         * dumping, unless this is the "snapshot" file
4774         */
4775        if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4776                tracing_stop_tr(tr);
4777
4778        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4779                for_each_tracing_cpu(cpu) {
4780                        iter->buffer_iter[cpu] =
4781                                ring_buffer_read_prepare(iter->array_buffer->buffer,
4782                                                         cpu, GFP_KERNEL);
4783                }
4784                ring_buffer_read_prepare_sync();
4785                for_each_tracing_cpu(cpu) {
4786                        ring_buffer_read_start(iter->buffer_iter[cpu]);
4787                        tracing_iter_reset(iter, cpu);
4788                }
4789        } else {
4790                cpu = iter->cpu_file;
4791                iter->buffer_iter[cpu] =
4792                        ring_buffer_read_prepare(iter->array_buffer->buffer,
4793                                                 cpu, GFP_KERNEL);
4794                ring_buffer_read_prepare_sync();
4795                ring_buffer_read_start(iter->buffer_iter[cpu]);
4796                tracing_iter_reset(iter, cpu);
4797        }
4798
4799        mutex_unlock(&trace_types_lock);
4800
4801        return iter;
4802
4803 fail:
4804        mutex_unlock(&trace_types_lock);
4805        kfree(iter->trace);
4806        kfree(iter->temp);
4807        kfree(iter->buffer_iter);
4808release:
4809        seq_release_private(inode, file);
4810        return ERR_PTR(-ENOMEM);
4811}
4812
4813int tracing_open_generic(struct inode *inode, struct file *filp)
4814{
4815        int ret;
4816
4817        ret = tracing_check_open_get_tr(NULL);
4818        if (ret)
4819                return ret;
4820
4821        filp->private_data = inode->i_private;
4822        return 0;
4823}
4824
4825bool tracing_is_disabled(void)
4826{
4827        return (tracing_disabled) ? true : false;
4828}
4829
4830/*
4831 * Open and update trace_array ref count.
4832 * Must have the current trace_array passed to it.
4833 */
4834int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4835{
4836        struct trace_array *tr = inode->i_private;
4837        int ret;
4838
4839        ret = tracing_check_open_get_tr(tr);
4840        if (ret)
4841                return ret;
4842
4843        filp->private_data = inode->i_private;
4844
4845        return 0;
4846}
4847
4848static int tracing_release(struct inode *inode, struct file *file)
4849{
4850        struct trace_array *tr = inode->i_private;
4851        struct seq_file *m = file->private_data;
4852        struct trace_iterator *iter;
4853        int cpu;
4854
4855        if (!(file->f_mode & FMODE_READ)) {
4856                trace_array_put(tr);
4857                return 0;
4858        }
4859
4860        /* Writes do not use seq_file */
4861        iter = m->private;
4862        mutex_lock(&trace_types_lock);
4863
4864        for_each_tracing_cpu(cpu) {
4865                if (iter->buffer_iter[cpu])
4866                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
4867        }
4868
4869        if (iter->trace && iter->trace->close)
4870                iter->trace->close(iter);
4871
4872        if (!iter->snapshot && tr->stop_count)
4873                /* reenable tracing if it was previously enabled */
4874                tracing_start_tr(tr);
4875
4876        __trace_array_put(tr);
4877
4878        mutex_unlock(&trace_types_lock);
4879
4880        mutex_destroy(&iter->mutex);
4881        free_cpumask_var(iter->started);
4882        kfree(iter->fmt);
4883        kfree(iter->temp);
4884        kfree(iter->trace);
4885        kfree(iter->buffer_iter);
4886        seq_release_private(inode, file);
4887
4888        return 0;
4889}
4890
4891static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4892{
4893        struct trace_array *tr = inode->i_private;
4894
4895        trace_array_put(tr);
4896        return 0;
4897}
4898
4899static int tracing_single_release_tr(struct inode *inode, struct file *file)
4900{
4901        struct trace_array *tr = inode->i_private;
4902
4903        trace_array_put(tr);
4904
4905        return single_release(inode, file);
4906}
4907
4908static int tracing_open(struct inode *inode, struct file *file)
4909{
4910        struct trace_array *tr = inode->i_private;
4911        struct trace_iterator *iter;
4912        int ret;
4913
4914        ret = tracing_check_open_get_tr(tr);
4915        if (ret)
4916                return ret;
4917
4918        /* If this file was open for write, then erase contents */
4919        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4920                int cpu = tracing_get_cpu(inode);
4921                struct array_buffer *trace_buf = &tr->array_buffer;
4922
4923#ifdef CONFIG_TRACER_MAX_TRACE
4924                if (tr->current_trace->print_max)
4925                        trace_buf = &tr->max_buffer;
4926#endif
4927
4928                if (cpu == RING_BUFFER_ALL_CPUS)
4929                        tracing_reset_online_cpus(trace_buf);
4930                else
4931                        tracing_reset_cpu(trace_buf, cpu);
4932        }
4933
4934        if (file->f_mode & FMODE_READ) {
4935                iter = __tracing_open(inode, file, false);
4936                if (IS_ERR(iter))
4937                        ret = PTR_ERR(iter);
4938                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4939                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
4940        }
4941
4942        if (ret < 0)
4943                trace_array_put(tr);
4944
4945        return ret;
4946}
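
/*
 * Example of the O_TRUNC handling above (user space, with tracefs
 * typically mounted at /sys/kernel/tracing):
 *
 *     # echo > trace
 *
 * opens the file for write with O_TRUNC and therefore clears the ring
 * buffer, either for one CPU (per_cpu/cpuN/trace) or for all of them.
 */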
4947
4948/*
4949 * Some tracers are not suitable for instance buffers.
4950 * A tracer is always available for the global array (toplevel)
4951 * or if it explicitly states that it is.
4952 */
4953static bool
4954trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4955{
4956        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4957}
4958
4959/* Find the next tracer that this trace array may use */
4960static struct tracer *
4961get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4962{
4963        while (t && !trace_ok_for_array(t, tr))
4964                t = t->next;
4965
4966        return t;
4967}
4968
4969static void *
4970t_next(struct seq_file *m, void *v, loff_t *pos)
4971{
4972        struct trace_array *tr = m->private;
4973        struct tracer *t = v;
4974
4975        (*pos)++;
4976
4977        if (t)
4978                t = get_tracer_for_array(tr, t->next);
4979
4980        return t;
4981}
4982
4983static void *t_start(struct seq_file *m, loff_t *pos)
4984{
4985        struct trace_array *tr = m->private;
4986        struct tracer *t;
4987        loff_t l = 0;
4988
4989        mutex_lock(&trace_types_lock);
4990
4991        t = get_tracer_for_array(tr, trace_types);
4992        for (; t && l < *pos; t = t_next(m, t, &l))
4993                        ;
4994
4995        return t;
4996}
4997
4998static void t_stop(struct seq_file *m, void *p)
4999{
5000        mutex_unlock(&trace_types_lock);
5001}
5002
5003static int t_show(struct seq_file *m, void *v)
5004{
5005        struct tracer *t = v;
5006
5007        if (!t)
5008                return 0;
5009
5010        seq_puts(m, t->name);
5011        if (t->next)
5012                seq_putc(m, ' ');
5013        else
5014                seq_putc(m, '\n');
5015
5016        return 0;
5017}
5018
5019static const struct seq_operations show_traces_seq_ops = {
5020        .start          = t_start,
5021        .next           = t_next,
5022        .stop           = t_stop,
5023        .show           = t_show,
5024};
5025
5026static int show_traces_open(struct inode *inode, struct file *file)
5027{
5028        struct trace_array *tr = inode->i_private;
5029        struct seq_file *m;
5030        int ret;
5031
5032        ret = tracing_check_open_get_tr(tr);
5033        if (ret)
5034                return ret;
5035
5036        ret = seq_open(file, &show_traces_seq_ops);
5037        if (ret) {
5038                trace_array_put(tr);
5039                return ret;
5040        }
5041
5042        m = file->private_data;
5043        m->private = tr;
5044
5045        return 0;
5046}
5047
5048static int show_traces_release(struct inode *inode, struct file *file)
5049{
5050        struct trace_array *tr = inode->i_private;
5051
5052        trace_array_put(tr);
5053        return seq_release(inode, file);
5054}
5055
5056static ssize_t
5057tracing_write_stub(struct file *filp, const char __user *ubuf,
5058                   size_t count, loff_t *ppos)
5059{
5060        return count;
5061}
5062
5063loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5064{
5065        int ret;
5066
5067        if (file->f_mode & FMODE_READ)
5068                ret = seq_lseek(file, offset, whence);
5069        else
5070                file->f_pos = ret = 0;
5071
5072        return ret;
5073}
5074
5075static const struct file_operations tracing_fops = {
5076        .open           = tracing_open,
5077        .read           = seq_read,
5078        .write          = tracing_write_stub,
5079        .llseek         = tracing_lseek,
5080        .release        = tracing_release,
5081};
5082
5083static const struct file_operations show_traces_fops = {
5084        .open           = show_traces_open,
5085        .read           = seq_read,
5086        .llseek         = seq_lseek,
5087        .release        = show_traces_release,
5088};
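
/*
 * These operations back the "available_tracers" file; reading it walks
 * the registered tracers via the seq_ops above, e.g.:
 *
 *     # cat available_tracers
 *     function_graph function nop
 *
 * (the exact list depends on the kernel configuration).
 */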
5089
5090static ssize_t
5091tracing_cpumask_read(struct file *filp, char __user *ubuf,
5092                     size_t count, loff_t *ppos)
5093{
5094        struct trace_array *tr = file_inode(filp)->i_private;
5095        char *mask_str;
5096        int len;
5097
5098        len = snprintf(NULL, 0, "%*pb\n",
5099                       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5100        mask_str = kmalloc(len, GFP_KERNEL);
5101        if (!mask_str)
5102                return -ENOMEM;
5103
5104        len = snprintf(mask_str, len, "%*pb\n",
5105                       cpumask_pr_args(tr->tracing_cpumask));
5106        if (len >= count) {
5107                count = -EINVAL;
5108                goto out_err;
5109        }
5110        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5111
5112out_err:
5113        kfree(mask_str);
5114
5115        return count;
5116}
5117
5118int tracing_set_cpumask(struct trace_array *tr,
5119                        cpumask_var_t tracing_cpumask_new)
5120{
5121        int cpu;
5122
5123        if (!tr)
5124                return -EINVAL;
5125
5126        local_irq_disable();
5127        arch_spin_lock(&tr->max_lock);
5128        for_each_tracing_cpu(cpu) {
5129                /*
5130                 * Increase/decrease the disabled counter if we are
5131                 * about to flip a bit in the cpumask:
5132                 */
5133                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5134                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5135                        atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5136                        ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5137                }
5138                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5139                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5140                        atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5141                        ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5142                }
5143        }
5144        arch_spin_unlock(&tr->max_lock);
5145        local_irq_enable();
5146
5147        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5148
5149        return 0;
5150}
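
/*
 * Worked example of the bookkeeping above: if the old mask is 0xf
 * (CPUs 0-3) and the new mask is 0x3 (CPUs 0-1), then CPUs 2 and 3 get
 * their ->disabled counter incremented and ring buffer recording
 * disabled, while CPUs 0 and 1 are left untouched.  Flipping the mask
 * back decrements the counters and re-enables recording.
 */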
5151
5152static ssize_t
5153tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5154                      size_t count, loff_t *ppos)
5155{
5156        struct trace_array *tr = file_inode(filp)->i_private;
5157        cpumask_var_t tracing_cpumask_new;
5158        int err;
5159
5160        if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5161                return -ENOMEM;
5162
5163        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5164        if (err)
5165                goto err_free;
5166
5167        err = tracing_set_cpumask(tr, tracing_cpumask_new);
5168        if (err)
5169                goto err_free;
5170
5171        free_cpumask_var(tracing_cpumask_new);
5172
5173        return count;
5174
5175err_free:
5176        free_cpumask_var(tracing_cpumask_new);
5177
5178        return err;
5179}
5180
5181static const struct file_operations tracing_cpumask_fops = {
5182        .open           = tracing_open_generic_tr,
5183        .read           = tracing_cpumask_read,
5184        .write          = tracing_cpumask_write,
5185        .release        = tracing_release_generic_tr,
5186        .llseek         = generic_file_llseek,
5187};
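
/*
 * User-space view of the "tracing_cpumask" file these operations
 * implement, using the usual hex bitmap format (shown here for a
 * 4-CPU system):
 *
 *     # cat tracing_cpumask
 *     f
 *     # echo 3 > tracing_cpumask        # trace only CPUs 0 and 1
 */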
5188
5189static int tracing_trace_options_show(struct seq_file *m, void *v)
5190{
5191        struct tracer_opt *trace_opts;
5192        struct trace_array *tr = m->private;
5193        u32 tracer_flags;
5194        int i;
5195
5196        mutex_lock(&trace_types_lock);
5197        tracer_flags = tr->current_trace->flags->val;
5198        trace_opts = tr->current_trace->flags->opts;
5199
5200        for (i = 0; trace_options[i]; i++) {
5201                if (tr->trace_flags & (1 << i))
5202                        seq_printf(m, "%s\n", trace_options[i]);
5203                else
5204                        seq_printf(m, "no%s\n", trace_options[i]);
5205        }
5206
5207        for (i = 0; trace_opts[i].name; i++) {
5208                if (tracer_flags & trace_opts[i].bit)
5209                        seq_printf(m, "%s\n", trace_opts[i].name);
5210                else
5211                        seq_printf(m, "no%s\n", trace_opts[i].name);
5212        }
5213        mutex_unlock(&trace_types_lock);
5214
5215        return 0;
5216}
5217
5218static int __set_tracer_option(struct trace_array *tr,
5219                               struct tracer_flags *tracer_flags,
5220                               struct tracer_opt *opts, int neg)
5221{
5222        struct tracer *trace = tracer_flags->trace;
5223        int ret;
5224
5225        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5226        if (ret)
5227                return ret;
5228
5229        if (neg)
5230                tracer_flags->val &= ~opts->bit;
5231        else
5232                tracer_flags->val |= opts->bit;
5233        return 0;
5234}
5235
5236/* Try to assign a tracer specific option */
5237static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5238{
5239        struct tracer *trace = tr->current_trace;
5240        struct tracer_flags *tracer_flags = trace->flags;
5241        struct tracer_opt *opts = NULL;
5242        int i;
5243
5244        for (i = 0; tracer_flags->opts[i].name; i++) {
5245                opts = &tracer_flags->opts[i];
5246
5247                if (strcmp(cmp, opts->name) == 0)
5248                        return __set_tracer_option(tr, trace->flags, opts, neg);
5249        }
5250
5251        return -EINVAL;
5252}
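
/*
 * Tracer-specific options set here share the trace_options file with
 * the generic flags; for example, with the function_graph tracer
 * active, something like
 *
 *     # echo nofuncgraph-irqs > trace_options
 *
 * ends up in __set_tracer_option() clearing that tracer's option bit.
 */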
5253
5254/* Some tracers require overwrite to stay enabled */
5255int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5256{
5257        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5258                return -1;
5259
5260        return 0;
5261}
5262
5263int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5264{
5265        int *map;
5266
5267        if ((mask == TRACE_ITER_RECORD_TGID) ||
5268            (mask == TRACE_ITER_RECORD_CMD))
5269                lockdep_assert_held(&event_mutex);
5270
5271        /* do nothing if flag is already set */
5272        if (!!(tr->trace_flags & mask) == !!enabled)
5273                return 0;
5274
5275        /* Give the tracer a chance to approve the change */
5276        if (tr->current_trace->flag_changed)
5277                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5278                        return -EINVAL;
5279
5280        if (enabled)
5281                tr->trace_flags |= mask;
5282        else
5283                tr->trace_flags &= ~mask;
5284
5285        if (mask == TRACE_ITER_RECORD_CMD)
5286                trace_event_enable_cmd_record(enabled);
5287
5288        if (mask == TRACE_ITER_RECORD_TGID) {
5289                if (!tgid_map) {
5290                        tgid_map_max = pid_max;
5291                        map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5292                                       GFP_KERNEL);
5293
5294                        /*
5295                         * Pairs with smp_load_acquire() in
5296                         * trace_find_tgid_ptr() to ensure that if it observes
5297                         * the tgid_map we just allocated then it also observes
5298                         * the corresponding tgid_map_max value.
5299                         */
5300                        smp_store_release(&tgid_map, map);
5301                }
5302                if (!tgid_map) {
5303                        tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5304                        return -ENOMEM;
5305                }
5306
5307                trace_event_enable_tgid_record(enabled);
5308        }
5309
5310        if (mask == TRACE_ITER_EVENT_FORK)
5311                trace_event_follow_fork(tr, enabled);
5312
5313        if (mask == TRACE_ITER_FUNC_FORK)
5314                ftrace_pid_follow_fork(tr, enabled);
5315
5316        if (mask == TRACE_ITER_OVERWRITE) {
5317                ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5318#ifdef CONFIG_TRACER_MAX_TRACE
5319                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5320#endif
5321        }
5322
5323        if (mask == TRACE_ITER_PRINTK) {
5324                trace_printk_start_stop_comm(enabled);
5325                trace_printk_control(enabled);
5326        }
5327
5328        return 0;
5329}
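
/*
 * Note on the record-tgid path above: tgid_map is sized by pid_max at
 * the time the option is first enabled and published with
 * smp_store_release(), so a reader in trace_find_tgid_ptr() that
 * observes the pointer also observes a matching tgid_map_max.  Typical
 * usage from user space:
 *
 *     # echo 1 > options/record-tgid
 */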
5330
5331int trace_set_options(struct trace_array *tr, char *option)
5332{
5333        char *cmp;
5334        int neg = 0;
5335        int ret;
5336        size_t orig_len = strlen(option);
5337        int len;
5338
5339        cmp = strstrip(option);
5340
5341        len = str_has_prefix(cmp, "no");
5342        if (len)
5343                neg = 1;
5344
5345        cmp += len;
5346
5347        mutex_lock(&event_mutex);
5348        mutex_lock(&trace_types_lock);
5349
5350        ret = match_string(trace_options, -1, cmp);
5351        /* If no option could be set, test the specific tracer options */
5352        if (ret < 0)
5353                ret = set_tracer_option(tr, cmp, neg);
5354        else
5355                ret = set_tracer_flag(tr, 1 << ret, !neg);
5356
5357        mutex_unlock(&trace_types_lock);
5358        mutex_unlock(&event_mutex);
5359
5360        /*
5361         * If the first trailing whitespace is replaced with '\0' by strstrip,
5362         * turn it back into a space.
5363         */
5364        if (orig_len > strlen(option))
5365                option[strlen(option)] = ' ';
5366
5367        return ret;
5368}
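
/*
 * Example of the "no" prefix handling above:
 *
 *     # echo print-parent   > trace_options     (set the flag)
 *     # echo noprint-parent > trace_options     (clear the flag)
 *
 * Anything that does not match a generic flag is handed to the current
 * tracer's own options via set_tracer_option().
 */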
5369
5370static void __init apply_trace_boot_options(void)
5371{
5372        char *buf = trace_boot_options_buf;
5373        char *option;
5374
5375        while (true) {
5376                option = strsep(&buf, ",");
5377
5378                if (!option)
5379                        break;
5380
5381                if (*option)
5382                        trace_set_options(&global_trace, option);
5383
5384                /* Put back the comma to allow this to be called again */
5385                if (buf)
5386                        *(buf - 1) = ',';
5387        }
5388}
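
/*
 * The buffer parsed above is filled from the "trace_options=" kernel
 * command line parameter, e.g.:
 *
 *     trace_options=sym-offset,noprint-parent
 *
 * Each comma-separated token is applied to the global trace array.
 */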
5389
5390static ssize_t
5391tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5392                        size_t cnt, loff_t *ppos)
5393{
5394        struct seq_file *m = filp->private_data;
5395        struct trace_array *tr = m->private;
5396        char buf[64];
5397        int ret;
5398
5399        if (cnt >= sizeof(buf))
5400                return -EINVAL;
5401
5402        if (copy_from_user(buf, ubuf, cnt))
5403                return -EFAULT;
5404
5405        buf[cnt] = 0;
5406
5407        ret = trace_set_options(tr, buf);
5408        if (ret < 0)
5409                return ret;
5410
5411        *ppos += cnt;
5412
5413        return cnt;
5414}
5415
5416static int tracing_trace_options_open(struct inode *inode, struct file *file)
5417{
5418        struct trace_array *tr = inode->i_private;
5419        int ret;
5420
5421        ret = tracing_check_open_get_tr(tr);
5422        if (ret)
5423                return ret;
5424
5425        ret = single_open(file, tracing_trace_options_show, inode->i_private);
5426        if (ret < 0)
5427                trace_array_put(tr);
5428
5429        return ret;
5430}
5431
5432static const struct file_operations tracing_iter_fops = {
5433        .open           = tracing_trace_options_open,
5434        .read           = seq_read,
5435        .llseek         = seq_lseek,
5436        .release        = tracing_single_release_tr,
5437        .write          = tracing_trace_options_write,
5438};
5439
5440static const char readme_msg[] =
5441        "tracing mini-HOWTO:\n\n"
5442        "# echo 0 > tracing_on : quick way to disable tracing\n"
5443        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5444        " Important files:\n"
5445        "  trace\t\t\t- The static contents of the buffer\n"
5446        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5447        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5448        "  current_tracer\t- function and latency tracers\n"
5449        "  available_tracers\t- list of configured tracers for current_tracer\n"
5450        "  error_log\t- error log for failed commands (that support it)\n"
5451        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5452        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5453        "  trace_clock\t\t- change the clock used to order events\n"
5454        "       local:   Per cpu clock but may not be synced across CPUs\n"
5455        "      global:   Synced across CPUs but slows tracing down.\n"
5456        "     counter:   Not a clock, but just an increment\n"
5457        "      uptime:   Jiffy counter from time of boot\n"
5458        "        perf:   Same clock that perf events use\n"
5459#ifdef CONFIG_X86_64
5460        "     x86-tsc:   TSC cycle counter\n"
5461#endif
5462        "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5463        "       delta:   Delta difference against a buffer-wide timestamp\n"
5464        "    absolute:   Absolute (standalone) timestamp\n"
5465        "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5466        "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5467        "  tracing_cpumask\t- Limit which CPUs to trace\n"
5468        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5469        "\t\t\t  Remove sub-buffer with rmdir\n"
5470        "  trace_options\t\t- Set format or modify how tracing happens\n"
5471        "\t\t\t  Disable an option by prefixing 'no' to the\n"
5472        "\t\t\t  option name\n"
5473        "  saved_cmdlines_size\t- echo the number of comm-pid pairs to store in here\n"
5474#ifdef CONFIG_DYNAMIC_FTRACE
5475        "\n  available_filter_functions - list of functions that can be filtered on\n"
5476        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5477        "\t\t\t  functions\n"
5478        "\t     accepts: func_full_name or glob-matching-pattern\n"
5479        "\t     modules: Can select a group via module\n"
5480        "\t      Format: :mod:<module-name>\n"
5481        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5482        "\t    triggers: a command to perform when function is hit\n"
5483        "\t      Format: <function>:<trigger>[:count]\n"
5484        "\t     trigger: traceon, traceoff\n"
5485        "\t\t      enable_event:<system>:<event>\n"
5486        "\t\t      disable_event:<system>:<event>\n"
5487#ifdef CONFIG_STACKTRACE
5488        "\t\t      stacktrace\n"
5489#endif
5490#ifdef CONFIG_TRACER_SNAPSHOT
5491        "\t\t      snapshot\n"
5492#endif
5493        "\t\t      dump\n"
5494        "\t\t      cpudump\n"
5495        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5496        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5497        "\t     The first one will disable tracing every time do_fault is hit\n"
5498        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5499        "\t       The first time do_trap is hit and it disables tracing, the\n"
5500        "\t       counter will decrement to 2. If tracing is already disabled,\n"
5501        "\t       the counter will not decrement. It only decrements when the\n"
5502        "\t       trigger did work\n"
5503        "\t     To remove trigger without count:\n"
5504        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5505        "\t     To remove trigger with a count:\n"
5506        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5507        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5508        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5509        "\t    modules: Can select a group via module command :mod:\n"
5510        "\t    Does not accept triggers\n"
5511#endif /* CONFIG_DYNAMIC_FTRACE */
5512#ifdef CONFIG_FUNCTION_TRACER
5513        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5514        "\t\t    (function)\n"
5515        "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5516        "\t\t    (function)\n"
5517#endif
5518#ifdef CONFIG_FUNCTION_GRAPH_TRACER
5519        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5520        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5521        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5522#endif
5523#ifdef CONFIG_TRACER_SNAPSHOT
5524        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5525        "\t\t\t  snapshot buffer. Read the contents for more\n"
5526        "\t\t\t  information\n"
5527#endif
5528#ifdef CONFIG_STACK_TRACER
5529        "  stack_trace\t\t- Shows the max stack trace when active\n"
5530        "  stack_max_size\t- Shows current max stack size that was traced\n"
5531        "\t\t\t  Write into this file to reset the max size (trigger a\n"
5532        "\t\t\t  new trace)\n"
5533#ifdef CONFIG_DYNAMIC_FTRACE
5534        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5535        "\t\t\t  traces\n"
5536#endif
5537#endif /* CONFIG_STACK_TRACER */
5538#ifdef CONFIG_DYNAMIC_EVENTS
5539        "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5540        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5541#endif
5542#ifdef CONFIG_KPROBE_EVENTS
5543        "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5544        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5545#endif
5546#ifdef CONFIG_UPROBE_EVENTS
5547        "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5548        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5549#endif
5550#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5551        "\t  accepts: event-definitions (one definition per line)\n"
5552        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5553        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5554#ifdef CONFIG_HIST_TRIGGERS
5555        "\t           s:[synthetic/]<event> <field> [<field>]\n"
5556#endif
5557        "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5558        "\t           -:[<group>/]<event>\n"
5559#ifdef CONFIG_KPROBE_EVENTS
5560        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5561  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5562#endif
5563#ifdef CONFIG_UPROBE_EVENTS
5564  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5565#endif
5566        "\t     args: <name>=fetcharg[:type]\n"
5567        "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5568#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5569        "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5570#else
5571        "\t           $stack<index>, $stack, $retval, $comm,\n"
5572#endif
5573        "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5574        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5575        "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5576        "\t           <type>\\[<array-size>\\]\n"
5577#ifdef CONFIG_HIST_TRIGGERS
5578        "\t    field: <stype> <name>;\n"
5579        "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5580        "\t           [unsigned] char/int/long\n"
5581#endif
5582        "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5583        "\t            of the <attached-group>/<attached-event>.\n"
5584#endif
5585        "  events/\t\t- Directory containing all trace event subsystems:\n"
5586        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5587        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5588        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5589        "\t\t\t  events\n"
5590        "      filter\t\t- If set, only events passing filter are traced\n"
5591        "  events/<system>/<event>/\t- Directory containing control files for\n"
5592        "\t\t\t  <event>:\n"
5593        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5594        "      filter\t\t- If set, only events passing filter are traced\n"
5595        "      trigger\t\t- If set, a command to perform when event is hit\n"
5596        "\t    Format: <trigger>[:count][if <filter>]\n"
5597        "\t   trigger: traceon, traceoff\n"
5598        "\t            enable_event:<system>:<event>\n"
5599        "\t            disable_event:<system>:<event>\n"
5600#ifdef CONFIG_HIST_TRIGGERS
5601        "\t            enable_hist:<system>:<event>\n"
5602        "\t            disable_hist:<system>:<event>\n"
5603#endif
5604#ifdef CONFIG_STACKTRACE
5605        "\t\t    stacktrace\n"
5606#endif
5607#ifdef CONFIG_TRACER_SNAPSHOT
5608        "\t\t    snapshot\n"
5609#endif
5610#ifdef CONFIG_HIST_TRIGGERS
5611        "\t\t    hist (see below)\n"
5612#endif
5613        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5614        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5615        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5616        "\t                  events/block/block_unplug/trigger\n"
5617        "\t   The first disables tracing every time block_unplug is hit.\n"
5618        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5619        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5620        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5621        "\t   Like function triggers, the counter is only decremented if it\n"
5622        "\t    enabled or disabled tracing.\n"
5623        "\t   To remove a trigger without a count:\n"
5624        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5625        "\t   To remove a trigger with a count:\n"
5626        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5627        "\t   Filters can be ignored when removing a trigger.\n"
5628#ifdef CONFIG_HIST_TRIGGERS
5629        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5630        "\t    Format: hist:keys=<field1[,field2,...]>\n"
5631        "\t            [:values=<field1[,field2,...]>]\n"
5632        "\t            [:sort=<field1[,field2,...]>]\n"
5633        "\t            [:size=#entries]\n"
5634        "\t            [:pause][:continue][:clear]\n"
5635        "\t            [:name=histname1]\n"
5636        "\t            [:<handler>.<action>]\n"
5637        "\t            [if <filter>]\n\n"
5638        "\t    Note, special fields can be used as well:\n"
5639        "\t            common_timestamp - to record current timestamp\n"
5640        "\t            common_cpu - to record the CPU the event happened on\n"
5641        "\n"
5642        "\t    When a matching event is hit, an entry is added to a hash\n"
5643        "\t    table using the key(s) and value(s) named, and the value of a\n"
5644        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5645        "\t    correspond to fields in the event's format description.  Keys\n"
5646        "\t    can be any field, or the special string 'stacktrace'.\n"
5647        "\t    Compound keys consisting of up to two fields can be specified\n"
5648        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5649        "\t    fields.  Sort keys consisting of up to two fields can be\n"
5650        "\t    specified using the 'sort' keyword.  The sort direction can\n"
5651        "\t    be modified by appending '.descending' or '.ascending' to a\n"
5652        "\t    sort field.  The 'size' parameter can be used to specify more\n"
5653        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5654        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5655        "\t    its histogram data will be shared with other triggers of the\n"
5656        "\t    same name, and trigger hits will update this common data.\n\n"
5657        "\t    Reading the 'hist' file for the event will dump the hash\n"
5658        "\t    table in its entirety to stdout.  If there are multiple hist\n"
5659        "\t    triggers attached to an event, there will be a table for each\n"
5660        "\t    trigger in the output.  The table displayed for a named\n"
5661        "\t    trigger will be the same as any other instance having the\n"
5662        "\t    same name.  The default format used to display a given field\n"
5663        "\t    can be modified by appending any of the following modifiers\n"
5664        "\t    to the field name, as applicable:\n\n"
5665        "\t            .hex        display a number as a hex value\n"
5666        "\t            .sym        display an address as a symbol\n"
5667        "\t            .sym-offset display an address as a symbol and offset\n"
5668        "\t            .execname   display a common_pid as a program name\n"
5669        "\t            .syscall    display a syscall id as a syscall name\n"
5670        "\t            .log2       display log2 value rather than raw number\n"
5671        "\t            .buckets=size  display values in groups of size rather than raw number\n"
5672        "\t            .usecs      display a common_timestamp in microseconds\n\n"
5673        "\t    The 'pause' parameter can be used to pause an existing hist\n"
5674        "\t    trigger or to start a hist trigger but not log any events\n"
5675        "\t    until told to do so.  'continue' can be used to start or\n"
5676        "\t    restart a paused hist trigger.\n\n"
5677        "\t    The 'clear' parameter will clear the contents of a running\n"
5678        "\t    hist trigger and leave its current paused/active state\n"
5679        "\t    unchanged.\n\n"
5680        "\t    The enable_hist and disable_hist triggers can be used to\n"
5681        "\t    have one event conditionally start and stop another event's\n"
5682        "\t    already-attached hist trigger.  The syntax is analogous to\n"
5683        "\t    the enable_event and disable_event triggers.\n\n"
5684        "\t    Hist trigger handlers and actions are executed whenever\n"
5685        "\t    a histogram entry is added or updated.  They take the form:\n\n"
5686        "\t        <handler>.<action>\n\n"
5687        "\t    The available handlers are:\n\n"
5688        "\t        onmatch(matching.event)  - invoke on addition or update\n"
5689        "\t        onmax(var)               - invoke if var exceeds current max\n"
5690        "\t        onchange(var)            - invoke action if var changes\n\n"
5691        "\t    The available actions are:\n\n"
5692        "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5693        "\t        save(field,...)                      - save current event fields\n"
5694#ifdef CONFIG_TRACER_SNAPSHOT
5695        "\t        snapshot()                           - snapshot the trace buffer\n\n"
5696#endif
5697#ifdef CONFIG_SYNTH_EVENTS
5698        "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5699        "\t  Write into this file to define/undefine new synthetic events.\n"
5700        "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5701#endif
5702#endif
5703;
5704
5705static ssize_t
5706tracing_readme_read(struct file *filp, char __user *ubuf,
5707                       size_t cnt, loff_t *ppos)
5708{
5709        return simple_read_from_buffer(ubuf, cnt, ppos,
5710                                        readme_msg, strlen(readme_msg));
5711}
5712
5713static const struct file_operations tracing_readme_fops = {
5714        .open           = tracing_open_generic,
5715        .read           = tracing_readme_read,
5716        .llseek         = generic_file_llseek,
5717};
5718
5719static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5720{
5721        int pid = ++(*pos);
5722
5723        return trace_find_tgid_ptr(pid);
5724}
5725
5726static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5727{
5728        int pid = *pos;
5729
5730        return trace_find_tgid_ptr(pid);
5731}
5732
5733static void saved_tgids_stop(struct seq_file *m, void *v)
5734{
5735}
5736
5737static int saved_tgids_show(struct seq_file *m, void *v)
5738{
5739        int *entry = (int *)v;
5740        int pid = entry - tgid_map;
5741        int tgid = *entry;
5742
5743        if (tgid == 0)
5744                return SEQ_SKIP;
5745
5746        seq_printf(m, "%d %d\n", pid, tgid);
5747        return 0;
5748}
5749
5750static const struct seq_operations tracing_saved_tgids_seq_ops = {
5751        .start          = saved_tgids_start,
5752        .stop           = saved_tgids_stop,
5753        .next           = saved_tgids_next,
5754        .show           = saved_tgids_show,
5755};
5756
5757static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5758{
5759        int ret;
5760
5761        ret = tracing_check_open_get_tr(NULL);
5762        if (ret)
5763                return ret;
5764
5765        return seq_open(filp, &tracing_saved_tgids_seq_ops);
5766}
5767
5768
5769static const struct file_operations tracing_saved_tgids_fops = {
5770        .open           = tracing_saved_tgids_open,
5771        .read           = seq_read,
5772        .llseek         = seq_lseek,
5773        .release        = seq_release,
5774};
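
/*
 * Reading "saved_tgids" through these operations prints one
 * "<pid> <tgid>" pair per line for every pid whose tgid was recorded
 * while the record-tgid option is enabled, e.g.:
 *
 *     # cat saved_tgids
 *     1023 1023
 *     1024 1023
 */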
5775
5776static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5777{
5778        unsigned int *ptr = v;
5779
5780        if (*pos || m->count)
5781                ptr++;
5782
5783        (*pos)++;
5784
5785        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5786             ptr++) {
5787                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5788                        continue;
5789
5790                return ptr;
5791        }
5792
5793        return NULL;
5794}
5795
5796static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5797{
5798        void *v;
5799        loff_t l = 0;
5800
5801        preempt_disable();
5802        arch_spin_lock(&trace_cmdline_lock);
5803
5804        v = &savedcmd->map_cmdline_to_pid[0];
5805        while (l <= *pos) {
5806                v = saved_cmdlines_next(m, v, &l);
5807                if (!v)
5808                        return NULL;
5809        }
5810
5811        return v;
5812}
5813
5814static void saved_cmdlines_stop(struct seq_file *m, void *v)
5815{
5816        arch_spin_unlock(&trace_cmdline_lock);
5817        preempt_enable();
5818}
5819
5820static int saved_cmdlines_show(struct seq_file *m, void *v)
5821{
5822        char buf[TASK_COMM_LEN];
5823        unsigned int *pid = v;
5824
5825        __trace_find_cmdline(*pid, buf);
5826        seq_printf(m, "%d %s\n", *pid, buf);
5827        return 0;
5828}
5829
5830static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5831        .start          = saved_cmdlines_start,
5832        .next           = saved_cmdlines_next,
5833        .stop           = saved_cmdlines_stop,
5834        .show           = saved_cmdlines_show,
5835};
5836
5837static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5838{
5839        int ret;
5840
5841        ret = tracing_check_open_get_tr(NULL);
5842        if (ret)
5843                return ret;
5844
5845        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5846}
5847
5848static const struct file_operations tracing_saved_cmdlines_fops = {
5849        .open           = tracing_saved_cmdlines_open,
5850        .read           = seq_read,
5851        .llseek         = seq_lseek,
5852        .release        = seq_release,
5853};
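
/*
 * Reading "saved_cmdlines" through these operations prints one
 * "<pid> <comm>" pair per line for each cached entry, e.g.:
 *
 *     # cat saved_cmdlines
 *     1042 bash
 */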
5854
5855static ssize_t
5856tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5857                                 size_t cnt, loff_t *ppos)
5858{
5859        char buf[64];
5860        int r;
5861
5862        arch_spin_lock(&trace_cmdline_lock);
5863        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5864        arch_spin_unlock(&trace_cmdline_lock);
5865
5866        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5867}
5868
5869static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5870{
5871        kfree(s->saved_cmdlines);
5872        kfree(s->map_cmdline_to_pid);
5873        kfree(s);
5874}
5875
5876static int tracing_resize_saved_cmdlines(unsigned int val)
5877{
5878        struct saved_cmdlines_buffer *s, *savedcmd_temp;
5879
5880        s = kmalloc(sizeof(*s), GFP_KERNEL);
5881        if (!s)
5882                return -ENOMEM;
5883
5884        if (allocate_cmdlines_buffer(val, s) < 0) {
5885                kfree(s);
5886                return -ENOMEM;
5887        }
5888
5889        arch_spin_lock(&trace_cmdline_lock);
5890        savedcmd_temp = savedcmd;
5891        savedcmd = s;
5892        arch_spin_unlock(&trace_cmdline_lock);
5893        free_saved_cmdlines_buffer(savedcmd_temp);
5894
5895        return 0;
5896}
5897
5898static ssize_t
5899tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5900                                  size_t cnt, loff_t *ppos)
5901{
5902        unsigned long val;
5903        int ret;
5904
5905        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5906        if (ret)
5907                return ret;
5908
5909        /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5910        if (!val || val > PID_MAX_DEFAULT)
5911                return -EINVAL;
5912
5913        ret = tracing_resize_saved_cmdlines((unsigned int)val);
5914        if (ret < 0)
5915                return ret;
5916
5917        *ppos += cnt;
5918
5919        return cnt;
5920}
5921
5922static const struct file_operations tracing_saved_cmdlines_size_fops = {
5923        .open           = tracing_open_generic,
5924        .read           = tracing_saved_cmdlines_size_read,
5925        .write          = tracing_saved_cmdlines_size_write,
5926};
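
/*
 * The "saved_cmdlines_size" file wraps the resize helper above, e.g.:
 *
 *     # cat saved_cmdlines_size
 *     128
 *     # echo 1024 > saved_cmdlines_size
 *
 * (128 is the default).  Values must be between 1 and PID_MAX_DEFAULT;
 * resizing allocates a fresh buffer and frees the old one under
 * trace_cmdline_lock.
 */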
5927
5928#ifdef CONFIG_TRACE_EVAL_MAP_FILE
5929static union trace_eval_map_item *
5930update_eval_map(union trace_eval_map_item *ptr)
5931{
5932        if (!ptr->map.eval_string) {
5933                if (ptr->tail.next) {
5934                        ptr = ptr->tail.next;
5935                        /* Set ptr to the next real item (skip head) */
5936                        ptr++;
5937                } else
5938                        return NULL;
5939        }
5940        return ptr;
5941}
5942
5943static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5944{
5945        union trace_eval_map_item *ptr = v;
5946
5947        /*
5948         * Paranoid! If ptr points to end, we don't want to increment past it.
5949         * This really should never happen.
5950         */
5951        (*pos)++;
5952        ptr = update_eval_map(ptr);
5953        if (WARN_ON_ONCE(!ptr))
5954                return NULL;
5955
5956        ptr++;
5957        ptr = update_eval_map(ptr);
5958
5959        return ptr;
5960}
5961
5962static void *eval_map_start(struct seq_file *m, loff_t *pos)
5963{
5964        union trace_eval_map_item *v;
5965        loff_t l = 0;
5966
5967        mutex_lock(&trace_eval_mutex);
5968
5969        v = trace_eval_maps;
5970        if (v)
5971                v++;
5972
5973        while (v && l < *pos) {
5974                v = eval_map_next(m, v, &l);
5975        }
5976
5977        return v;
5978}
5979
5980static void eval_map_stop(struct seq_file *m, void *v)
5981{
5982        mutex_unlock(&trace_eval_mutex);
5983}
5984
5985static int eval_map_show(struct seq_file *m, void *v)
5986{
5987        union trace_eval_map_item *ptr = v;
5988
5989        seq_printf(m, "%s %ld (%s)\n",
5990                   ptr->map.eval_string, ptr->map.eval_value,
5991                   ptr->map.system);
5992
5993        return 0;
5994}
5995
5996static const struct seq_operations tracing_eval_map_seq_ops = {
5997        .start          = eval_map_start,
5998        .next           = eval_map_next,
5999        .stop           = eval_map_stop,
6000        .show           = eval_map_show,
6001};
6002
6003static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6004{
6005        int ret;
6006
6007        ret = tracing_check_open_get_tr(NULL);
6008        if (ret)
6009                return ret;
6010
6011        return seq_open(filp, &tracing_eval_map_seq_ops);
6012}
6013
6014static const struct file_operations tracing_eval_map_fops = {
6015        .open           = tracing_eval_map_open,
6016        .read           = seq_read,
6017        .llseek         = seq_lseek,
6018        .release        = seq_release,
6019};
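
/*
 * Reading the resulting "eval_map" file prints one line per mapping in
 * the format used by eval_map_show():
 *
 *     # cat eval_map
 *     <eval string> <value> (<system>)
 *
 * (the actual entries depend on which subsystems register eval maps).
 */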
6020
6021static inline union trace_eval_map_item *
6022trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6023{
6024        /* Return tail of array given the head */
6025        return ptr + ptr->head.length + 1;
6026}
6027
6028static void
6029trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6030                           int len)
6031{
6032        struct trace_eval_map **stop;
6033        struct trace_eval_map **map;
6034        union trace_eval_map_item *map_array;
6035        union trace_eval_map_item *ptr;
6036
6037        stop = start + len;
6038
6039        /*
6040         * The trace_eval_maps contains the map plus a head and tail item,
6041         * where the head holds the module and length of array, and the
6042         * tail holds a pointer to the next list.
6043         */
6044        map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6045        if (!map_array) {
6046                pr_warn("Unable to allocate trace eval mapping\n");
6047                return;
6048        }
6049
6050        mutex_lock(&trace_eval_mutex);
6051
6052        if (!trace_eval_maps)
6053                trace_eval_maps = map_array;
6054        else {
6055                ptr = trace_eval_maps;
6056                for (;;) {
6057                        ptr = trace_eval_jmp_to_tail(ptr);
6058                        if (!ptr->tail.next)
6059                                break;
6060                        ptr = ptr->tail.next;
6061
6062                }
6063                ptr->tail.next = map_array;
6064        }
6065        map_array->head.mod = mod;
6066        map_array->head.length = len;
6067        map_array++;
6068
6069        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6070                map_array->map = **map;
6071                map_array++;
6072        }
6073        memset(map_array, 0, sizeof(*map_array));
6074
6075        mutex_unlock(&trace_eval_mutex);
6076}
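
/*
 * Layout of the chunk built above, for a module contributing 'len'
 * eval maps (indices into the map_array allocation):
 *
 *     [0]           head  (mod, length = len)
 *     [1 .. len]    copies of the module's trace_eval_map entries
 *     [len + 1]     tail  (zeroed; tail.next later links the next chunk)
 */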
6077
6078static void trace_create_eval_file(struct dentry *d_tracer)
6079{
6080        trace_create_file("eval_map", 0444, d_tracer,
6081                          NULL, &tracing_eval_map_fops);
6082}
6083
6084#else /* CONFIG_TRACE_EVAL_MAP_FILE */
6085static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6086static inline void trace_insert_eval_map_file(struct module *mod,
6087                              struct trace_eval_map **start, int len) { }
6088#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6089
6090static void trace_insert_eval_map(struct module *mod,
6091                                  struct trace_eval_map **start, int len)
6092{
6093        struct trace_eval_map **map;
6094
6095        if (len <= 0)
6096                return;
6097
6098        map = start;
6099
6100        trace_event_eval_update(map, len);
6101
6102        trace_insert_eval_map_file(mod, start, len);
6103}
6104
6105static ssize_t
6106tracing_set_trace_read(struct file *filp, char __user *ubuf,
6107                       size_t cnt, loff_t *ppos)
6108{
6109        struct trace_array *tr = filp->private_data;
6110        char buf[MAX_TRACER_SIZE+2];
6111        int r;
6112
6113        mutex_lock(&trace_types_lock);
6114        r = sprintf(buf, "%s\n", tr->current_trace->name);
6115        mutex_unlock(&trace_types_lock);
6116
6117        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6118}
6119
6120int tracer_init(struct tracer *t, struct trace_array *tr)
6121{
6122        tracing_reset_online_cpus(&tr->array_buffer);
6123        return t->init(tr);
6124}
6125
6126static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6127{
6128        int cpu;
6129
6130        for_each_tracing_cpu(cpu)
6131                per_cpu_ptr(buf->data, cpu)->entries = val;
6132}
6133
6134#ifdef CONFIG_TRACER_MAX_TRACE
6135/* resize @trace_buf's per-cpu buffers to match @size_buf's entries */
6136static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6137                                        struct array_buffer *size_buf, int cpu_id)
6138{
6139        int cpu, ret = 0;
6140
6141        if (cpu_id == RING_BUFFER_ALL_CPUS) {
6142                for_each_tracing_cpu(cpu) {
6143                        ret = ring_buffer_resize(trace_buf->buffer,
6144                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6145                        if (ret < 0)
6146                                break;
6147                        per_cpu_ptr(trace_buf->data, cpu)->entries =
6148                                per_cpu_ptr(size_buf->data, cpu)->entries;
6149                }
6150        } else {
6151                ret = ring_buffer_resize(trace_buf->buffer,
6152                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6153                if (ret == 0)
6154                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6155                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
6156        }
6157
6158        return ret;
6159}
6160#endif /* CONFIG_TRACER_MAX_TRACE */
6161
6162static int __tracing_resize_ring_buffer(struct trace_array *tr,
6163                                        unsigned long size, int cpu)
6164{
6165        int ret;
6166
6167        /*
6168         * If kernel or user changes the size of the ring buffer
6169         * we use the size that was given, and we can forget about
6170         * expanding it later.
6171         */
6172        ring_buffer_expanded = true;
6173
6174        /* May be called before buffers are initialized */
6175        if (!tr->array_buffer.buffer)
6176                return 0;
6177
6178        ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6179        if (ret < 0)
6180                return ret;
6181
6182#ifdef CONFIG_TRACER_MAX_TRACE
6183        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6184            !tr->current_trace->use_max_tr)
6185                goto out;
6186
6187        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6188        if (ret < 0) {
6189                int r = resize_buffer_duplicate_size(&tr->array_buffer,
6190                                                     &tr->array_buffer, cpu);
6191                if (r < 0) {
6192                        /*
6193                         * AARGH! We are left with different
6194                         * size max buffer!!!!
6195                         * The max buffer is our "snapshot" buffer.
6196                         * When a tracer needs a snapshot (one of the
6197                         * latency tracers), it swaps the max buffer
6198                         * with the saved snapshot. We succeeded in updating
6199                         * the size of the main buffer, but failed to update
6200                         * the size of the max buffer. But when we tried
6201                         * to reset the main buffer to the original size, we
6202                         * failed there too. This is very unlikely to
6203                         * happen, but if it does, warn and kill all
6204                         * tracing.
6205                         */
6206                        WARN_ON(1);
6207                        tracing_disabled = 1;
6208                }
6209                return ret;
6210        }
6211
6212        if (cpu == RING_BUFFER_ALL_CPUS)
6213                set_buffer_entries(&tr->max_buffer, size);
6214        else
6215                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6216
6217 out:
6218#endif /* CONFIG_TRACER_MAX_TRACE */
6219
6220        if (cpu == RING_BUFFER_ALL_CPUS)
6221                set_buffer_entries(&tr->array_buffer, size);
6222        else
6223                per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6224
6225        return ret;
6226}
6227
6228ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6229                                  unsigned long size, int cpu_id)
6230{
6231        int ret;
6232
6233        mutex_lock(&trace_types_lock);
6234
6235        if (cpu_id != RING_BUFFER_ALL_CPUS) {
6236                /* make sure, this cpu is enabled in the mask */
6237                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6238                        ret = -EINVAL;
6239                        goto out;
6240                }
6241        }
6242
6243        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6244        if (ret < 0)
6245                ret = -ENOMEM;
6246
6247out:
6248        mutex_unlock(&trace_types_lock);
6249
6250        return ret;
6251}
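
/*
 * This is the resize path behind the buffer_size_kb files; their write
 * handler converts the KB value before calling in here.  Typical usage:
 *
 *     # echo 4096 > buffer_size_kb               # resize every CPU
 *     # echo 4096 > per_cpu/cpu1/buffer_size_kb  # resize CPU 1 only
 */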
6252
6253
6254/**
6255 * tracing_update_buffers - used by tracing facility to expand ring buffers
6256 *
6257 * To save memory when tracing is configured in but never used, the
6258 * ring buffers are initially set to a minimum size. But once a user
6259 * starts to use the tracing facility, they need to grow to their
6260 * default size.
6261 *
6262 * This function is to be called when a tracer is about to be used.
6263 */
6264int tracing_update_buffers(void)
6265{
6266        int ret = 0;
6267
6268        mutex_lock(&trace_types_lock);
6269        if (!ring_buffer_expanded)
6270                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6271                                                RING_BUFFER_ALL_CPUS);
6272        mutex_unlock(&trace_types_lock);
6273
6274        return ret;
6275}
6276
6277struct trace_option_dentry;
6278
6279static void
6280create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6281
6282/*
6283 * Used to clear out the tracer before deletion of an instance.
6284 * Must have trace_types_lock held.
6285 */
6286static void tracing_set_nop(struct trace_array *tr)
6287{
6288        if (tr->current_trace == &nop_trace)
6289                return;
6290
6291        tr->current_trace->enabled--;
6292
6293        if (tr->current_trace->reset)
6294                tr->current_trace->reset(tr);
6295
6296        tr->current_trace = &nop_trace;
6297}
6298
6299static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6300{
6301        /* Only enable if the directory has been created already. */
6302        if (!tr->dir)
6303                return;
6304
6305        create_trace_option_files(tr, t);
6306}
6307
6308int tracing_set_tracer(struct trace_array *tr, const char *buf)
6309{
6310        struct tracer *t;
6311#ifdef CONFIG_TRACER_MAX_TRACE
6312        bool had_max_tr;
6313#endif
6314        int ret = 0;
6315
6316        mutex_lock(&trace_types_lock);
6317
6318        if (!ring_buffer_expanded) {
6319                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6320                                                RING_BUFFER_ALL_CPUS);
6321                if (ret < 0)
6322                        goto out;
6323                ret = 0;
6324        }
6325
6326        for (t = trace_types; t; t = t->next) {
6327                if (strcmp(t->name, buf) == 0)
6328                        break;
6329        }
6330        if (!t) {
6331                ret = -EINVAL;
6332                goto out;
6333        }
6334        if (t == tr->current_trace)
6335                goto out;
6336
6337#ifdef CONFIG_TRACER_SNAPSHOT
6338        if (t->use_max_tr) {
6339                arch_spin_lock(&tr->max_lock);
6340                if (tr->cond_snapshot)
6341                        ret = -EBUSY;
6342                arch_spin_unlock(&tr->max_lock);
6343                if (ret)
6344                        goto out;
6345        }
6346#endif
6347        /* Some tracers won't work on kernel command line */
6348        if (system_state < SYSTEM_RUNNING && t->noboot) {
6349                pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6350                        t->name);
6351                goto out;
6352        }
6353
6354        /* Some tracers are only allowed for the top level buffer */
6355        if (!trace_ok_for_array(t, tr)) {
6356                ret = -EINVAL;
6357                goto out;
6358        }
6359
6360        /* If trace pipe files are being read, we can't change the tracer */
6361        if (tr->trace_ref) {
6362                ret = -EBUSY;
6363                goto out;
6364        }
6365
6366        trace_branch_disable();
6367
6368        tr->current_trace->enabled--;
6369
6370        if (tr->current_trace->reset)
6371                tr->current_trace->reset(tr);
6372
6373        /* Current trace needs to be nop_trace before synchronize_rcu */
6374        tr->current_trace = &nop_trace;
6375
6376#ifdef CONFIG_TRACER_MAX_TRACE
6377        had_max_tr = tr->allocated_snapshot;
6378
6379        if (had_max_tr && !t->use_max_tr) {
6380                /*
6381                 * We need to make sure that update_max_tr() sees that
6382                 * current_trace changed to nop_trace, to keep it from
6383                 * swapping the buffers after we resize it.
6384                 * update_max_tr() is called with interrupts disabled,
6385                 * so a synchronize_rcu() is sufficient.
6386                 */
6387                synchronize_rcu();
6388                free_snapshot(tr);
6389        }
6390#endif
6391
6392#ifdef CONFIG_TRACER_MAX_TRACE
6393        if (t->use_max_tr && !had_max_tr) {
6394                ret = tracing_alloc_snapshot_instance(tr);
6395                if (ret < 0)
6396                        goto out;
6397        }
6398#endif
6399
6400        if (t->init) {
6401                ret = tracer_init(t, tr);
6402                if (ret)
6403                        goto out;
6404        }
6405
6406        tr->current_trace = t;
6407        tr->current_trace->enabled++;
6408        trace_branch_enable(tr);
6409 out:
6410        mutex_unlock(&trace_types_lock);
6411
6412        return ret;
6413}
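/*
 * Illustrative sketch: tracing_set_tracer() is the in-kernel counterpart of
 * writing a tracer name to the "current_tracer" file (see
 * tracing_set_trace_write() below).  Assuming "tr" is a valid trace_array,
 * switching it to the no-op tracer would look like:
 *
 *        ret = tracing_set_tracer(tr, "nop");
 *        if (ret)
 *                pr_warn("failed to switch tracer: %d\n", ret);
 */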
6414
6415static ssize_t
6416tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6417                        size_t cnt, loff_t *ppos)
6418{
6419        struct trace_array *tr = filp->private_data;
6420        char buf[MAX_TRACER_SIZE+1];
6421        int i;
6422        size_t ret;
6423        int err;
6424
6425        ret = cnt;
6426
6427        if (cnt > MAX_TRACER_SIZE)
6428                cnt = MAX_TRACER_SIZE;
6429
6430        if (copy_from_user(buf, ubuf, cnt))
6431                return -EFAULT;
6432
6433        buf[cnt] = 0;
6434
6435        /* strip trailing whitespace. */
6436        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6437                buf[i] = 0;
6438
6439        err = tracing_set_tracer(tr, buf);
6440        if (err)
6441                return err;
6442
6443        *ppos += ret;
6444
6445        return ret;
6446}
6447
6448static ssize_t
6449tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6450                   size_t cnt, loff_t *ppos)
6451{
6452        char buf[64];
6453        int r;
6454
6455        r = snprintf(buf, sizeof(buf), "%ld\n",
6456                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6457        if (r > sizeof(buf))
6458                r = sizeof(buf);
6459        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6460}
6461
6462static ssize_t
6463tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6464                    size_t cnt, loff_t *ppos)
6465{
6466        unsigned long val;
6467        int ret;
6468
6469        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6470        if (ret)
6471                return ret;
6472
6473        *ptr = val * 1000;
6474
6475        return cnt;
6476}
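/*
 * The two helpers above expose nanosecond-resolution values in microseconds:
 * tracing_nsecs_read() converts with nsecs_to_usecs(), and
 * tracing_nsecs_write() multiplies the user-supplied value by 1000 before
 * storing it.
 */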
6477
6478static ssize_t
6479tracing_thresh_read(struct file *filp, char __user *ubuf,
6480                    size_t cnt, loff_t *ppos)
6481{
6482        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6483}
6484
6485static ssize_t
6486tracing_thresh_write(struct file *filp, const char __user *ubuf,
6487                     size_t cnt, loff_t *ppos)
6488{
6489        struct trace_array *tr = filp->private_data;
6490        int ret;
6491
6492        mutex_lock(&trace_types_lock);
6493        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6494        if (ret < 0)
6495                goto out;
6496
6497        if (tr->current_trace->update_thresh) {
6498                ret = tr->current_trace->update_thresh(tr);
6499                if (ret < 0)
6500                        goto out;
6501        }
6502
6503        ret = cnt;
6504out:
6505        mutex_unlock(&trace_types_lock);
6506
6507        return ret;
6508}
6509
6510#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6511
6512static ssize_t
6513tracing_max_lat_read(struct file *filp, char __user *ubuf,
6514                     size_t cnt, loff_t *ppos)
6515{
6516        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6517}
6518
6519static ssize_t
6520tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6521                      size_t cnt, loff_t *ppos)
6522{
6523        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6524}
6525
6526#endif
6527
6528static int tracing_open_pipe(struct inode *inode, struct file *filp)
6529{
6530        struct trace_array *tr = inode->i_private;
6531        struct trace_iterator *iter;
6532        int ret;
6533
6534        ret = tracing_check_open_get_tr(tr);
6535        if (ret)
6536                return ret;
6537
6538        mutex_lock(&trace_types_lock);
6539
6540        /* create a buffer to store the information to pass to userspace */
6541        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6542        if (!iter) {
6543                ret = -ENOMEM;
6544                __trace_array_put(tr);
6545                goto out;
6546        }
6547
6548        trace_seq_init(&iter->seq);
6549        iter->trace = tr->current_trace;
6550
6551        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6552                ret = -ENOMEM;
6553                goto fail;
6554        }
6555
6556        /* trace pipe does not show start of buffer */
6557        cpumask_setall(iter->started);
6558
6559        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6560                iter->iter_flags |= TRACE_FILE_LAT_FMT;
6561
6562        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6563        if (trace_clocks[tr->clock_id].in_ns)
6564                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6565
6566        iter->tr = tr;
6567        iter->array_buffer = &tr->array_buffer;
6568        iter->cpu_file = tracing_get_cpu(inode);
6569        mutex_init(&iter->mutex);
6570        filp->private_data = iter;
6571
6572        if (iter->trace->pipe_open)
6573                iter->trace->pipe_open(iter);
6574
6575        nonseekable_open(inode, filp);
6576
6577        tr->trace_ref++;
6578out:
6579        mutex_unlock(&trace_types_lock);
6580        return ret;
6581
6582fail:
6583        kfree(iter);
6584        __trace_array_put(tr);
6585        mutex_unlock(&trace_types_lock);
6586        return ret;
6587}
6588
6589static int tracing_release_pipe(struct inode *inode, struct file *file)
6590{
6591        struct trace_iterator *iter = file->private_data;
6592        struct trace_array *tr = inode->i_private;
6593
6594        mutex_lock(&trace_types_lock);
6595
6596        tr->trace_ref--;
6597
6598        if (iter->trace->pipe_close)
6599                iter->trace->pipe_close(iter);
6600
6601        mutex_unlock(&trace_types_lock);
6602
6603        free_cpumask_var(iter->started);
6604        mutex_destroy(&iter->mutex);
6605        kfree(iter);
6606
6607        trace_array_put(tr);
6608
6609        return 0;
6610}
6611
6612static __poll_t
6613trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6614{
6615        struct trace_array *tr = iter->tr;
6616
6617        /* Iterators are static; they should be either filled or empty */
6618        if (trace_buffer_iter(iter, iter->cpu_file))
6619                return EPOLLIN | EPOLLRDNORM;
6620
6621        if (tr->trace_flags & TRACE_ITER_BLOCK)
6622                /*
6623                 * Always select as readable when in blocking mode
6624                 */
6625                return EPOLLIN | EPOLLRDNORM;
6626        else
6627                return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6628                                             filp, poll_table);
6629}
6630
6631static __poll_t
6632tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6633{
6634        struct trace_iterator *iter = filp->private_data;
6635
6636        return trace_poll(iter, filp, poll_table);
6637}
6638
6639/* Must be called with iter->mutex held. */
6640static int tracing_wait_pipe(struct file *filp)
6641{
6642        struct trace_iterator *iter = filp->private_data;
6643        int ret;
6644
6645        while (trace_empty(iter)) {
6646
6647                if ((filp->f_flags & O_NONBLOCK)) {
6648                        return -EAGAIN;
6649                }
6650
6651                /*
6652                 * We block until we read something, or until tracing gets
6653                 * disabled after we have already read something. We still
6654                 * block while tracing is disabled if nothing has been read
6655                 * yet; this lets a user cat this file and then enable
6656                 * tracing. After a read, a disabled tracer gives an EOF.
6657                 *
6658                 * iter->pos will be 0 if we haven't read anything.
6659                 */
6660                if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6661                        break;
6662
6663                mutex_unlock(&iter->mutex);
6664
6665                ret = wait_on_pipe(iter, 0);
6666
6667                mutex_lock(&iter->mutex);
6668
6669                if (ret)
6670                        return ret;
6671        }
6672
6673        return 1;
6674}
6675
6676/*
6677 * Consumer reader.
6678 */
6679static ssize_t
6680tracing_read_pipe(struct file *filp, char __user *ubuf,
6681                  size_t cnt, loff_t *ppos)
6682{
6683        struct trace_iterator *iter = filp->private_data;
6684        ssize_t sret;
6685
6686        /*
6687         * Avoid more than one consumer on a single file descriptor.
6688         * This is just a matter of trace coherency; the ring buffer itself
6689         * is protected.
6690         */
6691        mutex_lock(&iter->mutex);
6692
6693        /* return any leftover data */
6694        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6695        if (sret != -EBUSY)
6696                goto out;
6697
6698        trace_seq_init(&iter->seq);
6699
6700        if (iter->trace->read) {
6701                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6702                if (sret)
6703                        goto out;
6704        }
6705
6706waitagain:
6707        sret = tracing_wait_pipe(filp);
6708        if (sret <= 0)
6709                goto out;
6710
6711        /* stop when tracing is finished */
6712        if (trace_empty(iter)) {
6713                sret = 0;
6714                goto out;
6715        }
6716
6717        if (cnt >= PAGE_SIZE)
6718                cnt = PAGE_SIZE - 1;
6719
6720        /* reset all but tr, trace, and overruns */
6721        memset(&iter->seq, 0,
6722               sizeof(struct trace_iterator) -
6723               offsetof(struct trace_iterator, seq));
6724        cpumask_clear(iter->started);
6725        trace_seq_init(&iter->seq);
6726        iter->pos = -1;
6727
6728        trace_event_read_lock();
6729        trace_access_lock(iter->cpu_file);
6730        while (trace_find_next_entry_inc(iter) != NULL) {
6731                enum print_line_t ret;
6732                int save_len = iter->seq.seq.len;
6733
6734                ret = print_trace_line(iter);
6735                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6736                        /* don't print partial lines */
6737                        iter->seq.seq.len = save_len;
6738                        break;
6739                }
6740                if (ret != TRACE_TYPE_NO_CONSUME)
6741                        trace_consume(iter);
6742
6743                if (trace_seq_used(&iter->seq) >= cnt)
6744                        break;
6745
6746                /*
6747                 * Setting the full flag means we reached the trace_seq buffer
6748                 * size and should have left via the partial output condition
6749                 * above. One of the trace_seq_* functions is not used properly.
6750                 */
6751                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6752                          iter->ent->type);
6753        }
6754        trace_access_unlock(iter->cpu_file);
6755        trace_event_read_unlock();
6756
6757        /* Now copy what we have to the user */
6758        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6759        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6760                trace_seq_init(&iter->seq);
6761
6762        /*
6763         * If there was nothing to send to user, in spite of consuming trace
6764         * entries, go back to wait for more entries.
6765         */
6766        if (sret == -EBUSY)
6767                goto waitagain;
6768
6769out:
6770        mutex_unlock(&iter->mutex);
6771
6772        return sret;
6773}
6774
6775static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6776                                     unsigned int idx)
6777{
6778        __free_page(spd->pages[idx]);
6779}
6780
6781static size_t
6782tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6783{
6784        size_t count;
6785        int save_len;
6786        int ret;
6787
6788        /* Seq buffer is page-sized, exactly what we need. */
6789        for (;;) {
6790                save_len = iter->seq.seq.len;
6791                ret = print_trace_line(iter);
6792
6793                if (trace_seq_has_overflowed(&iter->seq)) {
6794                        iter->seq.seq.len = save_len;
6795                        break;
6796                }
6797
6798                /*
6799                 * This should not be hit, because it should only
6800                 * be set if the iter->seq overflowed. But check it
6801                 * anyway to be safe.
6802                 */
6803                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6804                        iter->seq.seq.len = save_len;
6805                        break;
6806                }
6807
6808                count = trace_seq_used(&iter->seq) - save_len;
6809                if (rem < count) {
6810                        rem = 0;
6811                        iter->seq.seq.len = save_len;
6812                        break;
6813                }
6814
6815                if (ret != TRACE_TYPE_NO_CONSUME)
6816                        trace_consume(iter);
6817                rem -= count;
6818                if (!trace_find_next_entry_inc(iter))   {
6819                        rem = 0;
6820                        iter->ent = NULL;
6821                        break;
6822                }
6823        }
6824
6825        return rem;
6826}
6827
6828static ssize_t tracing_splice_read_pipe(struct file *filp,
6829                                        loff_t *ppos,
6830                                        struct pipe_inode_info *pipe,
6831                                        size_t len,
6832                                        unsigned int flags)
6833{
6834        struct page *pages_def[PIPE_DEF_BUFFERS];
6835        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6836        struct trace_iterator *iter = filp->private_data;
6837        struct splice_pipe_desc spd = {
6838                .pages          = pages_def,
6839                .partial        = partial_def,
6840                .nr_pages       = 0, /* This gets updated below. */
6841                .nr_pages_max   = PIPE_DEF_BUFFERS,
6842                .ops            = &default_pipe_buf_ops,
6843                .spd_release    = tracing_spd_release_pipe,
6844        };
6845        ssize_t ret;
6846        size_t rem;
6847        unsigned int i;
6848
6849        if (splice_grow_spd(pipe, &spd))
6850                return -ENOMEM;
6851
6852        mutex_lock(&iter->mutex);
6853
6854        if (iter->trace->splice_read) {
6855                ret = iter->trace->splice_read(iter, filp,
6856                                               ppos, pipe, len, flags);
6857                if (ret)
6858                        goto out_err;
6859        }
6860
6861        ret = tracing_wait_pipe(filp);
6862        if (ret <= 0)
6863                goto out_err;
6864
6865        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6866                ret = -EFAULT;
6867                goto out_err;
6868        }
6869
6870        trace_event_read_lock();
6871        trace_access_lock(iter->cpu_file);
6872
6873        /* Fill as many pages as possible. */
6874        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6875                spd.pages[i] = alloc_page(GFP_KERNEL);
6876                if (!spd.pages[i])
6877                        break;
6878
6879                rem = tracing_fill_pipe_page(rem, iter);
6880
6881                /* Copy the data into the page, so we can start over. */
6882                ret = trace_seq_to_buffer(&iter->seq,
6883                                          page_address(spd.pages[i]),
6884                                          trace_seq_used(&iter->seq));
6885                if (ret < 0) {
6886                        __free_page(spd.pages[i]);
6887                        break;
6888                }
6889                spd.partial[i].offset = 0;
6890                spd.partial[i].len = trace_seq_used(&iter->seq);
6891
6892                trace_seq_init(&iter->seq);
6893        }
6894
6895        trace_access_unlock(iter->cpu_file);
6896        trace_event_read_unlock();
6897        mutex_unlock(&iter->mutex);
6898
6899        spd.nr_pages = i;
6900
6901        if (i)
6902                ret = splice_to_pipe(pipe, &spd);
6903        else
6904                ret = 0;
6905out:
6906        splice_shrink_spd(&spd);
6907        return ret;
6908
6909out_err:
6910        mutex_unlock(&iter->mutex);
6911        goto out;
6912}
6913
6914static ssize_t
6915tracing_entries_read(struct file *filp, char __user *ubuf,
6916                     size_t cnt, loff_t *ppos)
6917{
6918        struct inode *inode = file_inode(filp);
6919        struct trace_array *tr = inode->i_private;
6920        int cpu = tracing_get_cpu(inode);
6921        char buf[64];
6922        int r = 0;
6923        ssize_t ret;
6924
6925        mutex_lock(&trace_types_lock);
6926
6927        if (cpu == RING_BUFFER_ALL_CPUS) {
6928                int cpu, buf_size_same;
6929                unsigned long size;
6930
6931                size = 0;
6932                buf_size_same = 1;
6933                /* check if all CPU sizes are the same */
6934                for_each_tracing_cpu(cpu) {
6935                        /* fill in the size from the first enabled CPU */
6936                        if (size == 0)
6937                                size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6938                        if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6939                                buf_size_same = 0;
6940                                break;
6941                        }
6942                }
6943
6944                if (buf_size_same) {
6945                        if (!ring_buffer_expanded)
6946                                r = sprintf(buf, "%lu (expanded: %lu)\n",
6947                                            size >> 10,
6948                                            trace_buf_size >> 10);
6949                        else
6950                                r = sprintf(buf, "%lu\n", size >> 10);
6951                } else
6952                        r = sprintf(buf, "X\n");
6953        } else
6954                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6955
6956        mutex_unlock(&trace_types_lock);
6957
6958        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6959        return ret;
6960}
6961
6962static ssize_t
6963tracing_entries_write(struct file *filp, const char __user *ubuf,
6964                      size_t cnt, loff_t *ppos)
6965{
6966        struct inode *inode = file_inode(filp);
6967        struct trace_array *tr = inode->i_private;
6968        unsigned long val;
6969        int ret;
6970
6971        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6972        if (ret)
6973                return ret;
6974
6975        /* must have at least 1 entry */
6976        if (!val)
6977                return -EINVAL;
6978
6979        /* value is in KB */
6980        val <<= 10;
6981        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6982        if (ret < 0)
6983                return ret;
6984
6985        *ppos += cnt;
6986
6987        return cnt;
6988}
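/*
 * The two handlers above back the per-instance "buffer_size_kb" style files
 * (the exact file names are wired up elsewhere in this file): reads report
 * the per-CPU buffer size in KB, with an "(expanded: ...)" hint while the
 * buffer still has its boot-time minimum size, and writes take a size in
 * KB, so e.g. writing "1024" requests roughly 1 MiB per CPU.
 */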
6989
6990static ssize_t
6991tracing_total_entries_read(struct file *filp, char __user *ubuf,
6992                                size_t cnt, loff_t *ppos)
6993{
6994        struct trace_array *tr = filp->private_data;
6995        char buf[64];
6996        int r, cpu;
6997        unsigned long size = 0, expanded_size = 0;
6998
6999        mutex_lock(&trace_types_lock);
7000        for_each_tracing_cpu(cpu) {
7001                size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7002                if (!ring_buffer_expanded)
7003                        expanded_size += trace_buf_size >> 10;
7004        }
7005        if (ring_buffer_expanded)
7006                r = sprintf(buf, "%lu\n", size);
7007        else
7008                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7009        mutex_unlock(&trace_types_lock);
7010
7011        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7012}
7013
7014static ssize_t
7015tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7016                          size_t cnt, loff_t *ppos)
7017{
7018        /*
7019         * There is no need to read what the user has written; this function
7020         * exists just to make sure that there is no error when "echo" is used.
7021         */
7022
7023        *ppos += cnt;
7024
7025        return cnt;
7026}
7027
7028static int
7029tracing_free_buffer_release(struct inode *inode, struct file *filp)
7030{
7031        struct trace_array *tr = inode->i_private;
7032
7033        /* disable tracing? */
7034        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7035                tracer_tracing_off(tr);
7036        /* resize the ring buffer to 0 */
7037        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7038
7039        trace_array_put(tr);
7040
7041        return 0;
7042}
7043
7044static ssize_t
7045tracing_mark_write(struct file *filp, const char __user *ubuf,
7046                                        size_t cnt, loff_t *fpos)
7047{
7048        struct trace_array *tr = filp->private_data;
7049        struct ring_buffer_event *event;
7050        enum event_trigger_type tt = ETT_NONE;
7051        struct trace_buffer *buffer;
7052        struct print_entry *entry;
7053        ssize_t written;
7054        int size;
7055        int len;
7056
7057/* Used in tracing_mark_raw_write() as well */
7058#define FAULTED_STR "<faulted>"
7059#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7060
7061        if (tracing_disabled)
7062                return -EINVAL;
7063
7064        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7065                return -EINVAL;
7066
7067        if (cnt > TRACE_BUF_SIZE)
7068                cnt = TRACE_BUF_SIZE;
7069
7070        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7071
7072        size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7073
7074        /* If less than "<faulted>", then make sure we can still add that */
7075        if (cnt < FAULTED_SIZE)
7076                size += FAULTED_SIZE - cnt;
7077
7078        buffer = tr->array_buffer.buffer;
7079        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7080                                            tracing_gen_ctx());
7081        if (unlikely(!event))
7082                /* Ring buffer disabled, return as if not open for write */
7083                return -EBADF;
7084
7085        entry = ring_buffer_event_data(event);
7086        entry->ip = _THIS_IP_;
7087
7088        len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7089        if (len) {
7090                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7091                cnt = FAULTED_SIZE;
7092                written = -EFAULT;
7093        } else
7094                written = cnt;
7095
7096        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7097                /* do not add \n before testing triggers, but add \0 */
7098                entry->buf[cnt] = '\0';
7099                tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7100        }
7101
7102        if (entry->buf[cnt - 1] != '\n') {
7103                entry->buf[cnt] = '\n';
7104                entry->buf[cnt + 1] = '\0';
7105        } else
7106                entry->buf[cnt] = '\0';
7107
7108        if (static_branch_unlikely(&trace_marker_exports_enabled))
7109                ftrace_exports(event, TRACE_EXPORT_MARKER);
7110        __buffer_unlock_commit(buffer, event);
7111
7112        if (tt)
7113                event_triggers_post_call(tr->trace_marker_file, tt);
7114
7115        if (written > 0)
7116                *fpos += written;
7117
7118        return written;
7119}
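/*
 * Illustrative userspace sketch (the tracefs mount point is assumed to be
 * /sys/kernel/tracing; includes and error handling are omitted): anything
 * written to the trace_marker file is recorded as a TRACE_PRINT entry,
 * truncated to TRACE_BUF_SIZE bytes.
 *
 *        int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *        if (fd >= 0)
 *                write(fd, "hello\n", 6);
 */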
7120
7121/* Limit it for now to 3K (including tag) */
7122#define RAW_DATA_MAX_SIZE (1024*3)
7123
7124static ssize_t
7125tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7126                                        size_t cnt, loff_t *fpos)
7127{
7128        struct trace_array *tr = filp->private_data;
7129        struct ring_buffer_event *event;
7130        struct trace_buffer *buffer;
7131        struct raw_data_entry *entry;
7132        ssize_t written;
7133        int size;
7134        int len;
7135
7136#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7137
7138        if (tracing_disabled)
7139                return -EINVAL;
7140
7141        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7142                return -EINVAL;
7143
7144        /* The marker must at least have a tag id */
7145        if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7146                return -EINVAL;
7147
7148        if (cnt > TRACE_BUF_SIZE)
7149                cnt = TRACE_BUF_SIZE;
7150
7151        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7152
7153        size = sizeof(*entry) + cnt;
7154        if (cnt < FAULT_SIZE_ID)
7155                size += FAULT_SIZE_ID - cnt;
7156
7157        buffer = tr->array_buffer.buffer;
7158        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7159                                            tracing_gen_ctx());
7160        if (!event)
7161                /* Ring buffer disabled, return as if not open for write */
7162                return -EBADF;
7163
7164        entry = ring_buffer_event_data(event);
7165
7166        len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7167        if (len) {
7168                entry->id = -1;
7169                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7170                written = -EFAULT;
7171        } else
7172                written = cnt;
7173
7174        __buffer_unlock_commit(buffer, event);
7175
7176        if (written > 0)
7177                *fpos += written;
7178
7179        return written;
7180}
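/*
 * Illustrative userspace sketch ("fd" is assumed to be a descriptor opened
 * for writing on the trace_marker_raw file): the payload's first bytes are
 * an integer tag id, followed by raw data, and the whole record is limited
 * to RAW_DATA_MAX_SIZE bytes; the struct layout is made up for the example.
 *
 *        struct { int id; char payload[32]; } rec = { .id = 42 };
 *
 *        memcpy(rec.payload, "raw data", 8);
 *        write(fd, &rec, sizeof(rec));
 */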
7181
7182static int tracing_clock_show(struct seq_file *m, void *v)
7183{
7184        struct trace_array *tr = m->private;
7185        int i;
7186
7187        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7188                seq_printf(m,
7189                        "%s%s%s%s", i ? " " : "",
7190                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7191                        i == tr->clock_id ? "]" : "");
7192        seq_putc(m, '\n');
7193
7194        return 0;
7195}
7196
7197int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7198{
7199        int i;
7200
7201        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7202                if (strcmp(trace_clocks[i].name, clockstr) == 0)
7203                        break;
7204        }
7205        if (i == ARRAY_SIZE(trace_clocks))
7206                return -EINVAL;
7207
7208        mutex_lock(&trace_types_lock);
7209
7210        tr->clock_id = i;
7211
7212        ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7213
7214        /*
7215         * New clock may not be consistent with the previous clock.
7216         * Reset the buffer so that it doesn't have incomparable timestamps.
7217         */
7218        tracing_reset_online_cpus(&tr->array_buffer);
7219
7220#ifdef CONFIG_TRACER_MAX_TRACE
7221        if (tr->max_buffer.buffer)
7222                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7223        tracing_reset_online_cpus(&tr->max_buffer);
7224#endif
7225
7226        mutex_unlock(&trace_types_lock);
7227
7228        return 0;
7229}
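/*
 * Illustrative sketch: tracing_set_clock() is what a write to the
 * "trace_clock" file ends up calling (see tracing_clock_write() below).
 * An in-kernel caller holding a valid trace_array "tr" could switch the
 * instance to the monotonic clock with:
 *
 *        ret = tracing_set_clock(tr, "mono");
 *        if (ret)
 *                pr_warn("unknown trace clock\n");
 */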
7230
7231static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7232                                   size_t cnt, loff_t *fpos)
7233{
7234        struct seq_file *m = filp->private_data;
7235        struct trace_array *tr = m->private;
7236        char buf[64];
7237        const char *clockstr;
7238        int ret;
7239
7240        if (cnt >= sizeof(buf))
7241                return -EINVAL;
7242
7243        if (copy_from_user(buf, ubuf, cnt))
7244                return -EFAULT;
7245
7246        buf[cnt] = 0;
7247
7248        clockstr = strstrip(buf);
7249
7250        ret = tracing_set_clock(tr, clockstr);
7251        if (ret)
7252                return ret;
7253
7254        *fpos += cnt;
7255
7256        return cnt;
7257}
7258
7259static int tracing_clock_open(struct inode *inode, struct file *file)
7260{
7261        struct trace_array *tr = inode->i_private;
7262        int ret;
7263
7264        ret = tracing_check_open_get_tr(tr);
7265        if (ret)
7266                return ret;
7267
7268        ret = single_open(file, tracing_clock_show, inode->i_private);
7269        if (ret < 0)
7270                trace_array_put(tr);
7271
7272        return ret;
7273}
7274
7275static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7276{
7277        struct trace_array *tr = m->private;
7278
7279        mutex_lock(&trace_types_lock);
7280
7281        if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7282                seq_puts(m, "delta [absolute]\n");
7283        else
7284                seq_puts(m, "[delta] absolute\n");
7285
7286        mutex_unlock(&trace_types_lock);
7287
7288        return 0;
7289}
7290
7291static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7292{
7293        struct trace_array *tr = inode->i_private;
7294        int ret;
7295
7296        ret = tracing_check_open_get_tr(tr);
7297        if (ret)
7298                return ret;
7299
7300        ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7301        if (ret < 0)
7302                trace_array_put(tr);
7303
7304        return ret;
7305}
7306
7307u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7308{
7309        if (rbe == this_cpu_read(trace_buffered_event))
7310                return ring_buffer_time_stamp(buffer);
7311
7312        return ring_buffer_event_time_stamp(buffer, rbe);
7313}
7314
7315/*
7316 * Set or disable using the per-CPU trace_buffered_event when possible.
7317 */
7318int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7319{
7320        int ret = 0;
7321
7322        mutex_lock(&trace_types_lock);
7323
7324        if (set && tr->no_filter_buffering_ref++)
7325                goto out;
7326
7327        if (!set) {
7328                if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7329                        ret = -EINVAL;
7330                        goto out;
7331                }
7332
7333                --tr->no_filter_buffering_ref;
7334        }
7335 out:
7336        mutex_unlock(&trace_types_lock);
7337
7338        return ret;
7339}
7340
7341struct ftrace_buffer_info {
7342        struct trace_iterator   iter;
7343        void                    *spare;
7344        unsigned int            spare_cpu;
7345        unsigned int            read;
7346};
7347
7348#ifdef CONFIG_TRACER_SNAPSHOT
7349static int tracing_snapshot_open(struct inode *inode, struct file *file)
7350{
7351        struct trace_array *tr = inode->i_private;
7352        struct trace_iterator *iter;
7353        struct seq_file *m;
7354        int ret;
7355
7356        ret = tracing_check_open_get_tr(tr);
7357        if (ret)
7358                return ret;
7359
7360        if (file->f_mode & FMODE_READ) {
7361                iter = __tracing_open(inode, file, true);
7362                if (IS_ERR(iter))
7363                        ret = PTR_ERR(iter);
7364        } else {
7365                /* Writes still need the seq_file to hold the private data */
7366                ret = -ENOMEM;
7367                m = kzalloc(sizeof(*m), GFP_KERNEL);
7368                if (!m)
7369                        goto out;
7370                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7371                if (!iter) {
7372                        kfree(m);
7373                        goto out;
7374                }
7375                ret = 0;
7376
7377                iter->tr = tr;
7378                iter->array_buffer = &tr->max_buffer;
7379                iter->cpu_file = tracing_get_cpu(inode);
7380                m->private = iter;
7381                file->private_data = m;
7382        }
7383out:
7384        if (ret < 0)
7385                trace_array_put(tr);
7386
7387        return ret;
7388}
7389
7390static ssize_t
7391tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7392                       loff_t *ppos)
7393{
7394        struct seq_file *m = filp->private_data;
7395        struct trace_iterator *iter = m->private;
7396        struct trace_array *tr = iter->tr;
7397        unsigned long val;
7398        int ret;
7399
7400        ret = tracing_update_buffers();
7401        if (ret < 0)
7402                return ret;
7403
7404        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7405        if (ret)
7406                return ret;
7407
7408        mutex_lock(&trace_types_lock);
7409
7410        if (tr->current_trace->use_max_tr) {
7411                ret = -EBUSY;
7412                goto out;
7413        }
7414
7415        arch_spin_lock(&tr->max_lock);
7416        if (tr->cond_snapshot)
7417                ret = -EBUSY;
7418        arch_spin_unlock(&tr->max_lock);
7419        if (ret)
7420                goto out;
7421
7422        switch (val) {
7423        case 0:
7424                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7425                        ret = -EINVAL;
7426                        break;
7427                }
7428                if (tr->allocated_snapshot)
7429                        free_snapshot(tr);
7430                break;
7431        case 1:
7432/* Only allow per-cpu swap if the ring buffer supports it */
7433#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7434                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7435                        ret = -EINVAL;
7436                        break;
7437                }
7438#endif
7439                if (tr->allocated_snapshot)
7440                        ret = resize_buffer_duplicate_size(&tr->max_buffer,
7441                                        &tr->array_buffer, iter->cpu_file);
7442                else
7443                        ret = tracing_alloc_snapshot_instance(tr);
7444                if (ret < 0)
7445                        break;
7446                local_irq_disable();
7447                /* Now, we're going to swap */
7448                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7449                        update_max_tr(tr, current, smp_processor_id(), NULL);
7450                else
7451                        update_max_tr_single(tr, current, iter->cpu_file);
7452                local_irq_enable();
7453                break;
7454        default:
7455                if (tr->allocated_snapshot) {
7456                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7457                                tracing_reset_online_cpus(&tr->max_buffer);
7458                        else
7459                                tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7460                }
7461                break;
7462        }
7463
7464        if (ret >= 0) {
7465                *ppos += cnt;
7466                ret = cnt;
7467        }
7468out:
7469        mutex_unlock(&trace_types_lock);
7470        return ret;
7471}
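/*
 * Summary of the values accepted by the write handler above:
 *   0     - free the snapshot buffer if it was allocated (only valid on the
 *           all-CPUs file),
 *   1     - allocate the snapshot buffer if needed and swap it with the live
 *           buffer (per-CPU swap only where the ring buffer supports it),
 *   other - clear the snapshot buffer contents without freeing it.
 */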
7472
7473static int tracing_snapshot_release(struct inode *inode, struct file *file)
7474{
7475        struct seq_file *m = file->private_data;
7476        int ret;
7477
7478        ret = tracing_release(inode, file);
7479
7480        if (file->f_mode & FMODE_READ)
7481                return ret;
7482
7483        /* If write only, the seq_file is just a stub */
7484        if (m)
7485                kfree(m->private);
7486        kfree(m);
7487
7488        return 0;
7489}
7490
7491static int tracing_buffers_open(struct inode *inode, struct file *filp);
7492static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7493                                    size_t count, loff_t *ppos);
7494static int tracing_buffers_release(struct inode *inode, struct file *file);
7495static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7496                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7497
7498static int snapshot_raw_open(struct inode *inode, struct file *filp)
7499{
7500        struct ftrace_buffer_info *info;
7501        int ret;
7502
7503        /* The following checks for tracefs lockdown */
7504        ret = tracing_buffers_open(inode, filp);
7505        if (ret < 0)
7506                return ret;
7507
7508        info = filp->private_data;
7509
7510        if (info->iter.trace->use_max_tr) {
7511                tracing_buffers_release(inode, filp);
7512                return -EBUSY;
7513        }
7514
7515        info->iter.snapshot = true;
7516        info->iter.array_buffer = &info->iter.tr->max_buffer;
7517
7518        return ret;
7519}
7520
7521#endif /* CONFIG_TRACER_SNAPSHOT */
7522
7523
7524static const struct file_operations tracing_thresh_fops = {
7525        .open           = tracing_open_generic,
7526        .read           = tracing_thresh_read,
7527        .write          = tracing_thresh_write,
7528        .llseek         = generic_file_llseek,
7529};
7530
7531#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7532static const struct file_operations tracing_max_lat_fops = {
7533        .open           = tracing_open_generic,
7534        .read           = tracing_max_lat_read,
7535        .write          = tracing_max_lat_write,
7536        .llseek         = generic_file_llseek,
7537};
7538#endif
7539
7540static const struct file_operations set_tracer_fops = {
7541        .open           = tracing_open_generic,
7542        .read           = tracing_set_trace_read,
7543        .write          = tracing_set_trace_write,
7544        .llseek         = generic_file_llseek,
7545};
7546
7547static const struct file_operations tracing_pipe_fops = {
7548        .open           = tracing_open_pipe,
7549        .poll           = tracing_poll_pipe,
7550        .read           = tracing_read_pipe,
7551        .splice_read    = tracing_splice_read_pipe,
7552        .release        = tracing_release_pipe,
7553        .llseek         = no_llseek,
7554};
7555
7556static const struct file_operations tracing_entries_fops = {
7557        .open           = tracing_open_generic_tr,
7558        .read           = tracing_entries_read,
7559        .write          = tracing_entries_write,
7560        .llseek         = generic_file_llseek,
7561        .release        = tracing_release_generic_tr,
7562};
7563
7564static const struct file_operations tracing_total_entries_fops = {
7565        .open           = tracing_open_generic_tr,
7566        .read           = tracing_total_entries_read,
7567        .llseek         = generic_file_llseek,
7568        .release        = tracing_release_generic_tr,
7569};
7570
7571static const struct file_operations tracing_free_buffer_fops = {
7572        .open           = tracing_open_generic_tr,
7573        .write          = tracing_free_buffer_write,
7574        .release        = tracing_free_buffer_release,
7575};
7576
7577static const struct file_operations tracing_mark_fops = {
7578        .open           = tracing_open_generic_tr,
7579        .write          = tracing_mark_write,
7580        .llseek         = generic_file_llseek,
7581        .release        = tracing_release_generic_tr,
7582};
7583
7584static const struct file_operations tracing_mark_raw_fops = {
7585        .open           = tracing_open_generic_tr,
7586        .write          = tracing_mark_raw_write,
7587        .llseek         = generic_file_llseek,
7588        .release        = tracing_release_generic_tr,
7589};
7590
7591static const struct file_operations trace_clock_fops = {
7592        .open           = tracing_clock_open,
7593        .read           = seq_read,
7594        .llseek         = seq_lseek,
7595        .release        = tracing_single_release_tr,
7596        .write          = tracing_clock_write,
7597};
7598
7599static const struct file_operations trace_time_stamp_mode_fops = {
7600        .open           = tracing_time_stamp_mode_open,
7601        .read           = seq_read,
7602        .llseek         = seq_lseek,
7603        .release        = tracing_single_release_tr,
7604};
7605
7606#ifdef CONFIG_TRACER_SNAPSHOT
7607static const struct file_operations snapshot_fops = {
7608        .open           = tracing_snapshot_open,
7609        .read           = seq_read,
7610        .write          = tracing_snapshot_write,
7611        .llseek         = tracing_lseek,
7612        .release        = tracing_snapshot_release,
7613};
7614
7615static const struct file_operations snapshot_raw_fops = {
7616        .open           = snapshot_raw_open,
7617        .read           = tracing_buffers_read,
7618        .release        = tracing_buffers_release,
7619        .splice_read    = tracing_buffers_splice_read,
7620        .llseek         = no_llseek,
7621};
7622
7623#endif /* CONFIG_TRACER_SNAPSHOT */
7624
7625/*
7626 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7627 * @filp: The active open file structure
7628 * @ubuf: The userspace provided buffer holding the value to write
7629 * @cnt: The number of bytes to read from @ubuf
7630 * @ppos: The current "file" position
7631 *
7632 * This function implements the write interface for a struct trace_min_max_param.
7633 * The filp->private_data must point to a trace_min_max_param structure that
7634 * defines where to write the value, the min and the max acceptable values,
7635 * and a lock to protect the write.
7636 */
7637static ssize_t
7638trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7639{
7640        struct trace_min_max_param *param = filp->private_data;
7641        u64 val;
7642        int err;
7643
7644        if (!param)
7645                return -EFAULT;
7646
7647        err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7648        if (err)
7649                return err;
7650
7651        if (param->lock)
7652                mutex_lock(param->lock);
7653
7654        if (param->min && val < *param->min)
7655                err = -EINVAL;
7656
7657        if (param->max && val > *param->max)
7658                err = -EINVAL;
7659
7660        if (!err)
7661                *param->val = val;
7662
7663        if (param->lock)
7664                mutex_unlock(param->lock);
7665
7666        if (err)
7667                return err;
7668
7669        return cnt;
7670}
7671
7672/*
7673 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7674 * @filp: The active open file structure
7675 * @ubuf: The userspace provided buffer to read value into
7676 * @cnt: The maximum number of bytes to read
7677 * @ppos: The current "file" position
7678 *
7679 * This function implements the read interface for a struct trace_min_max_param.
7680 * The filp->private_data must point to a trace_min_max_param struct with valid
7681 * data.
7682 */
7683static ssize_t
7684trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7685{
7686        struct trace_min_max_param *param = filp->private_data;
7687        char buf[U64_STR_SIZE];
7688        int len;
7689        u64 val;
7690
7691        if (!param)
7692                return -EFAULT;
7693
7694        val = *param->val;
7695
7696        if (cnt > sizeof(buf))
7697                cnt = sizeof(buf);
7698
7699        len = snprintf(buf, sizeof(buf), "%llu\n", val);
7700
7701        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7702}
7703
7704const struct file_operations trace_min_max_fops = {
7705        .open           = tracing_open_generic,
7706        .read           = trace_min_max_read,
7707        .write          = trace_min_max_write,
7708};
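/*
 * Illustrative sketch of wiring up trace_min_max_fops.  The field names of
 * struct trace_min_max_param follow the accesses made by the read/write
 * handlers above; the file name, parent dentry and backing variables are
 * made up for the example:
 *
 *        static u64 example_val = 50;
 *        static u64 example_min = 1;
 *        static u64 example_max = 100;
 *        static DEFINE_MUTEX(example_lock);
 *
 *        static struct trace_min_max_param example_param = {
 *                .lock   = &example_lock,
 *                .val    = &example_val,
 *                .min    = &example_min,
 *                .max    = &example_max,
 *        };
 *
 *        tracefs_create_file("example_u64", 0644, parent, &example_param,
 *                            &trace_min_max_fops);
 */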
7709
7710#define TRACING_LOG_ERRS_MAX    8
7711#define TRACING_LOG_LOC_MAX     128
7712
7713#define CMD_PREFIX "  Command: "
7714
7715struct err_info {
7716        const char      **errs; /* ptr to loc-specific array of err strings */
7717        u8              type;   /* index into errs -> specific err string */
7718        u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7719        u64             ts;
7720};
7721
7722struct tracing_log_err {
7723        struct list_head        list;
7724        struct err_info         info;
7725        char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7726        char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7727};
7728
7729static DEFINE_MUTEX(tracing_err_log_lock);
7730
7731static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7732{
7733        struct tracing_log_err *err;
7734
7735        if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7736                err = kzalloc(sizeof(*err), GFP_KERNEL);
7737                if (!err)
7738                        err = ERR_PTR(-ENOMEM);
7739                tr->n_err_log_entries++;
7740
7741                return err;
7742        }
7743
7744        err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7745        list_del(&err->list);
7746
7747        return err;
7748}
7749
7750/**
7751 * err_pos - find the position of a string within a command for caret placement
7752 * @cmd: The tracing command that caused the error
7753 * @str: The string to position the caret at within @cmd
7754 *
7755 * Finds the position of the first occurrence of @str within @cmd.  The
7756 * return value can be passed to tracing_log_err() for caret placement
7757 * within @cmd.
7758 *
7759 * Returns the index within @cmd of the first occurrence of @str or 0
7760 * if @str was not found.
7761 */
7762unsigned int err_pos(char *cmd, const char *str)
7763{
7764        char *found;
7765
7766        if (WARN_ON(!strlen(cmd)))
7767                return 0;
7768
7769        found = strstr(cmd, str);
7770        if (found)
7771                return found - cmd;
7772
7773        return 0;
7774}
7775
7776/**
7777 * tracing_log_err - write an error to the tracing error log
7778 * @tr: The associated trace array for the error (NULL for top level array)
7779 * @loc: A string describing where the error occurred
7780 * @cmd: The tracing command that caused the error
7781 * @errs: The array of loc-specific static error strings
7782 * @type: The index into errs[], which produces the specific static err string
7783 * @pos: The position the caret should be placed in the cmd
7784 *
7785 * Writes an error into tracing/error_log of the form:
7786 *
7787 * <loc>: error: <text>
7788 *   Command: <cmd>
7789 *              ^
7790 *
7791 * tracing/error_log is a small log file containing the last
7792 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7793 * unless there has been a tracing error, and the error log can be
7794 * cleared and have its memory freed by writing the empty string in
7795 * truncation mode to it, i.e. echo > tracing/error_log.
7796 *
7797 * NOTE: the @errs array along with the @type param are used to
7798 * produce a static error string - this string is not copied and saved
7799 * when the error is logged - only a pointer to it is saved.  See
7800 * existing callers for examples of how static strings are typically
7801 * defined for use with tracing_log_err().
7802 */
7803void tracing_log_err(struct trace_array *tr,
7804                     const char *loc, const char *cmd,
7805                     const char **errs, u8 type, u8 pos)
7806{
7807        struct tracing_log_err *err;
7808
7809        if (!tr)
7810                tr = &global_trace;
7811
7812        mutex_lock(&tracing_err_log_lock);
7813        err = get_tracing_log_err(tr);
7814        if (PTR_ERR(err) == -ENOMEM) {
7815                mutex_unlock(&tracing_err_log_lock);
7816                return;
7817        }
7818
7819        snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7820        snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7821
7822        err->info.errs = errs;
7823        err->info.type = type;
7824        err->info.pos = pos;
7825        err->info.ts = local_clock();
7826
7827        list_add_tail(&err->list, &tr->err_log);
7828        mutex_unlock(&tracing_err_log_lock);
7829}
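/*
 * Illustrative sketch of a caller (names are made up; see the kernel-doc
 * above for the real contract).  The error strings are static, and err_pos()
 * supplies the caret position within the offending command:
 *
 *        static const char *example_errs[] = {
 *                "Unknown keyword",
 *                "Missing argument",
 *        };
 *
 *        tracing_log_err(tr, "example: parse", cmd, example_errs,
 *                        0, err_pos(cmd, "badword"));
 */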
7830
7831static void clear_tracing_err_log(struct trace_array *tr)
7832{
7833        struct tracing_log_err *err, *next;
7834
7835        mutex_lock(&tracing_err_log_lock);
7836        list_for_each_entry_safe(err, next, &tr->err_log, list) {
7837                list_del(&err->list);
7838                kfree(err);
7839        }
7840
7841        tr->n_err_log_entries = 0;
7842        mutex_unlock(&tracing_err_log_lock);
7843}
7844
7845static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7846{
7847        struct trace_array *tr = m->private;
7848
7849        mutex_lock(&tracing_err_log_lock);
7850
7851        return seq_list_start(&tr->err_log, *pos);
7852}
7853
7854static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7855{
7856        struct trace_array *tr = m->private;
7857
7858        return seq_list_next(v, &tr->err_log, pos);
7859}
7860
7861static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7862{
7863        mutex_unlock(&tracing_err_log_lock);
7864}
7865
7866static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7867{
7868        u8 i;
7869
7870        for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7871                seq_putc(m, ' ');
7872        for (i = 0; i < pos; i++)
7873                seq_putc(m, ' ');
7874        seq_puts(m, "^\n");
7875}
7876
7877static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7878{
7879        struct tracing_log_err *err = v;
7880
7881        if (err) {
7882                const char *err_text = err->info.errs[err->info.type];
7883                u64 sec = err->info.ts;
7884                u32 nsec;
7885
7886                nsec = do_div(sec, NSEC_PER_SEC);
7887                seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7888                           err->loc, err_text);
7889                seq_printf(m, "%s", err->cmd);
7890                tracing_err_log_show_pos(m, err->info.pos);
7891        }
7892
7893        return 0;
7894}
7895
7896static const struct seq_operations tracing_err_log_seq_ops = {
7897        .start  = tracing_err_log_seq_start,
7898        .next   = tracing_err_log_seq_next,
7899        .stop   = tracing_err_log_seq_stop,
7900        .show   = tracing_err_log_seq_show
7901};
7902
7903static int tracing_err_log_open(struct inode *inode, struct file *file)
7904{
7905        struct trace_array *tr = inode->i_private;
7906        int ret = 0;
7907
7908        ret = tracing_check_open_get_tr(tr);
7909        if (ret)
7910                return ret;
7911
7912        /* If this file was opened for write, then erase contents */
7913        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7914                clear_tracing_err_log(tr);
7915
7916        if (file->f_mode & FMODE_READ) {
7917                ret = seq_open(file, &tracing_err_log_seq_ops);
7918                if (!ret) {
7919                        struct seq_file *m = file->private_data;
7920                        m->private = tr;
7921                } else {
7922                        trace_array_put(tr);
7923                }
7924        }
7925        return ret;
7926}
7927
7928static ssize_t tracing_err_log_write(struct file *file,
7929                                     const char __user *buffer,
7930                                     size_t count, loff_t *ppos)
7931{
7932        return count;
7933}
7934
7935static int tracing_err_log_release(struct inode *inode, struct file *file)
7936{
7937        struct trace_array *tr = inode->i_private;
7938
7939        trace_array_put(tr);
7940
7941        if (file->f_mode & FMODE_READ)
7942                seq_release(inode, file);
7943
7944        return 0;
7945}
7946
7947static const struct file_operations tracing_err_log_fops = {
7948        .open           = tracing_err_log_open,
7949        .write          = tracing_err_log_write,
7950        .read           = seq_read,
7951        .llseek         = seq_lseek,
7952        .release        = tracing_err_log_release,
7953};
7954
7955static int tracing_buffers_open(struct inode *inode, struct file *filp)
7956{
7957        struct trace_array *tr = inode->i_private;
7958        struct ftrace_buffer_info *info;
7959        int ret;
7960
7961        ret = tracing_check_open_get_tr(tr);
7962        if (ret)
7963                return ret;
7964
7965        info = kvzalloc(sizeof(*info), GFP_KERNEL);
7966        if (!info) {
7967                trace_array_put(tr);
7968                return -ENOMEM;
7969        }
7970
7971        mutex_lock(&trace_types_lock);
7972
7973        info->iter.tr           = tr;
7974        info->iter.cpu_file     = tracing_get_cpu(inode);
7975        info->iter.trace        = tr->current_trace;
7976        info->iter.array_buffer = &tr->array_buffer;
7977        info->spare             = NULL;
7978        /* Force reading ring buffer for first read */
7979        info->read              = (unsigned int)-1;
7980
7981        filp->private_data = info;
7982
7983        tr->trace_ref++;
7984
7985        mutex_unlock(&trace_types_lock);
7986
7987        ret = nonseekable_open(inode, filp);
7988        if (ret < 0)
7989                trace_array_put(tr);
7990
7991        return ret;
7992}
7993
7994static __poll_t
7995tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7996{
7997        struct ftrace_buffer_info *info = filp->private_data;
7998        struct trace_iterator *iter = &info->iter;
7999
8000        return trace_poll(iter, filp, poll_table);
8001}
8002
8003static ssize_t
8004tracing_buffers_read(struct file *filp, char __user *ubuf,
8005                     size_t count, loff_t *ppos)
8006{
8007        struct ftrace_buffer_info *info = filp->private_data;
8008        struct trace_iterator *iter = &info->iter;
8009        ssize_t ret = 0;
8010        ssize_t size;
8011
8012        if (!count)
8013                return 0;
8014
8015#ifdef CONFIG_TRACER_MAX_TRACE
8016        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8017                return -EBUSY;
8018#endif
8019
8020        if (!info->spare) {
8021                info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8022                                                          iter->cpu_file);
8023                if (IS_ERR(info->spare)) {
8024                        ret = PTR_ERR(info->spare);
8025                        info->spare = NULL;
8026                } else {
8027                        info->spare_cpu = iter->cpu_file;
8028                }
8029        }
8030        if (!info->spare)
8031                return ret;
8032
8033        /* Do we have previous read data to read? */
8034        if (info->read < PAGE_SIZE)
8035                goto read;
8036
8037 again:
8038        trace_access_lock(iter->cpu_file);
8039        ret = ring_buffer_read_page(iter->array_buffer->buffer,
8040                                    &info->spare,
8041                                    count,
8042                                    iter->cpu_file, 0);
8043        trace_access_unlock(iter->cpu_file);
8044
8045        if (ret < 0) {
8046                if (trace_empty(iter)) {
8047                        if ((filp->f_flags & O_NONBLOCK))
8048                                return -EAGAIN;
8049
8050                        ret = wait_on_pipe(iter, 0);
8051                        if (ret)
8052                                return ret;
8053
8054                        goto again;
8055                }
8056                return 0;
8057        }
8058
8059        info->read = 0;
8060 read:
8061        size = PAGE_SIZE - info->read;
8062        if (size > count)
8063                size = count;
8064
8065        ret = copy_to_user(ubuf, info->spare + info->read, size);
8066        if (ret == size)
8067                return -EFAULT;
8068
8069        size -= ret;
8070
8071        *ppos += size;
8072        info->read += size;
8073
8074        return size;
8075}
8076
8077static int tracing_buffers_release(struct inode *inode, struct file *file)
8078{
8079        struct ftrace_buffer_info *info = file->private_data;
8080        struct trace_iterator *iter = &info->iter;
8081
8082        mutex_lock(&trace_types_lock);
8083
8084        iter->tr->trace_ref--;
8085
8086        __trace_array_put(iter->tr);
8087
8088        if (info->spare)
8089                ring_buffer_free_read_page(iter->array_buffer->buffer,
8090                                           info->spare_cpu, info->spare);
8091        kvfree(info);
8092
8093        mutex_unlock(&trace_types_lock);
8094
8095        return 0;
8096}
8097
8098struct buffer_ref {
8099        struct trace_buffer     *buffer;
8100        void                    *page;
8101        int                     cpu;
8102        refcount_t              refcount;
8103};
8104
8105static void buffer_ref_release(struct buffer_ref *ref)
8106{
8107        if (!refcount_dec_and_test(&ref->refcount))
8108                return;
8109        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8110        kfree(ref);
8111}
8112
8113static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8114                                    struct pipe_buffer *buf)
8115{
8116        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8117
8118        buffer_ref_release(ref);
8119        buf->private = 0;
8120}
8121
8122static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8123                                struct pipe_buffer *buf)
8124{
8125        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8126
8127        if (refcount_read(&ref->refcount) > INT_MAX/2)
8128                return false;
8129
8130        refcount_inc(&ref->refcount);
8131        return true;
8132}
8133
8134/* Pipe buffer operations for a ring-buffer page reference. */
8135static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8136        .release                = buffer_pipe_buf_release,
8137        .get                    = buffer_pipe_buf_get,
8138};
8139
8140/*
8141 * Callback from splice_to_pipe(), if we need to release some pages
8142 * at the end of the spd in case we errored out while filling the pipe.
8143 */
8144static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8145{
8146        struct buffer_ref *ref =
8147                (struct buffer_ref *)spd->partial[i].private;
8148
8149        buffer_ref_release(ref);
8150        spd->partial[i].private = 0;
8151}
8152
8153static ssize_t
8154tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8155                            struct pipe_inode_info *pipe, size_t len,
8156                            unsigned int flags)
8157{
8158        struct ftrace_buffer_info *info = file->private_data;
8159        struct trace_iterator *iter = &info->iter;
8160        struct partial_page partial_def[PIPE_DEF_BUFFERS];
8161        struct page *pages_def[PIPE_DEF_BUFFERS];
8162        struct splice_pipe_desc spd = {
8163                .pages          = pages_def,
8164                .partial        = partial_def,
8165                .nr_pages_max   = PIPE_DEF_BUFFERS,
8166                .ops            = &buffer_pipe_buf_ops,
8167                .spd_release    = buffer_spd_release,
8168        };
8169        struct buffer_ref *ref;
8170        int entries, i;
8171        ssize_t ret = 0;
8172
8173#ifdef CONFIG_TRACER_MAX_TRACE
8174        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8175                return -EBUSY;
8176#endif
8177
8178        if (*ppos & (PAGE_SIZE - 1))
8179                return -EINVAL;
8180
8181        if (len & (PAGE_SIZE - 1)) {
8182                if (len < PAGE_SIZE)
8183                        return -EINVAL;
8184                len &= PAGE_MASK;
8185        }
8186
8187        if (splice_grow_spd(pipe, &spd))
8188                return -ENOMEM;
8189
8190 again:
8191        trace_access_lock(iter->cpu_file);
8192        entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8193
8194        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8195                struct page *page;
8196                int r;
8197
8198                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8199                if (!ref) {
8200                        ret = -ENOMEM;
8201                        break;
8202                }
8203
8204                refcount_set(&ref->refcount, 1);
8205                ref->buffer = iter->array_buffer->buffer;
8206                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8207                if (IS_ERR(ref->page)) {
8208                        ret = PTR_ERR(ref->page);
8209                        ref->page = NULL;
8210                        kfree(ref);
8211                        break;
8212                }
8213                ref->cpu = iter->cpu_file;
8214
8215                r = ring_buffer_read_page(ref->buffer, &ref->page,
8216                                          len, iter->cpu_file, 1);
8217                if (r < 0) {
8218                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
8219                                                   ref->page);
8220                        kfree(ref);
8221                        break;
8222                }
8223
8224                page = virt_to_page(ref->page);
8225
8226                spd.pages[i] = page;
8227                spd.partial[i].len = PAGE_SIZE;
8228                spd.partial[i].offset = 0;
8229                spd.partial[i].private = (unsigned long)ref;
8230                spd.nr_pages++;
8231                *ppos += PAGE_SIZE;
8232
8233                entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8234        }
8235
8236        trace_access_unlock(iter->cpu_file);
8237        spd.nr_pages = i;
8238
8239        /* did we read anything? */
8240        if (!spd.nr_pages) {
8241                if (ret)
8242                        goto out;
8243
8244                ret = -EAGAIN;
8245                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8246                        goto out;
8247
8248                ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8249                if (ret)
8250                        goto out;
8251
8252                goto again;
8253        }
8254
8255        ret = splice_to_pipe(pipe, &spd);
8256out:
8257        splice_shrink_spd(&spd);
8258
8259        return ret;
8260}
8261
8262static const struct file_operations tracing_buffers_fops = {
8263        .open           = tracing_buffers_open,
8264        .read           = tracing_buffers_read,
8265        .poll           = tracing_buffers_poll,
8266        .release        = tracing_buffers_release,
8267        .splice_read    = tracing_buffers_splice_read,
8268        .llseek         = no_llseek,
8269};
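/*
 * These fops back per_cpu/cpuN/trace_pipe_raw, which exposes raw
 * ring-buffer pages rather than formatted events.  read() copies data
 * out through the per-file spare page, while splice() hands whole pages
 * to the pipe without copying, each wrapped in a refcounted buffer_ref.
 * A sketch of typical usage, assuming the usual tracefs mount point:
 *
 *	cat /sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw > cpu0.raw
 *
 * Tools such as trace-cmd use the splice path to record binary trace
 * data with minimal copying.
 */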
8270
8271static ssize_t
8272tracing_stats_read(struct file *filp, char __user *ubuf,
8273                   size_t count, loff_t *ppos)
8274{
8275        struct inode *inode = file_inode(filp);
8276        struct trace_array *tr = inode->i_private;
8277        struct array_buffer *trace_buf = &tr->array_buffer;
8278        int cpu = tracing_get_cpu(inode);
8279        struct trace_seq *s;
8280        unsigned long cnt;
8281        unsigned long long t;
8282        unsigned long usec_rem;
8283
8284        s = kmalloc(sizeof(*s), GFP_KERNEL);
8285        if (!s)
8286                return -ENOMEM;
8287
8288        trace_seq_init(s);
8289
8290        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8291        trace_seq_printf(s, "entries: %ld\n", cnt);
8292
8293        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8294        trace_seq_printf(s, "overrun: %ld\n", cnt);
8295
8296        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8297        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8298
8299        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8300        trace_seq_printf(s, "bytes: %ld\n", cnt);
8301
8302        if (trace_clocks[tr->clock_id].in_ns) {
8303                /* local or global for trace_clock */
8304                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8305                usec_rem = do_div(t, USEC_PER_SEC);
8306                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8307                                                                t, usec_rem);
8308
8309                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8310                usec_rem = do_div(t, USEC_PER_SEC);
8311                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8312        } else {
8313                /* counter or tsc mode for trace_clock */
8314                trace_seq_printf(s, "oldest event ts: %llu\n",
8315                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8316
8317                trace_seq_printf(s, "now ts: %llu\n",
8318                                ring_buffer_time_stamp(trace_buf->buffer));
8319        }
8320
8321        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8322        trace_seq_printf(s, "dropped events: %ld\n", cnt);
8323
8324        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8325        trace_seq_printf(s, "read events: %ld\n", cnt);
8326
8327        count = simple_read_from_buffer(ubuf, count, ppos,
8328                                        s->buffer, trace_seq_used(s));
8329
8330        kfree(s);
8331
8332        return count;
8333}
8334
8335static const struct file_operations tracing_stats_fops = {
8336        .open           = tracing_open_generic_tr,
8337        .read           = tracing_stats_read,
8338        .llseek         = generic_file_llseek,
8339        .release        = tracing_release_generic_tr,
8340};
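/*
 * tracing_stats_read() above generates the per_cpu/cpuN/stats contents.
 * Illustrative output (the values are made up; the two "ts" lines only
 * take this seconds.microseconds form when the trace clock counts in
 * nanoseconds):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 46080
 *	oldest event ts:  5134.304955
 *	now ts:  5145.076700
 *	dropped events: 0
 *	read events: 128
 */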
8341
8342#ifdef CONFIG_DYNAMIC_FTRACE
8343
8344static ssize_t
8345tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8346                  size_t cnt, loff_t *ppos)
8347{
8348        ssize_t ret;
8349        char *buf;
8350        int r;
8351
8352        /* 256 should be plenty to hold the amount needed */
8353        buf = kmalloc(256, GFP_KERNEL);
8354        if (!buf)
8355                return -ENOMEM;
8356
8357        r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8358                      ftrace_update_tot_cnt,
8359                      ftrace_number_of_pages,
8360                      ftrace_number_of_groups);
8361
8362        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8363        kfree(buf);
8364        return ret;
8365}
8366
8367static const struct file_operations tracing_dyn_info_fops = {
8368        .open           = tracing_open_generic,
8369        .read           = tracing_read_dyn_info,
8370        .llseek         = generic_file_llseek,
8371};
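/*
 * The read handler above backs dyn_ftrace_total_info (created in
 * tracer_init_tracefs() below).  Its single line reports how many
 * functions dynamic ftrace has accounted for and how many pages and
 * groups of dyn_ftrace records were allocated, e.g. (illustrative
 * numbers, matching the scnprintf() format above):
 *
 *	45723 pages:324 groups: 9
 */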
8372#endif /* CONFIG_DYNAMIC_FTRACE */
8373
8374#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8375static void
8376ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8377                struct trace_array *tr, struct ftrace_probe_ops *ops,
8378                void *data)
8379{
8380        tracing_snapshot_instance(tr);
8381}
8382
8383static void
8384ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8385                      struct trace_array *tr, struct ftrace_probe_ops *ops,
8386                      void *data)
8387{
8388        struct ftrace_func_mapper *mapper = data;
8389        long *count = NULL;
8390
8391        if (mapper)
8392                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8393
8394        if (count) {
8395
8396                if (*count <= 0)
8397                        return;
8398
8399                (*count)--;
8400        }
8401
8402        tracing_snapshot_instance(tr);
8403}
8404
8405static int
8406ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8407                      struct ftrace_probe_ops *ops, void *data)
8408{
8409        struct ftrace_func_mapper *mapper = data;
8410        long *count = NULL;
8411
8412        seq_printf(m, "%ps:", (void *)ip);
8413
8414        seq_puts(m, "snapshot");
8415
8416        if (mapper)
8417                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8418
8419        if (count)
8420                seq_printf(m, ":count=%ld\n", *count);
8421        else
8422                seq_puts(m, ":unlimited\n");
8423
8424        return 0;
8425}
8426
8427static int
8428ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8429                     unsigned long ip, void *init_data, void **data)
8430{
8431        struct ftrace_func_mapper *mapper = *data;
8432
8433        if (!mapper) {
8434                mapper = allocate_ftrace_func_mapper();
8435                if (!mapper)
8436                        return -ENOMEM;
8437                *data = mapper;
8438        }
8439
8440        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8441}
8442
8443static void
8444ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8445                     unsigned long ip, void *data)
8446{
8447        struct ftrace_func_mapper *mapper = data;
8448
8449        if (!ip) {
8450                if (!mapper)
8451                        return;
8452                free_ftrace_func_mapper(mapper, NULL);
8453                return;
8454        }
8455
8456        ftrace_func_mapper_remove_ip(mapper, ip);
8457}
8458
8459static struct ftrace_probe_ops snapshot_probe_ops = {
8460        .func                   = ftrace_snapshot,
8461        .print                  = ftrace_snapshot_print,
8462};
8463
8464static struct ftrace_probe_ops snapshot_count_probe_ops = {
8465        .func                   = ftrace_count_snapshot,
8466        .print                  = ftrace_snapshot_print,
8467        .init                   = ftrace_snapshot_init,
8468        .free                   = ftrace_snapshot_free,
8469};
8470
8471static int
8472ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8473                               char *glob, char *cmd, char *param, int enable)
8474{
8475        struct ftrace_probe_ops *ops;
8476        void *count = (void *)-1;
8477        char *number;
8478        int ret;
8479
8480        if (!tr)
8481                return -ENODEV;
8482
8483        /* hash funcs only work with set_ftrace_filter */
8484        if (!enable)
8485                return -EINVAL;
8486
8487        ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8488
8489        if (glob[0] == '!')
8490                return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8491
8492        if (!param)
8493                goto out_reg;
8494
8495        number = strsep(&param, ":");
8496
8497        if (!strlen(number))
8498                goto out_reg;
8499
8500        /*
8501         * We use the callback data field (which is a pointer)
8502         * as our counter.
8503         */
8504        ret = kstrtoul(number, 0, (unsigned long *)&count);
8505        if (ret)
8506                return ret;
8507
8508 out_reg:
8509        ret = tracing_alloc_snapshot_instance(tr);
8510        if (ret < 0)
8511                goto out;
8512
8513        ret = register_ftrace_function_probe(glob, tr, ops, count);
8514
8515 out:
8516        return ret < 0 ? ret : 0;
8517}
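/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter.  A sketch of how it is typically driven from user
 * space (the function name and count below are only examples):
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter    # snapshot every hit
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter  # only the first 5 hits
 *	echo '!do_sys_open:snapshot' > set_ftrace_filter   # remove the probe
 *
 * A leading '!' unregisters the probe, and the optional ":count" is
 * parsed into the probe data and decremented by ftrace_count_snapshot().
 */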
8518
8519static struct ftrace_func_command ftrace_snapshot_cmd = {
8520        .name                   = "snapshot",
8521        .func                   = ftrace_trace_snapshot_callback,
8522};
8523
8524static __init int register_snapshot_cmd(void)
8525{
8526        return register_ftrace_command(&ftrace_snapshot_cmd);
8527}
8528#else
8529static inline __init int register_snapshot_cmd(void) { return 0; }
8530#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8531
8532static struct dentry *tracing_get_dentry(struct trace_array *tr)
8533{
8534        if (WARN_ON(!tr->dir))
8535                return ERR_PTR(-ENODEV);
8536
8537        /* Top directory uses NULL as the parent */
8538        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8539                return NULL;
8540
8541        /* All sub buffers have a descriptor */
8542        return tr->dir;
8543}
8544
8545static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8546{
8547        struct dentry *d_tracer;
8548
8549        if (tr->percpu_dir)
8550                return tr->percpu_dir;
8551
8552        d_tracer = tracing_get_dentry(tr);
8553        if (IS_ERR(d_tracer))
8554                return NULL;
8555
8556        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8557
8558        MEM_FAIL(!tr->percpu_dir,
8559                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8560
8561        return tr->percpu_dir;
8562}
8563
8564static struct dentry *
8565trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8566                      void *data, long cpu, const struct file_operations *fops)
8567{
8568        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8569
8570        if (ret) /* See tracing_get_cpu() */
8571                d_inode(ret)->i_cdev = (void *)(cpu + 1);
8572        return ret;
8573}
8574
8575static void
8576tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8577{
8578        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8579        struct dentry *d_cpu;
8580        char cpu_dir[30]; /* 30 characters should be more than enough */
8581
8582        if (!d_percpu)
8583                return;
8584
8585        snprintf(cpu_dir, 30, "cpu%ld", cpu);
8586        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8587        if (!d_cpu) {
8588                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8589                return;
8590        }
8591
8592        /* per cpu trace_pipe */
8593        trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8594                                tr, cpu, &tracing_pipe_fops);
8595
8596        /* per cpu trace */
8597        trace_create_cpu_file("trace", 0644, d_cpu,
8598                                tr, cpu, &tracing_fops);
8599
8600        trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8601                                tr, cpu, &tracing_buffers_fops);
8602
8603        trace_create_cpu_file("stats", 0444, d_cpu,
8604                                tr, cpu, &tracing_stats_fops);
8605
8606        trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8607                                tr, cpu, &tracing_entries_fops);
8608
8609#ifdef CONFIG_TRACER_SNAPSHOT
8610        trace_create_cpu_file("snapshot", 0644, d_cpu,
8611                                tr, cpu, &snapshot_fops);
8612
8613        trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8614                                tr, cpu, &snapshot_raw_fops);
8615#endif
8616}
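/*
 * Once the function above has run for each tracing CPU, every instance
 * has a per_cpu/cpuN/ directory containing trace, trace_pipe,
 * trace_pipe_raw, stats and buffer_size_kb (plus snapshot and
 * snapshot_raw when CONFIG_TRACER_SNAPSHOT is enabled), mirroring the
 * top-level files but restricted to that CPU's buffer.
 */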
8617
8618#ifdef CONFIG_FTRACE_SELFTEST
8619/* Let selftest have access to static functions in this file */
8620#include "trace_selftest.c"
8621#endif
8622
8623static ssize_t
8624trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8625                        loff_t *ppos)
8626{
8627        struct trace_option_dentry *topt = filp->private_data;
8628        char *buf;
8629
8630        if (topt->flags->val & topt->opt->bit)
8631                buf = "1\n";
8632        else
8633                buf = "0\n";
8634
8635        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8636}
8637
8638static ssize_t
8639trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8640                         loff_t *ppos)
8641{
8642        struct trace_option_dentry *topt = filp->private_data;
8643        unsigned long val;
8644        int ret;
8645
8646        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8647        if (ret)
8648                return ret;
8649
8650        if (val != 0 && val != 1)
8651                return -EINVAL;
8652
8653        if (!!(topt->flags->val & topt->opt->bit) != val) {
8654                mutex_lock(&trace_types_lock);
8655                ret = __set_tracer_option(topt->tr, topt->flags,
8656                                          topt->opt, !val);
8657                mutex_unlock(&trace_types_lock);
8658                if (ret)
8659                        return ret;
8660        }
8661
8662        *ppos += cnt;
8663
8664        return cnt;
8665}
8666
8667
8668static const struct file_operations trace_options_fops = {
8669        .open = tracing_open_generic,
8670        .read = trace_options_read,
8671        .write = trace_options_write,
8672        .llseek = generic_file_llseek,
8673};
8674
8675/*
8676 * In order to pass in both the trace_array descriptor as well as the index
8677 * to the flag that the trace option file represents, the trace_array
8678 * has a character array of trace_flags_index[], which holds the index
8679 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8680 * The address of this character array is passed to the flag option file
8681 * read/write callbacks.
8682 *
8683 * In order to extract both the index and the trace_array descriptor,
8684 * get_tr_index() uses the following algorithm.
8685 *
8686 *   idx = *ptr;
8687 *
8688 * This works because the pointer is the address of an entry in the
8689 * index array, and each entry holds its own index (remember, index[1] == 1).
8690 *
8691 * Then, to get the trace_array descriptor, subtracting that index
8692 * from the pointer gets us to the start of the index array itself.
8693 *
8694 *   ptr - idx == &index[0]
8695 *
8696 * Then a simple container_of() from that pointer gets us to the
8697 * trace_array descriptor.
8698 */
8699static void get_tr_index(void *data, struct trace_array **ptr,
8700                         unsigned int *pindex)
8701{
8702        *pindex = *(unsigned char *)data;
8703
8704        *ptr = container_of(data - *pindex, struct trace_array,
8705                            trace_flags_index);
8706}
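/*
 * Worked example of the pointer arithmetic above.  If data points at
 * tr->trace_flags_index[3]:
 *
 *	*pindex        == 3	(each slot holds its own index)
 *	data - *pindex == &tr->trace_flags_index[0]
 *	container_of(data - 3, struct trace_array, trace_flags_index) == tr
 */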
8707
8708static ssize_t
8709trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8710                        loff_t *ppos)
8711{
8712        void *tr_index = filp->private_data;
8713        struct trace_array *tr;
8714        unsigned int index;
8715        char *buf;
8716
8717        get_tr_index(tr_index, &tr, &index);
8718
8719        if (tr->trace_flags & (1 << index))
8720                buf = "1\n";
8721        else
8722                buf = "0\n";
8723
8724        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8725}
8726
8727static ssize_t
8728trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8729                         loff_t *ppos)
8730{
8731        void *tr_index = filp->private_data;
8732        struct trace_array *tr;
8733        unsigned int index;
8734        unsigned long val;
8735        int ret;
8736
8737        get_tr_index(tr_index, &tr, &index);
8738
8739        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8740        if (ret)
8741                return ret;
8742
8743        if (val != 0 && val != 1)
8744                return -EINVAL;
8745
8746        mutex_lock(&event_mutex);
8747        mutex_lock(&trace_types_lock);
8748        ret = set_tracer_flag(tr, 1 << index, val);
8749        mutex_unlock(&trace_types_lock);
8750        mutex_unlock(&event_mutex);
8751
8752        if (ret < 0)
8753                return ret;
8754
8755        *ppos += cnt;
8756
8757        return cnt;
8758}
8759
8760static const struct file_operations trace_options_core_fops = {
8761        .open = tracing_open_generic,
8762        .read = trace_options_core_read,
8763        .write = trace_options_core_write,
8764        .llseek = generic_file_llseek,
8765};
8766
8767struct dentry *trace_create_file(const char *name,
8768                                 umode_t mode,
8769                                 struct dentry *parent,
8770                                 void *data,
8771                                 const struct file_operations *fops)
8772{
8773        struct dentry *ret;
8774
8775        ret = tracefs_create_file(name, mode, parent, data, fops);
8776        if (!ret)
8777                pr_warn("Could not create tracefs '%s' entry\n", name);
8778
8779        return ret;
8780}
8781
8782
8783static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8784{
8785        struct dentry *d_tracer;
8786
8787        if (tr->options)
8788                return tr->options;
8789
8790        d_tracer = tracing_get_dentry(tr);
8791        if (IS_ERR(d_tracer))
8792                return NULL;
8793
8794        tr->options = tracefs_create_dir("options", d_tracer);
8795        if (!tr->options) {
8796                pr_warn("Could not create tracefs directory 'options'\n");
8797                return NULL;
8798        }
8799
8800        return tr->options;
8801}
8802
8803static void
8804create_trace_option_file(struct trace_array *tr,
8805                         struct trace_option_dentry *topt,
8806                         struct tracer_flags *flags,
8807                         struct tracer_opt *opt)
8808{
8809        struct dentry *t_options;
8810
8811        t_options = trace_options_init_dentry(tr);
8812        if (!t_options)
8813                return;
8814
8815        topt->flags = flags;
8816        topt->opt = opt;
8817        topt->tr = tr;
8818
8819        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8820                                    &trace_options_fops);
8821
8822}
8823
8824static void
8825create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8826{
8827        struct trace_option_dentry *topts;
8828        struct trace_options *tr_topts;
8829        struct tracer_flags *flags;
8830        struct tracer_opt *opts;
8831        int cnt;
8832        int i;
8833
8834        if (!tracer)
8835                return;
8836
8837        flags = tracer->flags;
8838
8839        if (!flags || !flags->opts)
8840                return;
8841
8842        /*
8843         * If this is an instance, only create flags for tracers
8844         * the instance may have.
8845         */
8846        if (!trace_ok_for_array(tracer, tr))
8847                return;
8848
8849        for (i = 0; i < tr->nr_topts; i++) {
8850                /* Make sure there are no duplicate flags. */
8851                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8852                        return;
8853        }
8854
8855        opts = flags->opts;
8856
8857        for (cnt = 0; opts[cnt].name; cnt++)
8858                ;
8859
8860        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8861        if (!topts)
8862                return;
8863
8864        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8865                            GFP_KERNEL);
8866        if (!tr_topts) {
8867                kfree(topts);
8868                return;
8869        }
8870
8871        tr->topts = tr_topts;
8872        tr->topts[tr->nr_topts].tracer = tracer;
8873        tr->topts[tr->nr_topts].topts = topts;
8874        tr->nr_topts++;
8875
8876        for (cnt = 0; opts[cnt].name; cnt++) {
8877                create_trace_option_file(tr, &topts[cnt], flags,
8878                                         &opts[cnt]);
8879                MEM_FAIL(topts[cnt].entry == NULL,
8880                          "Failed to create trace option: %s",
8881                          opts[cnt].name);
8882        }
8883}
8884
8885static struct dentry *
8886create_trace_option_core_file(struct trace_array *tr,
8887                              const char *option, long index)
8888{
8889        struct dentry *t_options;
8890
8891        t_options = trace_options_init_dentry(tr);
8892        if (!t_options)
8893                return NULL;
8894
8895        return trace_create_file(option, 0644, t_options,
8896                                 (void *)&tr->trace_flags_index[index],
8897                                 &trace_options_core_fops);
8898}
8899
8900static void create_trace_options_dir(struct trace_array *tr)
8901{
8902        struct dentry *t_options;
8903        bool top_level = tr == &global_trace;
8904        int i;
8905
8906        t_options = trace_options_init_dentry(tr);
8907        if (!t_options)
8908                return;
8909
8910        for (i = 0; trace_options[i]; i++) {
8911                if (top_level ||
8912                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8913                        create_trace_option_core_file(tr, trace_options[i], i);
8914        }
8915}
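/*
 * The result is the per-instance options/ directory with one 0/1 file
 * per core trace flag (flags in TOP_LEVEL_TRACE_FLAGS are only created
 * for the top-level instance).  For example, assuming the usual tracefs
 * mount point:
 *
 *	echo 1 > /sys/kernel/tracing/options/sym-offset
 *
 * flips the corresponding bit via trace_options_core_write() above.
 */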
8916
8917static ssize_t
8918rb_simple_read(struct file *filp, char __user *ubuf,
8919               size_t cnt, loff_t *ppos)
8920{
8921        struct trace_array *tr = filp->private_data;
8922        char buf[64];
8923        int r;
8924
8925        r = tracer_tracing_is_on(tr);
8926        r = sprintf(buf, "%d\n", r);
8927
8928        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8929}
8930
8931static ssize_t
8932rb_simple_write(struct file *filp, const char __user *ubuf,
8933                size_t cnt, loff_t *ppos)
8934{
8935        struct trace_array *tr = filp->private_data;
8936        struct trace_buffer *buffer = tr->array_buffer.buffer;
8937        unsigned long val;
8938        int ret;
8939
8940        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8941        if (ret)
8942                return ret;
8943
8944        if (buffer) {
8945                mutex_lock(&trace_types_lock);
8946                if (!!val == tracer_tracing_is_on(tr)) {
8947                        val = 0; /* do nothing */
8948                } else if (val) {
8949                        tracer_tracing_on(tr);
8950                        if (tr->current_trace->start)
8951                                tr->current_trace->start(tr);
8952                } else {
8953                        tracer_tracing_off(tr);
8954                        if (tr->current_trace->stop)
8955                                tr->current_trace->stop(tr);
8956                }
8957                mutex_unlock(&trace_types_lock);
8958        }
8959
8960        (*ppos)++;
8961
8962        return cnt;
8963}
8964
8965static const struct file_operations rb_simple_fops = {
8966        .open           = tracing_open_generic_tr,
8967        .read           = rb_simple_read,
8968        .write          = rb_simple_write,
8969        .release        = tracing_release_generic_tr,
8970        .llseek         = default_llseek,
8971};
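/*
 * rb_simple_fops backs the per-instance "tracing_on" file.  Writing 0
 * calls tracer_tracing_off() (and the tracer's ->stop() hook), writing 1
 * turns recording back on, and reading returns the current state.  For
 * example, assuming the usual tracefs mount point:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on	 # stop recording
 *	cat /sys/kernel/tracing/tracing_on	 # -> 0
 *	echo 1 > /sys/kernel/tracing/tracing_on	 # resume recording
 */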
8972
8973static ssize_t
8974buffer_percent_read(struct file *filp, char __user *ubuf,
8975                    size_t cnt, loff_t *ppos)
8976{
8977        struct trace_array *tr = filp->private_data;
8978        char buf[64];
8979        int r;
8980
8981        r = tr->buffer_percent;
8982        r = sprintf(buf, "%d\n", r);
8983
8984        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8985}
8986
8987static ssize_t
8988buffer_percent_write(struct file *filp, const char __user *ubuf,
8989                     size_t cnt, loff_t *ppos)
8990{
8991        struct trace_array *tr = filp->private_data;
8992        unsigned long val;
8993        int ret;
8994
8995        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8996        if (ret)
8997                return ret;
8998
8999        if (val > 100)
9000                return -EINVAL;
9001
9002        if (!val)
9003                val = 1;
9004
9005        tr->buffer_percent = val;
9006
9007        (*ppos)++;
9008
9009        return cnt;
9010}
9011
9012static const struct file_operations buffer_percent_fops = {
9013        .open           = tracing_open_generic_tr,
9014        .read           = buffer_percent_read,
9015        .write          = buffer_percent_write,
9016        .release        = tracing_release_generic_tr,
9017        .llseek         = default_llseek,
9018};
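/*
 * buffer_percent controls how full a per-CPU buffer must be before
 * blocked readers (e.g. the splice path above, via wait_on_pipe()) are
 * woken.  The write handler clamps 0 up to 1 and rejects values above
 * 100; init_tracer_tracefs() below defaults it to 50.  Example, assuming
 * the usual tracefs mount point:
 *
 *	echo 100 > /sys/kernel/tracing/buffer_percent	# wait for a full buffer
 */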
9019
9020static struct dentry *trace_instance_dir;
9021
9022static void
9023init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9024
9025static int
9026allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9027{
9028        enum ring_buffer_flags rb_flags;
9029
9030        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9031
9032        buf->tr = tr;
9033
9034        buf->buffer = ring_buffer_alloc(size, rb_flags);
9035        if (!buf->buffer)
9036                return -ENOMEM;
9037
9038        buf->data = alloc_percpu(struct trace_array_cpu);
9039        if (!buf->data) {
9040                ring_buffer_free(buf->buffer);
9041                buf->buffer = NULL;
9042                return -ENOMEM;
9043        }
9044
9045        /* Allocate the first page for all buffers */
9046        set_buffer_entries(&tr->array_buffer,
9047                           ring_buffer_size(tr->array_buffer.buffer, 0));
9048
9049        return 0;
9050}
9051
9052static int allocate_trace_buffers(struct trace_array *tr, int size)
9053{
9054        int ret;
9055
9056        ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9057        if (ret)
9058                return ret;
9059
9060#ifdef CONFIG_TRACER_MAX_TRACE
9061        ret = allocate_trace_buffer(tr, &tr->max_buffer,
9062                                    allocate_snapshot ? size : 1);
9063        if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9064                ring_buffer_free(tr->array_buffer.buffer);
9065                tr->array_buffer.buffer = NULL;
9066                free_percpu(tr->array_buffer.data);
9067                tr->array_buffer.data = NULL;
9068                return -ENOMEM;
9069        }
9070        tr->allocated_snapshot = allocate_snapshot;
9071
9072        /*
9073         * Only the top level trace array gets its snapshot allocated
9074         * from the kernel command line.
9075         */
9076        allocate_snapshot = false;
9077#endif
9078
9079        return 0;
9080}
9081
9082static void free_trace_buffer(struct array_buffer *buf)
9083{
9084        if (buf->buffer) {
9085                ring_buffer_free(buf->buffer);
9086                buf->buffer = NULL;
9087                free_percpu(buf->data);
9088                buf->data = NULL;
9089        }
9090}
9091
9092static void free_trace_buffers(struct trace_array *tr)
9093{
9094        if (!tr)
9095                return;
9096
9097        free_trace_buffer(&tr->array_buffer);
9098
9099#ifdef CONFIG_TRACER_MAX_TRACE
9100        free_trace_buffer(&tr->max_buffer);
9101#endif
9102}
9103
9104static void init_trace_flags_index(struct trace_array *tr)
9105{
9106        int i;
9107
9108        /* Used by the trace options files */
9109        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9110                tr->trace_flags_index[i] = i;
9111}
9112
9113static void __update_tracer_options(struct trace_array *tr)
9114{
9115        struct tracer *t;
9116
9117        for (t = trace_types; t; t = t->next)
9118                add_tracer_options(tr, t);
9119}
9120
9121static void update_tracer_options(struct trace_array *tr)
9122{
9123        mutex_lock(&trace_types_lock);
9124        __update_tracer_options(tr);
9125        mutex_unlock(&trace_types_lock);
9126}
9127
9128/* Must have trace_types_lock held */
9129struct trace_array *trace_array_find(const char *instance)
9130{
9131        struct trace_array *tr, *found = NULL;
9132
9133        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9134                if (tr->name && strcmp(tr->name, instance) == 0) {
9135                        found = tr;
9136                        break;
9137                }
9138        }
9139
9140        return found;
9141}
9142
9143struct trace_array *trace_array_find_get(const char *instance)
9144{
9145        struct trace_array *tr;
9146
9147        mutex_lock(&trace_types_lock);
9148        tr = trace_array_find(instance);
9149        if (tr)
9150                tr->ref++;
9151        mutex_unlock(&trace_types_lock);
9152
9153        return tr;
9154}
9155
9156static int trace_array_create_dir(struct trace_array *tr)
9157{
9158        int ret;
9159
9160        tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9161        if (!tr->dir)
9162                return -EINVAL;
9163
9164        ret = event_trace_add_tracer(tr->dir, tr);
9165        if (ret) {
9166                tracefs_remove(tr->dir);
9167                return ret;
9168        }
9169
9170        init_tracer_tracefs(tr, tr->dir);
9171        __update_tracer_options(tr);
9172
9173        return ret;
9174}
9175
9176static struct trace_array *trace_array_create(const char *name)
9177{
9178        struct trace_array *tr;
9179        int ret;
9180
9181        ret = -ENOMEM;
9182        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9183        if (!tr)
9184                return ERR_PTR(ret);
9185
9186        tr->name = kstrdup(name, GFP_KERNEL);
9187        if (!tr->name)
9188                goto out_free_tr;
9189
9190        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9191                goto out_free_tr;
9192
9193        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9194
9195        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9196
9197        raw_spin_lock_init(&tr->start_lock);
9198
9199        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9200
9201        tr->current_trace = &nop_trace;
9202
9203        INIT_LIST_HEAD(&tr->systems);
9204        INIT_LIST_HEAD(&tr->events);
9205        INIT_LIST_HEAD(&tr->hist_vars);
9206        INIT_LIST_HEAD(&tr->err_log);
9207
9208        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9209                goto out_free_tr;
9210
9211        if (ftrace_allocate_ftrace_ops(tr) < 0)
9212                goto out_free_tr;
9213
9214        ftrace_init_trace_array(tr);
9215
9216        init_trace_flags_index(tr);
9217
9218        if (trace_instance_dir) {
9219                ret = trace_array_create_dir(tr);
9220                if (ret)
9221                        goto out_free_tr;
9222        } else
9223                __trace_early_add_events(tr);
9224
9225        list_add(&tr->list, &ftrace_trace_arrays);
9226
9227        tr->ref++;
9228
9229        return tr;
9230
9231 out_free_tr:
9232        ftrace_free_ftrace_ops(tr);
9233        free_trace_buffers(tr);
9234        free_cpumask_var(tr->tracing_cpumask);
9235        kfree(tr->name);
9236        kfree(tr);
9237
9238        return ERR_PTR(ret);
9239}
9240
9241static int instance_mkdir(const char *name)
9242{
9243        struct trace_array *tr;
9244        int ret;
9245
9246        mutex_lock(&event_mutex);
9247        mutex_lock(&trace_types_lock);
9248
9249        ret = -EEXIST;
9250        if (trace_array_find(name))
9251                goto out_unlock;
9252
9253        tr = trace_array_create(name);
9254
9255        ret = PTR_ERR_OR_ZERO(tr);
9256
9257out_unlock:
9258        mutex_unlock(&trace_types_lock);
9259        mutex_unlock(&event_mutex);
9260        return ret;
9261}
9262
9263/**
9264 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9265 * @name: The name of the trace array to be looked up/created.
9266 *
9267 * Returns a pointer to the trace array with the given name, or NULL
9268 * if it cannot be created.
9269 *
9270 * NOTE: This function increments the reference counter associated with the
9271 * trace array returned. This makes sure it cannot be freed while in use.
9272 * Use trace_array_put() once the trace array is no longer needed.
9273 * If the trace_array is to be freed, trace_array_destroy() needs to
9274 * be called after the trace_array_put(), or simply let user space delete
9275 * it from the tracefs instances directory. But until the
9276 * trace_array_put() is called, user space cannot delete it.
9277 *
9278 */
9279struct trace_array *trace_array_get_by_name(const char *name)
9280{
9281        struct trace_array *tr;
9282
9283        mutex_lock(&event_mutex);
9284        mutex_lock(&trace_types_lock);
9285
9286        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9287                if (tr->name && strcmp(tr->name, name) == 0)
9288                        goto out_unlock;
9289        }
9290
9291        tr = trace_array_create(name);
9292
9293        if (IS_ERR(tr))
9294                tr = NULL;
9295out_unlock:
9296        if (tr)
9297                tr->ref++;
9298
9299        mutex_unlock(&trace_types_lock);
9300        mutex_unlock(&event_mutex);
9301        return tr;
9302}
9303EXPORT_SYMBOL_GPL(trace_array_get_by_name);
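/*
 * Sketch of in-kernel usage following the kernel-doc above (the instance
 * name and error handling are only illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	// only if the instance should be freed
 */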
9304
9305static int __remove_instance(struct trace_array *tr)
9306{
9307        int i;
9308
9309        /* Reference counter for a newly created trace array = 1. */
9310        if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9311                return -EBUSY;
9312
9313        list_del(&tr->list);
9314
9315        /* Disable all the flags that were enabled coming in */
9316        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9317                if ((1 << i) & ZEROED_TRACE_FLAGS)
9318                        set_tracer_flag(tr, 1 << i, 0);
9319        }
9320
9321        tracing_set_nop(tr);
9322        clear_ftrace_function_probes(tr);
9323        event_trace_del_tracer(tr);
9324        ftrace_clear_pids(tr);
9325        ftrace_destroy_function_files(tr);
9326        tracefs_remove(tr->dir);
9327        free_percpu(tr->last_func_repeats);
9328        free_trace_buffers(tr);
9329
9330        for (i = 0; i < tr->nr_topts; i++) {
9331                kfree(tr->topts[i].topts);
9332        }
9333        kfree(tr->topts);
9334
9335        free_cpumask_var(tr->tracing_cpumask);
9336        kfree(tr->name);
9337        kfree(tr);
9338
9339        return 0;
9340}
9341
9342int trace_array_destroy(struct trace_array *this_tr)
9343{
9344        struct trace_array *tr;
9345        int ret;
9346
9347        if (!this_tr)
9348                return -EINVAL;
9349
9350        mutex_lock(&event_mutex);
9351        mutex_lock(&trace_types_lock);
9352
9353        ret = -ENODEV;
9354
9355        /* Make sure the trace array exists before destroying it. */
9356        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9357                if (tr == this_tr) {
9358                        ret = __remove_instance(tr);
9359                        break;
9360                }
9361        }
9362
9363        mutex_unlock(&trace_types_lock);
9364        mutex_unlock(&event_mutex);
9365
9366        return ret;
9367}
9368EXPORT_SYMBOL_GPL(trace_array_destroy);
9369
9370static int instance_rmdir(const char *name)
9371{
9372        struct trace_array *tr;
9373        int ret;
9374
9375        mutex_lock(&event_mutex);
9376        mutex_lock(&trace_types_lock);
9377
9378        ret = -ENODEV;
9379        tr = trace_array_find(name);
9380        if (tr)
9381                ret = __remove_instance(tr);
9382
9383        mutex_unlock(&trace_types_lock);
9384        mutex_unlock(&event_mutex);
9385
9386        return ret;
9387}
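/*
 * instance_mkdir() and instance_rmdir() above are wired up as the
 * mkdir/rmdir callbacks of the "instances" directory (see
 * create_trace_instances() below), so a trace array with its own buffers
 * and tracefs files can be created and destroyed from user space:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo	  # -EBUSY while still in use
 */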
9388
9389static __init void create_trace_instances(struct dentry *d_tracer)
9390{
9391        struct trace_array *tr;
9392
9393        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9394                                                         instance_mkdir,
9395                                                         instance_rmdir);
9396        if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9397                return;
9398
9399        mutex_lock(&event_mutex);
9400        mutex_lock(&trace_types_lock);
9401
9402        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9403                if (!tr->name)
9404                        continue;
9405                if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9406                             "Failed to create instance directory\n"))
9407                        break;
9408        }
9409
9410        mutex_unlock(&trace_types_lock);
9411        mutex_unlock(&event_mutex);
9412}
9413
9414static void
9415init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9416{
9417        struct trace_event_file *file;
9418        int cpu;
9419
9420        trace_create_file("available_tracers", 0444, d_tracer,
9421                        tr, &show_traces_fops);
9422
9423        trace_create_file("current_tracer", 0644, d_tracer,
9424                        tr, &set_tracer_fops);
9425
9426        trace_create_file("tracing_cpumask", 0644, d_tracer,
9427                          tr, &tracing_cpumask_fops);
9428
9429        trace_create_file("trace_options", 0644, d_tracer,
9430                          tr, &tracing_iter_fops);
9431
9432        trace_create_file("trace", 0644, d_tracer,
9433                          tr, &tracing_fops);
9434
9435        trace_create_file("trace_pipe", 0444, d_tracer,
9436                          tr, &tracing_pipe_fops);
9437
9438        trace_create_file("buffer_size_kb", 0644, d_tracer,
9439                          tr, &tracing_entries_fops);
9440
9441        trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9442                          tr, &tracing_total_entries_fops);
9443
9444        trace_create_file("free_buffer", 0200, d_tracer,
9445                          tr, &tracing_free_buffer_fops);
9446
9447        trace_create_file("trace_marker", 0220, d_tracer,
9448                          tr, &tracing_mark_fops);
9449
9450        file = __find_event_file(tr, "ftrace", "print");
9451        if (file && file->dir)
9452                trace_create_file("trigger", 0644, file->dir, file,
9453                                  &event_trigger_fops);
9454        tr->trace_marker_file = file;
9455
9456        trace_create_file("trace_marker_raw", 0220, d_tracer,
9457                          tr, &tracing_mark_raw_fops);
9458
9459        trace_create_file("trace_clock", 0644, d_tracer, tr,
9460                          &trace_clock_fops);
9461
9462        trace_create_file("tracing_on", 0644, d_tracer,
9463                          tr, &rb_simple_fops);
9464
9465        trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9466                          &trace_time_stamp_mode_fops);
9467
9468        tr->buffer_percent = 50;
9469
9470        trace_create_file("buffer_percent", 0444, d_tracer,
9471                        tr, &buffer_percent_fops);
9472
9473        create_trace_options_dir(tr);
9474
9475        trace_create_maxlat_file(tr, d_tracer);
9476
9477        if (ftrace_create_function_files(tr, d_tracer))
9478                MEM_FAIL(1, "Could not allocate function filter files");
9479
9480#ifdef CONFIG_TRACER_SNAPSHOT
9481        trace_create_file("snapshot", 0644, d_tracer,
9482                          tr, &snapshot_fops);
9483#endif
9484
9485        trace_create_file("error_log", 0644, d_tracer,
9486                          tr, &tracing_err_log_fops);
9487
9488        for_each_tracing_cpu(cpu)
9489                tracing_init_tracefs_percpu(tr, cpu);
9490
9491        ftrace_init_tracefs(tr, d_tracer);
9492}
9493
9494static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9495{
9496        struct vfsmount *mnt;
9497        struct file_system_type *type;
9498
9499        /*
9500         * To maintain backward compatibility for tools that mount
9501         * debugfs to get to the tracing facility, tracefs is automatically
9502         * mounted to the debugfs/tracing directory.
9503         */
9504        type = get_fs_type("tracefs");
9505        if (!type)
9506                return NULL;
9507        mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9508        put_filesystem(type);
9509        if (IS_ERR(mnt))
9510                return NULL;
9511        mntget(mnt);
9512
9513        return mnt;
9514}
9515
9516/**
9517 * tracing_init_dentry - initialize top level trace array
9518 *
9519 * This is called when creating files or directories in the tracing
9520 * directory. It is called via fs_initcall() by any of the boot up code
9521 * and returns 0 on success or a negative error code on failure.
9522 */
9523int tracing_init_dentry(void)
9524{
9525        struct trace_array *tr = &global_trace;
9526
9527        if (security_locked_down(LOCKDOWN_TRACEFS)) {
9528                pr_warn("Tracing disabled due to lockdown\n");
9529                return -EPERM;
9530        }
9531
9532        /* The top level trace array uses NULL as parent */
9533        if (tr->dir)
9534                return 0;
9535
9536        if (WARN_ON(!tracefs_initialized()))
9537                return -ENODEV;
9538
9539        /*
9540         * As there may still be users that expect the tracing
9541         * files to exist in debugfs/tracing, we must automount
9542         * the tracefs file system there, so older tools still
9543         * work with the newer kernel.
9544         */
9545        tr->dir = debugfs_create_automount("tracing", NULL,
9546                                           trace_automount, NULL);
9547
9548        return 0;
9549}
9550
9551extern struct trace_eval_map *__start_ftrace_eval_maps[];
9552extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9553
9554static struct workqueue_struct *eval_map_wq __initdata;
9555static struct work_struct eval_map_work __initdata;
9556
9557static void __init eval_map_work_func(struct work_struct *work)
9558{
9559        int len;
9560
9561        len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9562        trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9563}
9564
9565static int __init trace_eval_init(void)
9566{
9567        INIT_WORK(&eval_map_work, eval_map_work_func);
9568
9569        eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9570        if (!eval_map_wq) {
9571                pr_err("Unable to allocate eval_map_wq\n");
9572                /* Do work here */
9573                eval_map_work_func(&eval_map_work);
9574                return -ENOMEM;
9575        }
9576
9577        queue_work(eval_map_wq, &eval_map_work);
9578        return 0;
9579}
9580
9581static int __init trace_eval_sync(void)
9582{
9583        /* Make sure the eval map updates are finished */
9584        if (eval_map_wq)
9585                destroy_workqueue(eval_map_wq);
9586        return 0;
9587}
9588
9589late_initcall_sync(trace_eval_sync);
9590
9591
9592#ifdef CONFIG_MODULES
9593static void trace_module_add_evals(struct module *mod)
9594{
9595        if (!mod->num_trace_evals)
9596                return;
9597
9598        /*
9599         * Modules with bad taint do not have events created; do
9600         * not bother with their eval maps (enums) either.
9601         */
9602        if (trace_module_has_bad_taint(mod))
9603                return;
9604
9605        trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9606}
9607
9608#ifdef CONFIG_TRACE_EVAL_MAP_FILE
9609static void trace_module_remove_evals(struct module *mod)
9610{
9611        union trace_eval_map_item *map;
9612        union trace_eval_map_item **last = &trace_eval_maps;
9613
9614        if (!mod->num_trace_evals)
9615                return;
9616
9617        mutex_lock(&trace_eval_mutex);
9618
9619        map = trace_eval_maps;
9620
9621        while (map) {
9622                if (map->head.mod == mod)
9623                        break;
9624                map = trace_eval_jmp_to_tail(map);
9625                last = &map->tail.next;
9626                map = map->tail.next;
9627        }
9628        if (!map)
9629                goto out;
9630
9631        *last = trace_eval_jmp_to_tail(map)->tail.next;
9632        kfree(map);
9633 out:
9634        mutex_unlock(&trace_eval_mutex);
9635}
9636#else
9637static inline void trace_module_remove_evals(struct module *mod) { }
9638#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9639
9640static int trace_module_notify(struct notifier_block *self,
9641                               unsigned long val, void *data)
9642{
9643        struct module *mod = data;
9644
9645        switch (val) {
9646        case MODULE_STATE_COMING:
9647                trace_module_add_evals(mod);
9648                break;
9649        case MODULE_STATE_GOING:
9650                trace_module_remove_evals(mod);
9651                break;
9652        }
9653
9654        return NOTIFY_OK;
9655}
9656
9657static struct notifier_block trace_module_nb = {
9658        .notifier_call = trace_module_notify,
9659        .priority = 0,
9660};
9661#endif /* CONFIG_MODULES */
9662
9663static __init int tracer_init_tracefs(void)
9664{
9665        int ret;
9666
9667        trace_access_lock_init();
9668
9669        ret = tracing_init_dentry();
9670        if (ret)
9671                return 0;
9672
9673        event_trace_init();
9674
9675        init_tracer_tracefs(&global_trace, NULL);
9676        ftrace_init_tracefs_toplevel(&global_trace, NULL);
9677
9678        trace_create_file("tracing_thresh", 0644, NULL,
9679                        &global_trace, &tracing_thresh_fops);
9680
9681        trace_create_file("README", 0444, NULL,
9682                        NULL, &tracing_readme_fops);
9683
9684        trace_create_file("saved_cmdlines", 0444, NULL,
9685                        NULL, &tracing_saved_cmdlines_fops);
9686
9687        trace_create_file("saved_cmdlines_size", 0644, NULL,
9688                          NULL, &tracing_saved_cmdlines_size_fops);
9689
9690        trace_create_file("saved_tgids", 0444, NULL,
9691                        NULL, &tracing_saved_tgids_fops);
9692
9693        trace_eval_init();
9694
9695        trace_create_eval_file(NULL);
9696
9697#ifdef CONFIG_MODULES
9698        register_module_notifier(&trace_module_nb);
9699#endif
9700
9701#ifdef CONFIG_DYNAMIC_FTRACE
9702        trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9703                        NULL, &tracing_dyn_info_fops);
9704#endif
9705
9706        create_trace_instances(NULL);
9707
9708        update_tracer_options(&global_trace);
9709
9710        return 0;
9711}
9712
9713fs_initcall(tracer_init_tracefs);
9714
9715static int trace_panic_handler(struct notifier_block *this,
9716                               unsigned long event, void *unused)
9717{
9718        if (ftrace_dump_on_oops)
9719                ftrace_dump(ftrace_dump_on_oops);
9720        return NOTIFY_OK;
9721}
9722
9723static struct notifier_block trace_panic_notifier = {
9724        .notifier_call  = trace_panic_handler,
9725        .next           = NULL,
9726        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9727};
9728
9729static int trace_die_handler(struct notifier_block *self,
9730                             unsigned long val,
9731                             void *data)
9732{
9733        switch (val) {
9734        case DIE_OOPS:
9735                if (ftrace_dump_on_oops)
9736                        ftrace_dump(ftrace_dump_on_oops);
9737                break;
9738        default:
9739                break;
9740        }
9741        return NOTIFY_OK;
9742}
9743
9744static struct notifier_block trace_die_notifier = {
9745        .notifier_call = trace_die_handler,
9746        .priority = 200
9747};
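/*
 * Both notifiers above dump the ring buffer only when ftrace_dump_on_oops is
 * non-zero.  That is normally requested with the "ftrace_dump_on_oops"
 * (optionally "ftrace_dump_on_oops=orig_cpu") kernel command-line option, or
 * at run time through the kernel.ftrace_dump_on_oops sysctl.
 */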
9748
9749/*
9750 * printk() is limited to a maximum of 1024 bytes; we really don't need it that big.
9751 * Nothing should be printing 1000 characters anyway.
9752 */
9753#define TRACE_MAX_PRINT         1000
9754
9755/*
9756 * Define here KERN_TRACE so that we have one place to modify
9757 * it if we decide to change what log level the ftrace dump
9758 * should be at.
9759 */
9760#define KERN_TRACE              KERN_EMERG
9761
9762void
9763trace_printk_seq(struct trace_seq *s)
9764{
9765        /* Probably should print a warning here. */
9766        if (s->seq.len >= TRACE_MAX_PRINT)
9767                s->seq.len = TRACE_MAX_PRINT;
9768
9769        /*
9770         * More paranoid code. Although the buffer size is set to
9771         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9772         * an extra layer of protection.
9773         */
9774        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9775                s->seq.len = s->seq.size - 1;
9776
9777        /* should already be NUL-terminated, but we are paranoid. */
9778        s->buffer[s->seq.len] = 0;
9779
9780        printk(KERN_TRACE "%s", s->buffer);
9781
9782        trace_seq_init(s);
9783}
9784
9785void trace_init_global_iter(struct trace_iterator *iter)
9786{
9787        iter->tr = &global_trace;
9788        iter->trace = iter->tr->current_trace;
9789        iter->cpu_file = RING_BUFFER_ALL_CPUS;
9790        iter->array_buffer = &global_trace.array_buffer;
9791
9792        if (iter->trace && iter->trace->open)
9793                iter->trace->open(iter);
9794
9795        /* Annotate start of buffers if we had overruns */
9796        if (ring_buffer_overruns(iter->array_buffer->buffer))
9797                iter->iter_flags |= TRACE_FILE_ANNOTATE;
9798
9799        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9800        if (trace_clocks[iter->tr->clock_id].in_ns)
9801                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9802}
9803
9804void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9805{
9806        /* use static because iter can be a bit big for the stack */
9807        static struct trace_iterator iter;
9808        static atomic_t dump_running;
9809        struct trace_array *tr = &global_trace;
9810        unsigned int old_userobj;
9811        unsigned long flags;
9812        int cnt = 0, cpu;
9813
9814        /* Only allow one dump user at a time. */
9815        if (atomic_inc_return(&dump_running) != 1) {
9816                atomic_dec(&dump_running);
9817                return;
9818        }
9819
9820        /*
9821         * Always turn off tracing when we dump.
9822         * We don't need to show trace output of what happens
9823         * between multiple crashes.
9824         *
9825         * If the user does a sysrq-z, then they can re-enable
9826         * tracing with echo 1 > tracing_on.
9827         */
9828        tracing_off();
9829
9830        local_irq_save(flags);
9831
9832        /* Simulate the iterator */
9833        trace_init_global_iter(&iter);
9834        /* Cannot use kmalloc() for iter.temp and iter.fmt: this may run in atomic/crash context */
9835        iter.temp = static_temp_buf;
9836        iter.temp_size = STATIC_TEMP_BUF_SIZE;
9837        iter.fmt = static_fmt_buf;
9838        iter.fmt_size = STATIC_FMT_BUF_SIZE;
9839
9840        for_each_tracing_cpu(cpu) {
9841                atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9842        }
9843
9844        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9845
9846        /* don't look at user memory in panic mode */
9847        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9848
9849        switch (oops_dump_mode) {
9850        case DUMP_ALL:
9851                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9852                break;
9853        case DUMP_ORIG:
9854                iter.cpu_file = raw_smp_processor_id();
9855                break;
9856        case DUMP_NONE:
9857                goto out_enable;
9858        default:
9859                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9860                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9861        }
9862
9863        printk(KERN_TRACE "Dumping ftrace buffer:\n");
9864
9865        /* Did function tracer already get disabled? */
9866        if (ftrace_is_dead()) {
9867                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9868                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9869        }
9870
9871        /*
9872         * We need to stop all tracing on all CPUs to read
9873         * the next buffer. This is a bit expensive, but it is
9874         * not done often. We read everything we can,
9875         * and then release the locks again.
9876         */
9877
9878        while (!trace_empty(&iter)) {
9879
9880                if (!cnt)
9881                        printk(KERN_TRACE "---------------------------------\n");
9882
9883                cnt++;
9884
9885                trace_iterator_reset(&iter);
9886                iter.iter_flags |= TRACE_FILE_LAT_FMT;
9887
9888                if (trace_find_next_entry_inc(&iter) != NULL) {
9889                        int ret;
9890
9891                        ret = print_trace_line(&iter);
9892                        if (ret != TRACE_TYPE_NO_CONSUME)
9893                                trace_consume(&iter);
9894                }
9895                touch_nmi_watchdog();
9896
9897                trace_printk_seq(&iter.seq);
9898        }
9899
9900        if (!cnt)
9901                printk(KERN_TRACE "   (ftrace buffer empty)\n");
9902        else
9903                printk(KERN_TRACE "---------------------------------\n");
9904
9905 out_enable:
9906        tr->trace_flags |= old_userobj;
9907
9908        for_each_tracing_cpu(cpu) {
9909                atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9910        }
9911        atomic_dec(&dump_running);
9912        local_irq_restore(flags);
9913}
9914EXPORT_SYMBOL_GPL(ftrace_dump);
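/*
 * ftrace_dump() is exported so that other kernel code can get the trace
 * buffer onto the console when something has gone badly wrong.  A minimal
 * sketch of a hypothetical caller ("fatal_condition" is a placeholder):
 *
 *	if (WARN(fatal_condition, "device wedged, dumping trace\n"))
 *		ftrace_dump(DUMP_ALL);	// or DUMP_ORIG for the calling CPU only
 *
 * Note that the dump calls tracing_off(); tracing stays disabled afterwards
 * until it is explicitly re-enabled (e.g. via the tracing_on file).
 */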
9915
9916#define WRITE_BUFSIZE  4096
9917
9918ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9919                                size_t count, loff_t *ppos,
9920                                int (*createfn)(const char *))
9921{
9922        char *kbuf, *buf, *tmp;
9923        int ret = 0;
9924        size_t done = 0;
9925        size_t size;
9926
9927        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9928        if (!kbuf)
9929                return -ENOMEM;
9930
9931        while (done < count) {
9932                size = count - done;
9933
9934                if (size >= WRITE_BUFSIZE)
9935                        size = WRITE_BUFSIZE - 1;
9936
9937                if (copy_from_user(kbuf, buffer + done, size)) {
9938                        ret = -EFAULT;
9939                        goto out;
9940                }
9941                kbuf[size] = '\0';
9942                buf = kbuf;
9943                do {
9944                        tmp = strchr(buf, '\n');
9945                        if (tmp) {
9946                                *tmp = '\0';
9947                                size = tmp - buf + 1;
9948                        } else {
9949                                size = strlen(buf);
9950                                if (done + size < count) {
9951                                        if (buf != kbuf)
9952                                                break;
9953                                        /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9954                                        pr_warn("Line length is too long: Should be less than %d\n",
9955                                                WRITE_BUFSIZE - 2);
9956                                        ret = -EINVAL;
9957                                        goto out;
9958                                }
9959                        }
9960                        done += size;
9961
9962                        /* Remove comments */
9963                        tmp = strchr(buf, '#');
9964
9965                        if (tmp)
9966                                *tmp = '\0';
9967
9968                        ret = createfn(buf);
9969                        if (ret)
9970                                goto out;
9971                        buf += size;
9972
9973                } while (done < count);
9974        }
9975        ret = done;
9976
9977out:
9978        kfree(kbuf);
9979
9980        return ret;
9981}
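/*
 * trace_parse_run_command() splits a user-space write into newline-terminated
 * commands, strips '#' comments, and hands each command to createfn; this is
 * the pattern used by the dynamic event files such as kprobe_events.  A
 * minimal sketch of a hypothetical user (the "example" names are placeholders):
 *
 *	static int create_example_command(const char *raw_command)
 *	{
 *		// parse and act on a single command string
 *		return 0;
 *	}
 *
 *	static ssize_t example_write(struct file *file, const char __user *buf,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buf, count, ppos,
 *					       create_example_command);
 *	}
 *
 * A non-zero return from createfn aborts the rest of the write and is
 * propagated back to user space.
 */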
9982
9983__init static int tracer_alloc_buffers(void)
9984{
9985        int ring_buf_size;
9986        int ret = -ENOMEM;
9987
9988
9989        if (security_locked_down(LOCKDOWN_TRACEFS)) {
9990                pr_warn("Tracing disabled due to lockdown\n");
9991                return -EPERM;
9992        }
9993
9994        /*
9995         * Make sure we don't accidentally add more trace options
9996         * than we have bits for.
9997         */
9998        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9999
10000        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10001                goto out;
10002
10003        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10004                goto out_free_buffer_mask;
10005
10006        /* Only allocate trace_printk buffers if a trace_printk exists */
10007        if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10008                /* Must be called before global_trace.buffer is allocated */
10009                trace_printk_init_buffers();
10010
10011        /* To save memory, keep the ring buffer size to its minimum */
10012        if (ring_buffer_expanded)
10013                ring_buf_size = trace_buf_size;
10014        else
10015                ring_buf_size = 1;
10016
10017        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10018        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10019
10020        raw_spin_lock_init(&global_trace.start_lock);
10021
10022        /*
10023         * The prepare callback allocates some memory for the ring buffer. We
10024         * don't free the buffer if the CPU goes down. If we were to free
10025         * the buffer, then the user would lose any trace that was in the
10026         * buffer. The memory will be removed once the "instance" is removed.
10027         */
10028        ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10029                                      "trace/RB:prepare", trace_rb_cpu_prepare,
10030                                      NULL);
10031        if (ret < 0)
10032                goto out_free_cpumask;
10033        /* Used for event triggers */
10034        ret = -ENOMEM;
10035        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10036        if (!temp_buffer)
10037                goto out_rm_hp_state;
10038
10039        if (trace_create_savedcmd() < 0)
10040                goto out_free_temp_buffer;
10041
10042        /* TODO: make the number of buffers hot-pluggable with CPUs */
10043        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10044                MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10045                goto out_free_savedcmd;
10046        }
10047
10048        if (global_trace.buffer_disabled)
10049                tracing_off();
10050
10051        if (trace_boot_clock) {
10052                ret = tracing_set_clock(&global_trace, trace_boot_clock);
10053                if (ret < 0)
10054                        pr_warn("Trace clock %s not defined, going back to default\n",
10055                                trace_boot_clock);
10056        }
10057
10058        /*
10059         * register_tracer() might reference current_trace, so it
10060         * needs to be set before we register anything. This is
10061         * just a bootstrap of current_trace anyway.
10062         */
10063        global_trace.current_trace = &nop_trace;
10064
10065        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10066
10067        ftrace_init_global_array_ops(&global_trace);
10068
10069        init_trace_flags_index(&global_trace);
10070
10071        register_tracer(&nop_trace);
10072
10073        /* Function tracing may start here (via kernel command line) */
10074        init_function_trace();
10075
10076        /* All seems OK, enable tracing */
10077        tracing_disabled = 0;
10078
10079        atomic_notifier_chain_register(&panic_notifier_list,
10080                                       &trace_panic_notifier);
10081
10082        register_die_notifier(&trace_die_notifier);
10083
10084        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10085
10086        INIT_LIST_HEAD(&global_trace.systems);
10087        INIT_LIST_HEAD(&global_trace.events);
10088        INIT_LIST_HEAD(&global_trace.hist_vars);
10089        INIT_LIST_HEAD(&global_trace.err_log);
10090        list_add(&global_trace.list, &ftrace_trace_arrays);
10091
10092        apply_trace_boot_options();
10093
10094        register_snapshot_cmd();
10095
10096        test_can_verify();
10097
10098        return 0;
10099
10100out_free_savedcmd:
10101        free_saved_cmdlines_buffer(savedcmd);
10102out_free_temp_buffer:
10103        ring_buffer_free(temp_buffer);
10104out_rm_hp_state:
10105        cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10106out_free_cpumask:
10107        free_cpumask_var(global_trace.tracing_cpumask);
10108out_free_buffer_mask:
10109        free_cpumask_var(tracing_buffer_mask);
10110out:
10111        return ret;
10112}
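/*
 * The error labels above unwind in reverse order of the set-up steps (saved
 * cmdlines, temp_buffer, the CPU-hotplug state, then the two cpumasks), so a
 * failure at any point releases exactly what had already been allocated.
 */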
10113
10114void __init early_trace_init(void)
10115{
10116        if (tracepoint_printk) {
10117                tracepoint_print_iter =
10118                        kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10119                if (MEM_FAIL(!tracepoint_print_iter,
10120                             "Failed to allocate trace iterator\n"))
10121                        tracepoint_printk = 0;
10122                else
10123                        static_key_enable(&tracepoint_printk_key.key);
10124        }
10125        tracer_alloc_buffers();
10126}
10127
10128void __init trace_init(void)
10129{
10130        trace_event_init();
10131}
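/*
 * Unlike the initcalls in this file, early_trace_init() and trace_init() are
 * called directly from start_kernel() during early boot, so the ring buffer
 * and the core trace event infrastructure are available long before
 * tracer_init_tracefs() runs at fs_initcall time and late_trace_init() runs
 * at late_initcall_sync time.
 */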
10132
10133__init static void clear_boot_tracer(void)
10134{
10135        /*
10136         * The default boot-up tracer name lives in an init section that
10137         * is about to be freed. This function runs at late_initcall time;
10138         * if the boot tracer was never registered by now, clear the
10139         * pointer so that a later registration cannot dereference the
10140         * soon-to-be-freed buffer.
10141         */
10142        if (!default_bootup_tracer)
10143                return;
10144
10145        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10146               default_bootup_tracer);
10147        default_bootup_tracer = NULL;
10148}
10149
10150#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10151__init static void tracing_set_default_clock(void)
10152{
10153        /* sched_clock_stable() is determined in late_initcall */
10154        if (!trace_boot_clock && !sched_clock_stable()) {
10155                if (security_locked_down(LOCKDOWN_TRACEFS)) {
10156                        pr_warn("Can not set tracing clock due to lockdown\n");
10157                        return;
10158                }
10159
10160                printk(KERN_WARNING
10161                       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10162                       "If you want to keep using the local clock, then add:\n"
10163                       "  \"trace_clock=local\"\n"
10164                       "on the kernel command line\n");
10165                tracing_set_clock(&global_trace, "global");
10166        }
10167}
10168#else
10169static inline void tracing_set_default_clock(void) { }
10170#endif
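/*
 * This fallback only applies when no clock was chosen at boot
 * (trace_boot_clock is unset).  Booting with "trace_clock=<name>", or later
 * writing a clock name to the tracefs "trace_clock" file, overrides it.
 */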
10171
10172__init static int late_trace_init(void)
10173{
10174        if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10175                static_key_disable(&tracepoint_printk_key.key);
10176                tracepoint_printk = 0;
10177        }
10178
10179        tracing_set_default_clock();
10180        clear_boot_tracer();
10181        return 0;
10182}
10183
10184late_initcall_sync(late_trace_init);
10185