linux/kernel/trace/trace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * ring buffer based function tracer
   4 *
   5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   7 *
   8 * Originally taken from the RT patch by:
   9 *    Arnaldo Carvalho de Melo <acme@redhat.com>
  10 *
  11 * Based on code from the latency_tracer, that is:
  12 *  Copyright (C) 2004-2006 Ingo Molnar
  13 *  Copyright (C) 2004 Nadia Yvette Chambers
  14 */
  15#include <linux/ring_buffer.h>
  16#include <generated/utsrelease.h>
  17#include <linux/stacktrace.h>
  18#include <linux/writeback.h>
  19#include <linux/kallsyms.h>
  20#include <linux/security.h>
  21#include <linux/seq_file.h>
  22#include <linux/notifier.h>
  23#include <linux/irqflags.h>
  24#include <linux/debugfs.h>
  25#include <linux/tracefs.h>
  26#include <linux/pagemap.h>
  27#include <linux/hardirq.h>
  28#include <linux/linkage.h>
  29#include <linux/uaccess.h>
  30#include <linux/vmalloc.h>
  31#include <linux/ftrace.h>
  32#include <linux/module.h>
  33#include <linux/percpu.h>
  34#include <linux/splice.h>
  35#include <linux/kdebug.h>
  36#include <linux/string.h>
  37#include <linux/mount.h>
  38#include <linux/rwsem.h>
  39#include <linux/slab.h>
  40#include <linux/ctype.h>
  41#include <linux/init.h>
  42#include <linux/panic_notifier.h>
  43#include <linux/poll.h>
  44#include <linux/nmi.h>
  45#include <linux/fs.h>
  46#include <linux/trace.h>
  47#include <linux/sched/clock.h>
  48#include <linux/sched/rt.h>
  49#include <linux/fsnotify.h>
  50#include <linux/irq_work.h>
  51#include <linux/workqueue.h>
  52
  53#include "trace.h"
  54#include "trace_output.h"
  55
  56/*
  57 * On boot up, the ring buffer is set to the minimum size, so that
  58 * we do not waste memory on systems that are not using tracing.
  59 */
  60bool ring_buffer_expanded;
  61
  62/*
  63 * We need to change this state when a selftest is running.
  64 * A selftest will lurk into the ring-buffer to count the
  65 * entries inserted during the selftest although some concurrent
  66 * insertions into the ring-buffer such as trace_printk could occurred
  67 * at the same time, giving false positive or negative results.
  68 */
  69static bool __read_mostly tracing_selftest_running;
  70
  71/*
  72 * If boot-time tracing including tracers/events via kernel cmdline
  73 * is running, we do not want to run SELFTEST.
  74 */
  75bool __read_mostly tracing_selftest_disabled;
  76
  77#ifdef CONFIG_FTRACE_STARTUP_TEST
  78void __init disable_tracing_selftest(const char *reason)
  79{
  80        if (!tracing_selftest_disabled) {
  81                tracing_selftest_disabled = true;
  82                pr_info("Ftrace startup test is disabled due to %s\n", reason);
  83        }
  84}
  85#endif
  86
  87/* Pipe tracepoints to printk */
  88struct trace_iterator *tracepoint_print_iter;
  89int tracepoint_printk;
  90static bool tracepoint_printk_stop_on_boot __initdata;
  91static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  92
  93/* For tracers that don't implement custom flags */
  94static struct tracer_opt dummy_tracer_opt[] = {
  95        { }
  96};
  97
  98static int
  99dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 100{
 101        return 0;
 102}
 103
 104/*
 105 * To prevent the comm cache from being overwritten when no
 106 * tracing is active, only save the comm when a trace event
 107 * occurred.
 108 */
 109static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 110
 111/*
 112 * Kill all tracing for good (never come back).
 113 * It is initialized to 1 but will turn to zero if the initialization
 114 * of the tracer is successful. But that is the only place that sets
 115 * this back to zero.
 116 */
 117static int tracing_disabled = 1;
 118
 119cpumask_var_t __read_mostly     tracing_buffer_mask;
 120
 121/*
 122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 123 *
 124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 125 * is set, then ftrace_dump is called. This will output the contents
 126 * of the ftrace buffers to the console.  This is very useful for
 127 * capturing traces that lead to crashes and outputing it to a
 128 * serial console.
 129 *
 130 * It is default off, but you can enable it with either specifying
 131 * "ftrace_dump_on_oops" in the kernel command line, or setting
 132 * /proc/sys/kernel/ftrace_dump_on_oops
 133 * Set 1 if you want to dump buffers of all CPUs
 134 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 135 */
 136
 137enum ftrace_dump_mode ftrace_dump_on_oops;
 138
 139/* When set, tracing will stop when a WARN*() is hit */
 140int __disable_trace_on_warning;
 141
 142#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 143/* Map of enums to their values, for "eval_map" file */
 144struct trace_eval_map_head {
 145        struct module                   *mod;
 146        unsigned long                   length;
 147};
 148
 149union trace_eval_map_item;
 150
 151struct trace_eval_map_tail {
 152        /*
 153         * "end" is first and points to NULL as it must be different
 154         * than "mod" or "eval_string"
 155         */
 156        union trace_eval_map_item       *next;
 157        const char                      *end;   /* points to NULL */
 158};
 159
 160static DEFINE_MUTEX(trace_eval_mutex);
 161
 162/*
 163 * The trace_eval_maps are saved in an array with two extra elements,
 164 * one at the beginning, and one at the end. The beginning item contains
 165 * the count of the saved maps (head.length), and the module they
 166 * belong to if not built in (head.mod). The ending item contains a
 167 * pointer to the next array of saved eval_map items.
 168 */
 169union trace_eval_map_item {
 170        struct trace_eval_map           map;
 171        struct trace_eval_map_head      head;
 172        struct trace_eval_map_tail      tail;
 173};
 174
 175static union trace_eval_map_item *trace_eval_maps;
 176#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 177
 178int tracing_set_tracer(struct trace_array *tr, const char *buf);
 179static void ftrace_trace_userstack(struct trace_array *tr,
 180                                   struct trace_buffer *buffer,
 181                                   unsigned int trace_ctx);
 182
 183#define MAX_TRACER_SIZE         100
 184static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 185static char *default_bootup_tracer;
 186
 187static bool allocate_snapshot;
 188
 189static int __init set_cmdline_ftrace(char *str)
 190{
 191        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 192        default_bootup_tracer = bootup_tracer_buf;
 193        /* We are using ftrace early, expand it */
 194        ring_buffer_expanded = true;
 195        return 1;
 196}
 197__setup("ftrace=", set_cmdline_ftrace);
 198
 199static int __init set_ftrace_dump_on_oops(char *str)
 200{
 201        if (*str++ != '=' || !*str || !strcmp("1", str)) {
 202                ftrace_dump_on_oops = DUMP_ALL;
 203                return 1;
 204        }
 205
 206        if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
 207                ftrace_dump_on_oops = DUMP_ORIG;
 208                return 1;
 209        }
 210
 211        return 0;
 212}
 213__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 214
 215static int __init stop_trace_on_warning(char *str)
 216{
 217        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 218                __disable_trace_on_warning = 1;
 219        return 1;
 220}
 221__setup("traceoff_on_warning", stop_trace_on_warning);
 222
 223static int __init boot_alloc_snapshot(char *str)
 224{
 225        allocate_snapshot = true;
 226        /* We also need the main ring buffer expanded */
 227        ring_buffer_expanded = true;
 228        return 1;
 229}
 230__setup("alloc_snapshot", boot_alloc_snapshot);
 231
 232
 233static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 234
 235static int __init set_trace_boot_options(char *str)
 236{
 237        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 238        return 0;
 239}
 240__setup("trace_options=", set_trace_boot_options);
 241
 242static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 243static char *trace_boot_clock __initdata;
 244
 245static int __init set_trace_boot_clock(char *str)
 246{
 247        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 248        trace_boot_clock = trace_boot_clock_buf;
 249        return 0;
 250}
 251__setup("trace_clock=", set_trace_boot_clock);
 252
 253static int __init set_tracepoint_printk(char *str)
 254{
 255        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 256                tracepoint_printk = 1;
 257        return 1;
 258}
 259__setup("tp_printk", set_tracepoint_printk);
 260
 261static int __init set_tracepoint_printk_stop(char *str)
 262{
 263        tracepoint_printk_stop_on_boot = true;
 264        return 1;
 265}
 266__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
 267
 268unsigned long long ns2usecs(u64 nsec)
 269{
 270        nsec += 500;
 271        do_div(nsec, 1000);
 272        return nsec;
 273}
 274
 275static void
 276trace_process_export(struct trace_export *export,
 277               struct ring_buffer_event *event, int flag)
 278{
 279        struct trace_entry *entry;
 280        unsigned int size = 0;
 281
 282        if (export->flags & flag) {
 283                entry = ring_buffer_event_data(event);
 284                size = ring_buffer_event_length(event);
 285                export->write(export, entry, size);
 286        }
 287}
 288
 289static DEFINE_MUTEX(ftrace_export_lock);
 290
 291static struct trace_export __rcu *ftrace_exports_list __read_mostly;
 292
 293static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
 294static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
 295static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
 296
 297static inline void ftrace_exports_enable(struct trace_export *export)
 298{
 299        if (export->flags & TRACE_EXPORT_FUNCTION)
 300                static_branch_inc(&trace_function_exports_enabled);
 301
 302        if (export->flags & TRACE_EXPORT_EVENT)
 303                static_branch_inc(&trace_event_exports_enabled);
 304
 305        if (export->flags & TRACE_EXPORT_MARKER)
 306                static_branch_inc(&trace_marker_exports_enabled);
 307}
 308
 309static inline void ftrace_exports_disable(struct trace_export *export)
 310{
 311        if (export->flags & TRACE_EXPORT_FUNCTION)
 312                static_branch_dec(&trace_function_exports_enabled);
 313
 314        if (export->flags & TRACE_EXPORT_EVENT)
 315                static_branch_dec(&trace_event_exports_enabled);
 316
 317        if (export->flags & TRACE_EXPORT_MARKER)
 318                static_branch_dec(&trace_marker_exports_enabled);
 319}
 320
 321static void ftrace_exports(struct ring_buffer_event *event, int flag)
 322{
 323        struct trace_export *export;
 324
 325        preempt_disable_notrace();
 326
 327        export = rcu_dereference_raw_check(ftrace_exports_list);
 328        while (export) {
 329                trace_process_export(export, event, flag);
 330                export = rcu_dereference_raw_check(export->next);
 331        }
 332
 333        preempt_enable_notrace();
 334}
 335
 336static inline void
 337add_trace_export(struct trace_export **list, struct trace_export *export)
 338{
 339        rcu_assign_pointer(export->next, *list);
 340        /*
 341         * We are entering export into the list but another
 342         * CPU might be walking that list. We need to make sure
 343         * the export->next pointer is valid before another CPU sees
 344         * the export pointer included into the list.
 345         */
 346        rcu_assign_pointer(*list, export);
 347}
 348
 349static inline int
 350rm_trace_export(struct trace_export **list, struct trace_export *export)
 351{
 352        struct trace_export **p;
 353
 354        for (p = list; *p != NULL; p = &(*p)->next)
 355                if (*p == export)
 356                        break;
 357
 358        if (*p != export)
 359                return -1;
 360
 361        rcu_assign_pointer(*p, (*p)->next);
 362
 363        return 0;
 364}
 365
 366static inline void
 367add_ftrace_export(struct trace_export **list, struct trace_export *export)
 368{
 369        ftrace_exports_enable(export);
 370
 371        add_trace_export(list, export);
 372}
 373
 374static inline int
 375rm_ftrace_export(struct trace_export **list, struct trace_export *export)
 376{
 377        int ret;
 378
 379        ret = rm_trace_export(list, export);
 380        ftrace_exports_disable(export);
 381
 382        return ret;
 383}
 384
 385int register_ftrace_export(struct trace_export *export)
 386{
 387        if (WARN_ON_ONCE(!export->write))
 388                return -1;
 389
 390        mutex_lock(&ftrace_export_lock);
 391
 392        add_ftrace_export(&ftrace_exports_list, export);
 393
 394        mutex_unlock(&ftrace_export_lock);
 395
 396        return 0;
 397}
 398EXPORT_SYMBOL_GPL(register_ftrace_export);
 399
 400int unregister_ftrace_export(struct trace_export *export)
 401{
 402        int ret;
 403
 404        mutex_lock(&ftrace_export_lock);
 405
 406        ret = rm_ftrace_export(&ftrace_exports_list, export);
 407
 408        mutex_unlock(&ftrace_export_lock);
 409
 410        return ret;
 411}
 412EXPORT_SYMBOL_GPL(unregister_ftrace_export);
 413
 414/* trace_flags holds trace_options default values */
 415#define TRACE_DEFAULT_FLAGS                                             \
 416        (FUNCTION_DEFAULT_FLAGS |                                       \
 417         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 418         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 419         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 420         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
 421         TRACE_ITER_HASH_PTR)
 422
 423/* trace_options that are only supported by global_trace */
 424#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 425               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 426
 427/* trace_flags that are default zero for instances */
 428#define ZEROED_TRACE_FLAGS \
 429        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 430
 431/*
 432 * The global_trace is the descriptor that holds the top-level tracing
 433 * buffers for the live tracing.
 434 */
 435static struct trace_array global_trace = {
 436        .trace_flags = TRACE_DEFAULT_FLAGS,
 437};
 438
 439LIST_HEAD(ftrace_trace_arrays);
 440
 441int trace_array_get(struct trace_array *this_tr)
 442{
 443        struct trace_array *tr;
 444        int ret = -ENODEV;
 445
 446        mutex_lock(&trace_types_lock);
 447        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 448                if (tr == this_tr) {
 449                        tr->ref++;
 450                        ret = 0;
 451                        break;
 452                }
 453        }
 454        mutex_unlock(&trace_types_lock);
 455
 456        return ret;
 457}
 458
 459static void __trace_array_put(struct trace_array *this_tr)
 460{
 461        WARN_ON(!this_tr->ref);
 462        this_tr->ref--;
 463}
 464
 465/**
 466 * trace_array_put - Decrement the reference counter for this trace array.
 467 * @this_tr : pointer to the trace array
 468 *
 469 * NOTE: Use this when we no longer need the trace array returned by
 470 * trace_array_get_by_name(). This ensures the trace array can be later
 471 * destroyed.
 472 *
 473 */
 474void trace_array_put(struct trace_array *this_tr)
 475{
 476        if (!this_tr)
 477                return;
 478
 479        mutex_lock(&trace_types_lock);
 480        __trace_array_put(this_tr);
 481        mutex_unlock(&trace_types_lock);
 482}
 483EXPORT_SYMBOL_GPL(trace_array_put);
 484
 485int tracing_check_open_get_tr(struct trace_array *tr)
 486{
 487        int ret;
 488
 489        ret = security_locked_down(LOCKDOWN_TRACEFS);
 490        if (ret)
 491                return ret;
 492
 493        if (tracing_disabled)
 494                return -ENODEV;
 495
 496        if (tr && trace_array_get(tr) < 0)
 497                return -ENODEV;
 498
 499        return 0;
 500}
 501
 502int call_filter_check_discard(struct trace_event_call *call, void *rec,
 503                              struct trace_buffer *buffer,
 504                              struct ring_buffer_event *event)
 505{
 506        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 507            !filter_match_preds(call->filter, rec)) {
 508                __trace_event_discard_commit(buffer, event);
 509                return 1;
 510        }
 511
 512        return 0;
 513}
 514
 515void trace_free_pid_list(struct trace_pid_list *pid_list)
 516{
 517        vfree(pid_list->pids);
 518        kfree(pid_list);
 519}
 520
 521/**
 522 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 523 * @filtered_pids: The list of pids to check
 524 * @search_pid: The PID to find in @filtered_pids
 525 *
 526 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 527 */
 528bool
 529trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 530{
 531        /*
 532         * If pid_max changed after filtered_pids was created, we
 533         * by default ignore all pids greater than the previous pid_max.
 534         */
 535        if (search_pid >= filtered_pids->pid_max)
 536                return false;
 537
 538        return test_bit(search_pid, filtered_pids->pids);
 539}
 540
 541/**
 542 * trace_ignore_this_task - should a task be ignored for tracing
 543 * @filtered_pids: The list of pids to check
 544 * @filtered_no_pids: The list of pids not to be traced
 545 * @task: The task that should be ignored if not filtered
 546 *
 547 * Checks if @task should be traced or not from @filtered_pids.
 548 * Returns true if @task should *NOT* be traced.
 549 * Returns false if @task should be traced.
 550 */
 551bool
 552trace_ignore_this_task(struct trace_pid_list *filtered_pids,
 553                       struct trace_pid_list *filtered_no_pids,
 554                       struct task_struct *task)
 555{
 556        /*
 557         * If filtered_no_pids is not empty, and the task's pid is listed
 558         * in filtered_no_pids, then return true.
 559         * Otherwise, if filtered_pids is empty, that means we can
 560         * trace all tasks. If it has content, then only trace pids
 561         * within filtered_pids.
 562         */
 563
 564        return (filtered_pids &&
 565                !trace_find_filtered_pid(filtered_pids, task->pid)) ||
 566                (filtered_no_pids &&
 567                 trace_find_filtered_pid(filtered_no_pids, task->pid));
 568}
 569
 570/**
 571 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 572 * @pid_list: The list to modify
 573 * @self: The current task for fork or NULL for exit
 574 * @task: The task to add or remove
 575 *
 576 * If adding a task, if @self is defined, the task is only added if @self
 577 * is also included in @pid_list. This happens on fork and tasks should
 578 * only be added when the parent is listed. If @self is NULL, then the
 579 * @task pid will be removed from the list, which would happen on exit
 580 * of a task.
 581 */
 582void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 583                                  struct task_struct *self,
 584                                  struct task_struct *task)
 585{
 586        if (!pid_list)
 587                return;
 588
 589        /* For forks, we only add if the forking task is listed */
 590        if (self) {
 591                if (!trace_find_filtered_pid(pid_list, self->pid))
 592                        return;
 593        }
 594
 595        /* Sorry, but we don't support pid_max changing after setting */
 596        if (task->pid >= pid_list->pid_max)
 597                return;
 598
 599        /* "self" is set for forks, and NULL for exits */
 600        if (self)
 601                set_bit(task->pid, pid_list->pids);
 602        else
 603                clear_bit(task->pid, pid_list->pids);
 604}
 605
 606/**
 607 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 608 * @pid_list: The pid list to show
 609 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 610 * @pos: The position of the file
 611 *
 612 * This is used by the seq_file "next" operation to iterate the pids
 613 * listed in a trace_pid_list structure.
 614 *
 615 * Returns the pid+1 as we want to display pid of zero, but NULL would
 616 * stop the iteration.
 617 */
 618void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 619{
 620        unsigned long pid = (unsigned long)v;
 621
 622        (*pos)++;
 623
 624        /* pid already is +1 of the actual previous bit */
 625        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 626
 627        /* Return pid + 1 to allow zero to be represented */
 628        if (pid < pid_list->pid_max)
 629                return (void *)(pid + 1);
 630
 631        return NULL;
 632}
 633
 634/**
 635 * trace_pid_start - Used for seq_file to start reading pid lists
 636 * @pid_list: The pid list to show
 637 * @pos: The position of the file
 638 *
 639 * This is used by seq_file "start" operation to start the iteration
 640 * of listing pids.
 641 *
 642 * Returns the pid+1 as we want to display pid of zero, but NULL would
 643 * stop the iteration.
 644 */
 645void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 646{
 647        unsigned long pid;
 648        loff_t l = 0;
 649
 650        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 651        if (pid >= pid_list->pid_max)
 652                return NULL;
 653
 654        /* Return pid + 1 so that zero can be the exit value */
 655        for (pid++; pid && l < *pos;
 656             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 657                ;
 658        return (void *)pid;
 659}
 660
 661/**
 662 * trace_pid_show - show the current pid in seq_file processing
 663 * @m: The seq_file structure to write into
 664 * @v: A void pointer of the pid (+1) value to display
 665 *
 666 * Can be directly used by seq_file operations to display the current
 667 * pid value.
 668 */
 669int trace_pid_show(struct seq_file *m, void *v)
 670{
 671        unsigned long pid = (unsigned long)v - 1;
 672
 673        seq_printf(m, "%lu\n", pid);
 674        return 0;
 675}
 676
 677/* 128 should be much more than enough */
 678#define PID_BUF_SIZE            127
 679
 680int trace_pid_write(struct trace_pid_list *filtered_pids,
 681                    struct trace_pid_list **new_pid_list,
 682                    const char __user *ubuf, size_t cnt)
 683{
 684        struct trace_pid_list *pid_list;
 685        struct trace_parser parser;
 686        unsigned long val;
 687        int nr_pids = 0;
 688        ssize_t read = 0;
 689        ssize_t ret = 0;
 690        loff_t pos;
 691        pid_t pid;
 692
 693        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 694                return -ENOMEM;
 695
 696        /*
 697         * Always recreate a new array. The write is an all or nothing
 698         * operation. Always create a new array when adding new pids by
 699         * the user. If the operation fails, then the current list is
 700         * not modified.
 701         */
 702        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 703        if (!pid_list) {
 704                trace_parser_put(&parser);
 705                return -ENOMEM;
 706        }
 707
 708        pid_list->pid_max = READ_ONCE(pid_max);
 709
 710        /* Only truncating will shrink pid_max */
 711        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 712                pid_list->pid_max = filtered_pids->pid_max;
 713
 714        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 715        if (!pid_list->pids) {
 716                trace_parser_put(&parser);
 717                kfree(pid_list);
 718                return -ENOMEM;
 719        }
 720
 721        if (filtered_pids) {
 722                /* copy the current bits to the new max */
 723                for_each_set_bit(pid, filtered_pids->pids,
 724                                 filtered_pids->pid_max) {
 725                        set_bit(pid, pid_list->pids);
 726                        nr_pids++;
 727                }
 728        }
 729
 730        while (cnt > 0) {
 731
 732                pos = 0;
 733
 734                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 735                if (ret < 0 || !trace_parser_loaded(&parser))
 736                        break;
 737
 738                read += ret;
 739                ubuf += ret;
 740                cnt -= ret;
 741
 742                ret = -EINVAL;
 743                if (kstrtoul(parser.buffer, 0, &val))
 744                        break;
 745                if (val >= pid_list->pid_max)
 746                        break;
 747
 748                pid = (pid_t)val;
 749
 750                set_bit(pid, pid_list->pids);
 751                nr_pids++;
 752
 753                trace_parser_clear(&parser);
 754                ret = 0;
 755        }
 756        trace_parser_put(&parser);
 757
 758        if (ret < 0) {
 759                trace_free_pid_list(pid_list);
 760                return ret;
 761        }
 762
 763        if (!nr_pids) {
 764                /* Cleared the list of pids */
 765                trace_free_pid_list(pid_list);
 766                read = ret;
 767                pid_list = NULL;
 768        }
 769
 770        *new_pid_list = pid_list;
 771
 772        return read;
 773}
 774
 775static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
 776{
 777        u64 ts;
 778
 779        /* Early boot up does not have a buffer yet */
 780        if (!buf->buffer)
 781                return trace_clock_local();
 782
 783        ts = ring_buffer_time_stamp(buf->buffer);
 784        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 785
 786        return ts;
 787}
 788
 789u64 ftrace_now(int cpu)
 790{
 791        return buffer_ftrace_now(&global_trace.array_buffer, cpu);
 792}
 793
 794/**
 795 * tracing_is_enabled - Show if global_trace has been enabled
 796 *
 797 * Shows if the global trace has been enabled or not. It uses the
 798 * mirror flag "buffer_disabled" to be used in fast paths such as for
 799 * the irqsoff tracer. But it may be inaccurate due to races. If you
 800 * need to know the accurate state, use tracing_is_on() which is a little
 801 * slower, but accurate.
 802 */
 803int tracing_is_enabled(void)
 804{
 805        /*
 806         * For quick access (irqsoff uses this in fast path), just
 807         * return the mirror variable of the state of the ring buffer.
 808         * It's a little racy, but we don't really care.
 809         */
 810        smp_rmb();
 811        return !global_trace.buffer_disabled;
 812}
 813
 814/*
 815 * trace_buf_size is the size in bytes that is allocated
 816 * for a buffer. Note, the number of bytes is always rounded
 817 * to page size.
 818 *
 819 * This number is purposely set to a low number of 16384.
 820 * If the dump on oops happens, it will be much appreciated
 821 * to not have to wait for all that output. Anyway this can be
 822 * boot time and run time configurable.
 823 */
 824#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 825
 826static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 827
 828/* trace_types holds a link list of available tracers. */
 829static struct tracer            *trace_types __read_mostly;
 830
 831/*
 832 * trace_types_lock is used to protect the trace_types list.
 833 */
 834DEFINE_MUTEX(trace_types_lock);
 835
 836/*
 837 * serialize the access of the ring buffer
 838 *
 839 * ring buffer serializes readers, but it is low level protection.
 840 * The validity of the events (which returns by ring_buffer_peek() ..etc)
 841 * are not protected by ring buffer.
 842 *
 843 * The content of events may become garbage if we allow other process consumes
 844 * these events concurrently:
 845 *   A) the page of the consumed events may become a normal page
 846 *      (not reader page) in ring buffer, and this page will be rewritten
 847 *      by events producer.
 848 *   B) The page of the consumed events may become a page for splice_read,
 849 *      and this page will be returned to system.
 850 *
 851 * These primitives allow multi process access to different cpu ring buffer
 852 * concurrently.
 853 *
 854 * These primitives don't distinguish read-only and read-consume access.
 855 * Multi read-only access are also serialized.
 856 */
 857
 858#ifdef CONFIG_SMP
 859static DECLARE_RWSEM(all_cpu_access_lock);
 860static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 861
 862static inline void trace_access_lock(int cpu)
 863{
 864        if (cpu == RING_BUFFER_ALL_CPUS) {
 865                /* gain it for accessing the whole ring buffer. */
 866                down_write(&all_cpu_access_lock);
 867        } else {
 868                /* gain it for accessing a cpu ring buffer. */
 869
 870                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 871                down_read(&all_cpu_access_lock);
 872
 873                /* Secondly block other access to this @cpu ring buffer. */
 874                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 875        }
 876}
 877
 878static inline void trace_access_unlock(int cpu)
 879{
 880        if (cpu == RING_BUFFER_ALL_CPUS) {
 881                up_write(&all_cpu_access_lock);
 882        } else {
 883                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 884                up_read(&all_cpu_access_lock);
 885        }
 886}
 887
 888static inline void trace_access_lock_init(void)
 889{
 890        int cpu;
 891
 892        for_each_possible_cpu(cpu)
 893                mutex_init(&per_cpu(cpu_access_lock, cpu));
 894}
 895
 896#else
 897
 898static DEFINE_MUTEX(access_lock);
 899
 900static inline void trace_access_lock(int cpu)
 901{
 902        (void)cpu;
 903        mutex_lock(&access_lock);
 904}
 905
 906static inline void trace_access_unlock(int cpu)
 907{
 908        (void)cpu;
 909        mutex_unlock(&access_lock);
 910}
 911
 912static inline void trace_access_lock_init(void)
 913{
 914}
 915
 916#endif
 917
 918#ifdef CONFIG_STACKTRACE
 919static void __ftrace_trace_stack(struct trace_buffer *buffer,
 920                                 unsigned int trace_ctx,
 921                                 int skip, struct pt_regs *regs);
 922static inline void ftrace_trace_stack(struct trace_array *tr,
 923                                      struct trace_buffer *buffer,
 924                                      unsigned int trace_ctx,
 925                                      int skip, struct pt_regs *regs);
 926
 927#else
 928static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
 929                                        unsigned int trace_ctx,
 930                                        int skip, struct pt_regs *regs)
 931{
 932}
 933static inline void ftrace_trace_stack(struct trace_array *tr,
 934                                      struct trace_buffer *buffer,
 935                                      unsigned long trace_ctx,
 936                                      int skip, struct pt_regs *regs)
 937{
 938}
 939
 940#endif
 941
 942static __always_inline void
 943trace_event_setup(struct ring_buffer_event *event,
 944                  int type, unsigned int trace_ctx)
 945{
 946        struct trace_entry *ent = ring_buffer_event_data(event);
 947
 948        tracing_generic_entry_update(ent, type, trace_ctx);
 949}
 950
 951static __always_inline struct ring_buffer_event *
 952__trace_buffer_lock_reserve(struct trace_buffer *buffer,
 953                          int type,
 954                          unsigned long len,
 955                          unsigned int trace_ctx)
 956{
 957        struct ring_buffer_event *event;
 958
 959        event = ring_buffer_lock_reserve(buffer, len);
 960        if (event != NULL)
 961                trace_event_setup(event, type, trace_ctx);
 962
 963        return event;
 964}
 965
 966void tracer_tracing_on(struct trace_array *tr)
 967{
 968        if (tr->array_buffer.buffer)
 969                ring_buffer_record_on(tr->array_buffer.buffer);
 970        /*
 971         * This flag is looked at when buffers haven't been allocated
 972         * yet, or by some tracers (like irqsoff), that just want to
 973         * know if the ring buffer has been disabled, but it can handle
 974         * races of where it gets disabled but we still do a record.
 975         * As the check is in the fast path of the tracers, it is more
 976         * important to be fast than accurate.
 977         */
 978        tr->buffer_disabled = 0;
 979        /* Make the flag seen by readers */
 980        smp_wmb();
 981}
 982
 983/**
 984 * tracing_on - enable tracing buffers
 985 *
 986 * This function enables tracing buffers that may have been
 987 * disabled with tracing_off.
 988 */
 989void tracing_on(void)
 990{
 991        tracer_tracing_on(&global_trace);
 992}
 993EXPORT_SYMBOL_GPL(tracing_on);
 994
 995
 996static __always_inline void
 997__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
 998{
 999        __this_cpu_write(trace_taskinfo_save, true);
1000
1001        /* If this is the temp buffer, we need to commit fully */
1002        if (this_cpu_read(trace_buffered_event) == event) {
1003                /* Length is in event->array[0] */
1004                ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                /* Release the temp buffer */
1006                this_cpu_dec(trace_buffered_event_cnt);
1007        } else
1008                ring_buffer_unlock_commit(buffer, event);
1009}
1010
1011/**
1012 * __trace_puts - write a constant string into the trace buffer.
1013 * @ip:    The address of the caller
1014 * @str:   The constant string to write
1015 * @size:  The size of the string.
1016 */
1017int __trace_puts(unsigned long ip, const char *str, int size)
1018{
1019        struct ring_buffer_event *event;
1020        struct trace_buffer *buffer;
1021        struct print_entry *entry;
1022        unsigned int trace_ctx;
1023        int alloc;
1024
1025        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                return 0;
1027
1028        if (unlikely(tracing_selftest_running || tracing_disabled))
1029                return 0;
1030
1031        alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033        trace_ctx = tracing_gen_ctx();
1034        buffer = global_trace.array_buffer.buffer;
1035        ring_buffer_nest_start(buffer);
1036        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                            trace_ctx);
1038        if (!event) {
1039                size = 0;
1040                goto out;
1041        }
1042
1043        entry = ring_buffer_event_data(event);
1044        entry->ip = ip;
1045
1046        memcpy(&entry->buf, str, size);
1047
1048        /* Add a newline if necessary */
1049        if (entry->buf[size - 1] != '\n') {
1050                entry->buf[size] = '\n';
1051                entry->buf[size + 1] = '\0';
1052        } else
1053                entry->buf[size] = '\0';
1054
1055        __buffer_unlock_commit(buffer, event);
1056        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057 out:
1058        ring_buffer_nest_end(buffer);
1059        return size;
1060}
1061EXPORT_SYMBOL_GPL(__trace_puts);
1062
1063/**
1064 * __trace_bputs - write the pointer to a constant string into trace buffer
1065 * @ip:    The address of the caller
1066 * @str:   The constant string to write to the buffer to
1067 */
1068int __trace_bputs(unsigned long ip, const char *str)
1069{
1070        struct ring_buffer_event *event;
1071        struct trace_buffer *buffer;
1072        struct bputs_entry *entry;
1073        unsigned int trace_ctx;
1074        int size = sizeof(struct bputs_entry);
1075        int ret = 0;
1076
1077        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                return 0;
1079
1080        if (unlikely(tracing_selftest_running || tracing_disabled))
1081                return 0;
1082
1083        trace_ctx = tracing_gen_ctx();
1084        buffer = global_trace.array_buffer.buffer;
1085
1086        ring_buffer_nest_start(buffer);
1087        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                            trace_ctx);
1089        if (!event)
1090                goto out;
1091
1092        entry = ring_buffer_event_data(event);
1093        entry->ip                       = ip;
1094        entry->str                      = str;
1095
1096        __buffer_unlock_commit(buffer, event);
1097        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099        ret = 1;
1100 out:
1101        ring_buffer_nest_end(buffer);
1102        return ret;
1103}
1104EXPORT_SYMBOL_GPL(__trace_bputs);
1105
1106#ifdef CONFIG_TRACER_SNAPSHOT
1107static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                           void *cond_data)
1109{
1110        struct tracer *tracer = tr->current_trace;
1111        unsigned long flags;
1112
1113        if (in_nmi()) {
1114                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                return;
1117        }
1118
1119        if (!tr->allocated_snapshot) {
1120                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                internal_trace_puts("*** stopping trace here!   ***\n");
1122                tracing_off();
1123                return;
1124        }
1125
1126        /* Note, snapshot can not be used when the tracer uses it */
1127        if (tracer->use_max_tr) {
1128                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                return;
1131        }
1132
1133        local_irq_save(flags);
1134        update_max_tr(tr, current, smp_processor_id(), cond_data);
1135        local_irq_restore(flags);
1136}
1137
1138void tracing_snapshot_instance(struct trace_array *tr)
1139{
1140        tracing_snapshot_instance_cond(tr, NULL);
1141}
1142
1143/**
1144 * tracing_snapshot - take a snapshot of the current buffer.
1145 *
1146 * This causes a swap between the snapshot buffer and the current live
1147 * tracing buffer. You can use this to take snapshots of the live
1148 * trace when some condition is triggered, but continue to trace.
1149 *
1150 * Note, make sure to allocate the snapshot with either
1151 * a tracing_snapshot_alloc(), or by doing it manually
1152 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153 *
1154 * If the snapshot buffer is not allocated, it will stop tracing.
1155 * Basically making a permanent snapshot.
1156 */
1157void tracing_snapshot(void)
1158{
1159        struct trace_array *tr = &global_trace;
1160
1161        tracing_snapshot_instance(tr);
1162}
1163EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
1165/**
1166 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167 * @tr:         The tracing instance to snapshot
1168 * @cond_data:  The data to be tested conditionally, and possibly saved
1169 *
1170 * This is the same as tracing_snapshot() except that the snapshot is
1171 * conditional - the snapshot will only happen if the
1172 * cond_snapshot.update() implementation receiving the cond_data
1173 * returns true, which means that the trace array's cond_snapshot
1174 * update() operation used the cond_data to determine whether the
1175 * snapshot should be taken, and if it was, presumably saved it along
1176 * with the snapshot.
1177 */
1178void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179{
1180        tracing_snapshot_instance_cond(tr, cond_data);
1181}
1182EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184/**
1185 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186 * @tr:         The tracing instance
1187 *
1188 * When the user enables a conditional snapshot using
1189 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190 * with the snapshot.  This accessor is used to retrieve it.
1191 *
1192 * Should not be called from cond_snapshot.update(), since it takes
1193 * the tr->max_lock lock, which the code calling
1194 * cond_snapshot.update() has already done.
1195 *
1196 * Returns the cond_data associated with the trace array's snapshot.
1197 */
1198void *tracing_cond_snapshot_data(struct trace_array *tr)
1199{
1200        void *cond_data = NULL;
1201
1202        arch_spin_lock(&tr->max_lock);
1203
1204        if (tr->cond_snapshot)
1205                cond_data = tr->cond_snapshot->cond_data;
1206
1207        arch_spin_unlock(&tr->max_lock);
1208
1209        return cond_data;
1210}
1211EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                        struct array_buffer *size_buf, int cpu_id);
1215static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218{
1219        int ret;
1220
1221        if (!tr->allocated_snapshot) {
1222
1223                /* allocate spare buffer */
1224                ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                if (ret < 0)
1227                        return ret;
1228
1229                tr->allocated_snapshot = true;
1230        }
1231
1232        return 0;
1233}
1234
1235static void free_snapshot(struct trace_array *tr)
1236{
1237        /*
1238         * We don't free the ring buffer. instead, resize it because
1239         * The max_tr ring buffer has some state (e.g. ring->clock) and
1240         * we want preserve it.
1241         */
1242        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243        set_buffer_entries(&tr->max_buffer, 1);
1244        tracing_reset_online_cpus(&tr->max_buffer);
1245        tr->allocated_snapshot = false;
1246}
1247
1248/**
1249 * tracing_alloc_snapshot - allocate snapshot buffer.
1250 *
1251 * This only allocates the snapshot buffer if it isn't already
1252 * allocated - it doesn't also take a snapshot.
1253 *
1254 * This is meant to be used in cases where the snapshot buffer needs
1255 * to be set up for events that can't sleep but need to be able to
1256 * trigger a snapshot.
1257 */
1258int tracing_alloc_snapshot(void)
1259{
1260        struct trace_array *tr = &global_trace;
1261        int ret;
1262
1263        ret = tracing_alloc_snapshot_instance(tr);
1264        WARN_ON(ret < 0);
1265
1266        return ret;
1267}
1268EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270/**
1271 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272 *
1273 * This is similar to tracing_snapshot(), but it will allocate the
1274 * snapshot buffer if it isn't already allocated. Use this only
1275 * where it is safe to sleep, as the allocation may sleep.
1276 *
1277 * This causes a swap between the snapshot buffer and the current live
1278 * tracing buffer. You can use this to take snapshots of the live
1279 * trace when some condition is triggered, but continue to trace.
1280 */
1281void tracing_snapshot_alloc(void)
1282{
1283        int ret;
1284
1285        ret = tracing_alloc_snapshot();
1286        if (ret < 0)
1287                return;
1288
1289        tracing_snapshot();
1290}
1291EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292
1293/**
1294 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295 * @tr:         The tracing instance
1296 * @cond_data:  User data to associate with the snapshot
1297 * @update:     Implementation of the cond_snapshot update function
1298 *
1299 * Check whether the conditional snapshot for the given instance has
1300 * already been enabled, or if the current tracer is already using a
1301 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302 * save the cond_data and update function inside.
1303 *
1304 * Returns 0 if successful, error otherwise.
1305 */
1306int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                 cond_update_fn_t update)
1308{
1309        struct cond_snapshot *cond_snapshot;
1310        int ret = 0;
1311
1312        cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313        if (!cond_snapshot)
1314                return -ENOMEM;
1315
1316        cond_snapshot->cond_data = cond_data;
1317        cond_snapshot->update = update;
1318
1319        mutex_lock(&trace_types_lock);
1320
1321        ret = tracing_alloc_snapshot_instance(tr);
1322        if (ret)
1323                goto fail_unlock;
1324
1325        if (tr->current_trace->use_max_tr) {
1326                ret = -EBUSY;
1327                goto fail_unlock;
1328        }
1329
1330        /*
1331         * The cond_snapshot can only change to NULL without the
1332         * trace_types_lock. We don't care if we race with it going
1333         * to NULL, but we want to make sure that it's not set to
1334         * something other than NULL when we get here, which we can
1335         * do safely with only holding the trace_types_lock and not
1336         * having to take the max_lock.
1337         */
1338        if (tr->cond_snapshot) {
1339                ret = -EBUSY;
1340                goto fail_unlock;
1341        }
1342
1343        arch_spin_lock(&tr->max_lock);
1344        tr->cond_snapshot = cond_snapshot;
1345        arch_spin_unlock(&tr->max_lock);
1346
1347        mutex_unlock(&trace_types_lock);
1348
1349        return ret;
1350
1351 fail_unlock:
1352        mutex_unlock(&trace_types_lock);
1353        kfree(cond_snapshot);
1354        return ret;
1355}
1356EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357
1358/**
1359 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360 * @tr:         The tracing instance
1361 *
1362 * Check whether the conditional snapshot for the given instance is
1363 * enabled; if so, free the cond_snapshot associated with it,
1364 * otherwise return -EINVAL.
1365 *
1366 * Returns 0 if successful, error otherwise.
1367 */
1368int tracing_snapshot_cond_disable(struct trace_array *tr)
1369{
1370        int ret = 0;
1371
1372        arch_spin_lock(&tr->max_lock);
1373
1374        if (!tr->cond_snapshot)
1375                ret = -EINVAL;
1376        else {
1377                kfree(tr->cond_snapshot);
1378                tr->cond_snapshot = NULL;
1379        }
1380
1381        arch_spin_unlock(&tr->max_lock);
1382
1383        return ret;
1384}
1385EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386#else
1387void tracing_snapshot(void)
1388{
1389        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390}
1391EXPORT_SYMBOL_GPL(tracing_snapshot);
1392void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393{
1394        WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395}
1396EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397int tracing_alloc_snapshot(void)
1398{
1399        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400        return -ENODEV;
1401}
1402EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403void tracing_snapshot_alloc(void)
1404{
1405        /* Give warning */
1406        tracing_snapshot();
1407}
1408EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409void *tracing_cond_snapshot_data(struct trace_array *tr)
1410{
1411        return NULL;
1412}
1413EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415{
1416        return -ENODEV;
1417}
1418EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419int tracing_snapshot_cond_disable(struct trace_array *tr)
1420{
1421        return false;
1422}
1423EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424#endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426void tracer_tracing_off(struct trace_array *tr)
1427{
1428        if (tr->array_buffer.buffer)
1429                ring_buffer_record_off(tr->array_buffer.buffer);
1430        /*
1431         * This flag is looked at when buffers haven't been allocated
1432         * yet, or by some tracers (like irqsoff), that just want to
1433         * know if the ring buffer has been disabled, but it can handle
1434         * races of where it gets disabled but we still do a record.
1435         * As the check is in the fast path of the tracers, it is more
1436         * important to be fast than accurate.
1437         */
1438        tr->buffer_disabled = 1;
1439        /* Make the flag seen by readers */
1440        smp_wmb();
1441}
1442
1443/**
1444 * tracing_off - turn off tracing buffers
1445 *
1446 * This function stops the tracing buffers from recording data.
1447 * It does not disable any overhead the tracers themselves may
1448 * be causing. This function simply causes all recording to
1449 * the ring buffers to fail.
1450 */
1451void tracing_off(void)
1452{
1453        tracer_tracing_off(&global_trace);
1454}
1455EXPORT_SYMBOL_GPL(tracing_off);
1456
1457void disable_trace_on_warning(void)
1458{
1459        if (__disable_trace_on_warning) {
1460                trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                        "Disabling tracing due to warning\n");
1462                tracing_off();
1463        }
1464}
1465
1466/**
1467 * tracer_tracing_is_on - show real state of ring buffer enabled
1468 * @tr : the trace array to know if ring buffer is enabled
1469 *
1470 * Shows real state of the ring buffer if it is enabled or not.
1471 */
1472bool tracer_tracing_is_on(struct trace_array *tr)
1473{
1474        if (tr->array_buffer.buffer)
1475                return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476        return !tr->buffer_disabled;
1477}
1478
1479/**
1480 * tracing_is_on - show state of ring buffers enabled
1481 */
1482int tracing_is_on(void)
1483{
1484        return tracer_tracing_is_on(&global_trace);
1485}
1486EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488static int __init set_buf_size(char *str)
1489{
1490        unsigned long buf_size;
1491
1492        if (!str)
1493                return 0;
1494        buf_size = memparse(str, &str);
1495        /* nr_entries can not be zero */
1496        if (buf_size == 0)
1497                return 0;
1498        trace_buf_size = buf_size;
1499        return 1;
1500}
1501__setup("trace_buf_size=", set_buf_size);
1502
1503static int __init set_tracing_thresh(char *str)
1504{
1505        unsigned long threshold;
1506        int ret;
1507
1508        if (!str)
1509                return 0;
1510        ret = kstrtoul(str, 0, &threshold);
1511        if (ret < 0)
1512                return 0;
1513        tracing_thresh = threshold * 1000;
1514        return 1;
1515}
1516__setup("tracing_thresh=", set_tracing_thresh);
1517
1518unsigned long nsecs_to_usecs(unsigned long nsecs)
1519{
1520        return nsecs / 1000;
1521}
1522
1523/*
1524 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527 * of strings in the order that the evals (enum) were defined.
1528 */
1529#undef C
1530#define C(a, b) b
1531
1532/* These must match the bit positions in trace_iterator_flags */
1533static const char *trace_options[] = {
1534        TRACE_FLAGS
1535        NULL
1536};
1537
1538static struct {
1539        u64 (*func)(void);
1540        const char *name;
1541        int in_ns;              /* is this clock in nanoseconds? */
1542} trace_clocks[] = {
1543        { trace_clock_local,            "local",        1 },
1544        { trace_clock_global,           "global",       1 },
1545        { trace_clock_counter,          "counter",      0 },
1546        { trace_clock_jiffies,          "uptime",       0 },
1547        { trace_clock,                  "perf",         1 },
1548        { ktime_get_mono_fast_ns,       "mono",         1 },
1549        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550        { ktime_get_boot_fast_ns,       "boot",         1 },
1551        ARCH_TRACE_CLOCKS
1552};
1553
1554bool trace_clock_in_ns(struct trace_array *tr)
1555{
1556        if (trace_clocks[tr->clock_id].in_ns)
1557                return true;
1558
1559        return false;
1560}
1561
1562/*
1563 * trace_parser_get_init - gets the buffer for trace parser
1564 */
1565int trace_parser_get_init(struct trace_parser *parser, int size)
1566{
1567        memset(parser, 0, sizeof(*parser));
1568
1569        parser->buffer = kmalloc(size, GFP_KERNEL);
1570        if (!parser->buffer)
1571                return 1;
1572
1573        parser->size = size;
1574        return 0;
1575}
1576
1577/*
1578 * trace_parser_put - frees the buffer for trace parser
1579 */
1580void trace_parser_put(struct trace_parser *parser)
1581{
1582        kfree(parser->buffer);
1583        parser->buffer = NULL;
1584}
1585
1586/*
1587 * trace_get_user - reads the user input string separated by  space
1588 * (matched by isspace(ch))
1589 *
1590 * For each string found the 'struct trace_parser' is updated,
1591 * and the function returns.
1592 *
1593 * Returns number of bytes read.
1594 *
1595 * See kernel/trace/trace.h for 'struct trace_parser' details.
1596 */
1597int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598        size_t cnt, loff_t *ppos)
1599{
1600        char ch;
1601        size_t read = 0;
1602        ssize_t ret;
1603
1604        if (!*ppos)
1605                trace_parser_clear(parser);
1606
1607        ret = get_user(ch, ubuf++);
1608        if (ret)
1609                goto out;
1610
1611        read++;
1612        cnt--;
1613
1614        /*
1615         * The parser is not finished with the last write,
1616         * continue reading the user input without skipping spaces.
1617         */
1618        if (!parser->cont) {
1619                /* skip white space */
1620                while (cnt && isspace(ch)) {
1621                        ret = get_user(ch, ubuf++);
1622                        if (ret)
1623                                goto out;
1624                        read++;
1625                        cnt--;
1626                }
1627
1628                parser->idx = 0;
1629
1630                /* only spaces were written */
1631                if (isspace(ch) || !ch) {
1632                        *ppos += read;
1633                        ret = read;
1634                        goto out;
1635                }
1636        }
1637
1638        /* read the non-space input */
1639        while (cnt && !isspace(ch) && ch) {
1640                if (parser->idx < parser->size - 1)
1641                        parser->buffer[parser->idx++] = ch;
1642                else {
1643                        ret = -EINVAL;
1644                        goto out;
1645                }
1646                ret = get_user(ch, ubuf++);
1647                if (ret)
1648                        goto out;
1649                read++;
1650                cnt--;
1651        }
1652
1653        /* We either got finished input or we have to wait for another call. */
1654        if (isspace(ch) || !ch) {
1655                parser->buffer[parser->idx] = 0;
1656                parser->cont = false;
1657        } else if (parser->idx < parser->size - 1) {
1658                parser->cont = true;
1659                parser->buffer[parser->idx++] = ch;
1660                /* Make sure the parsed string always terminates with '\0'. */
1661                parser->buffer[parser->idx] = 0;
1662        } else {
1663                ret = -EINVAL;
1664                goto out;
1665        }
1666
1667        *ppos += read;
1668        ret = read;
1669
1670out:
1671        return ret;
1672}
1673
1674/* TODO add a seq_buf_to_buffer() */
1675static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676{
1677        int len;
1678
1679        if (trace_seq_used(s) <= s->seq.readpos)
1680                return -EBUSY;
1681
1682        len = trace_seq_used(s) - s->seq.readpos;
1683        if (cnt > len)
1684                cnt = len;
1685        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687        s->seq.readpos += cnt;
1688        return cnt;
1689}
1690
1691unsigned long __read_mostly     tracing_thresh;
1692static const struct file_operations tracing_max_lat_fops;
1693
1694#ifdef LATENCY_FS_NOTIFY
1695
1696static struct workqueue_struct *fsnotify_wq;
1697
1698static void latency_fsnotify_workfn(struct work_struct *work)
1699{
1700        struct trace_array *tr = container_of(work, struct trace_array,
1701                                              fsnotify_work);
1702        fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703}
1704
1705static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706{
1707        struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                              fsnotify_irqwork);
1709        queue_work(fsnotify_wq, &tr->fsnotify_work);
1710}
1711
1712static void trace_create_maxlat_file(struct trace_array *tr,
1713                                     struct dentry *d_tracer)
1714{
1715        INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716        init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717        tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                              d_tracer, &tr->max_latency,
1719                                              &tracing_max_lat_fops);
1720}
1721
1722__init static int latency_fsnotify_init(void)
1723{
1724        fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                      WQ_UNBOUND | WQ_HIGHPRI, 0);
1726        if (!fsnotify_wq) {
1727                pr_err("Unable to allocate tr_max_lat_wq\n");
1728                return -ENOMEM;
1729        }
1730        return 0;
1731}
1732
1733late_initcall_sync(latency_fsnotify_init);
1734
1735void latency_fsnotify(struct trace_array *tr)
1736{
1737        if (!fsnotify_wq)
1738                return;
1739        /*
1740         * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741         * possible that we are called from __schedule() or do_idle(), which
1742         * could cause a deadlock.
1743         */
1744        irq_work_queue(&tr->fsnotify_irqwork);
1745}
1746
1747/*
1748 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749 *  defined(CONFIG_FSNOTIFY)
1750 */
1751#else
1752
1753#define trace_create_maxlat_file(tr, d_tracer)                          \
1754        trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1755                          &tr->max_latency, &tracing_max_lat_fops)
1756
1757#endif
1758
1759#ifdef CONFIG_TRACER_MAX_TRACE
1760/*
1761 * Copy the new maximum trace into the separate maximum-trace
1762 * structure. (this way the maximum trace is permanently saved,
1763 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764 */
1765static void
1766__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767{
1768        struct array_buffer *trace_buf = &tr->array_buffer;
1769        struct array_buffer *max_buf = &tr->max_buffer;
1770        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773        max_buf->cpu = cpu;
1774        max_buf->time_start = data->preempt_timestamp;
1775
1776        max_data->saved_latency = tr->max_latency;
1777        max_data->critical_start = data->critical_start;
1778        max_data->critical_end = data->critical_end;
1779
1780        strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781        max_data->pid = tsk->pid;
1782        /*
1783         * If tsk == current, then use current_uid(), as that does not use
1784         * RCU. The irq tracer can be called out of RCU scope.
1785         */
1786        if (tsk == current)
1787                max_data->uid = current_uid();
1788        else
1789                max_data->uid = task_uid(tsk);
1790
1791        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792        max_data->policy = tsk->policy;
1793        max_data->rt_priority = tsk->rt_priority;
1794
1795        /* record this tasks comm */
1796        tracing_record_cmdline(tsk);
1797        latency_fsnotify(tr);
1798}
1799
1800/**
1801 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802 * @tr: tracer
1803 * @tsk: the task with the latency
1804 * @cpu: The cpu that initiated the trace.
1805 * @cond_data: User data associated with a conditional snapshot
1806 *
1807 * Flip the buffers between the @tr and the max_tr and record information
1808 * about which task was the cause of this latency.
1809 */
1810void
1811update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812              void *cond_data)
1813{
1814        if (tr->stop_count)
1815                return;
1816
1817        WARN_ON_ONCE(!irqs_disabled());
1818
1819        if (!tr->allocated_snapshot) {
1820                /* Only the nop tracer should hit this when disabling */
1821                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                return;
1823        }
1824
1825        arch_spin_lock(&tr->max_lock);
1826
1827        /* Inherit the recordable setting from array_buffer */
1828        if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                ring_buffer_record_on(tr->max_buffer.buffer);
1830        else
1831                ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833#ifdef CONFIG_TRACER_SNAPSHOT
1834        if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                goto out_unlock;
1836#endif
1837        swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839        __update_max_tr(tr, tsk, cpu);
1840
1841 out_unlock:
1842        arch_spin_unlock(&tr->max_lock);
1843}
1844
1845/**
1846 * update_max_tr_single - only copy one trace over, and reset the rest
1847 * @tr: tracer
1848 * @tsk: task with the latency
1849 * @cpu: the cpu of the buffer to copy.
1850 *
1851 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852 */
1853void
1854update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855{
1856        int ret;
1857
1858        if (tr->stop_count)
1859                return;
1860
1861        WARN_ON_ONCE(!irqs_disabled());
1862        if (!tr->allocated_snapshot) {
1863                /* Only the nop tracer should hit this when disabling */
1864                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                return;
1866        }
1867
1868        arch_spin_lock(&tr->max_lock);
1869
1870        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872        if (ret == -EBUSY) {
1873                /*
1874                 * We failed to swap the buffer due to a commit taking
1875                 * place on this CPU. We fail to record, but we reset
1876                 * the max trace buffer (no one writes directly to it)
1877                 * and flag that it failed.
1878                 */
1879                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                        "Failed to swap buffers due to commit in progress\n");
1881        }
1882
1883        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885        __update_max_tr(tr, tsk, cpu);
1886        arch_spin_unlock(&tr->max_lock);
1887}
1888#endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890static int wait_on_pipe(struct trace_iterator *iter, int full)
1891{
1892        /* Iterators are static, they should be filled or empty */
1893        if (trace_buffer_iter(iter, iter->cpu_file))
1894                return 0;
1895
1896        return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                full);
1898}
1899
1900#ifdef CONFIG_FTRACE_STARTUP_TEST
1901static bool selftests_can_run;
1902
1903struct trace_selftests {
1904        struct list_head                list;
1905        struct tracer                   *type;
1906};
1907
1908static LIST_HEAD(postponed_selftests);
1909
1910static int save_selftest(struct tracer *type)
1911{
1912        struct trace_selftests *selftest;
1913
1914        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915        if (!selftest)
1916                return -ENOMEM;
1917
1918        selftest->type = type;
1919        list_add(&selftest->list, &postponed_selftests);
1920        return 0;
1921}
1922
1923static int run_tracer_selftest(struct tracer *type)
1924{
1925        struct trace_array *tr = &global_trace;
1926        struct tracer *saved_tracer = tr->current_trace;
1927        int ret;
1928
1929        if (!type->selftest || tracing_selftest_disabled)
1930                return 0;
1931
1932        /*
1933         * If a tracer registers early in boot up (before scheduling is
1934         * initialized and such), then do not run its selftests yet.
1935         * Instead, run it a little later in the boot process.
1936         */
1937        if (!selftests_can_run)
1938                return save_selftest(type);
1939
1940        if (!tracing_is_on()) {
1941                pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                        type->name);
1943                return 0;
1944        }
1945
1946        /*
1947         * Run a selftest on this tracer.
1948         * Here we reset the trace buffer, and set the current
1949         * tracer to be this tracer. The tracer can then run some
1950         * internal tracing to verify that everything is in order.
1951         * If we fail, we do not register this tracer.
1952         */
1953        tracing_reset_online_cpus(&tr->array_buffer);
1954
1955        tr->current_trace = type;
1956
1957#ifdef CONFIG_TRACER_MAX_TRACE
1958        if (type->use_max_tr) {
1959                /* If we expanded the buffers, make sure the max is expanded too */
1960                if (ring_buffer_expanded)
1961                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                           RING_BUFFER_ALL_CPUS);
1963                tr->allocated_snapshot = true;
1964        }
1965#endif
1966
1967        /* the test is responsible for initializing and enabling */
1968        pr_info("Testing tracer %s: ", type->name);
1969        ret = type->selftest(type, tr);
1970        /* the test is responsible for resetting too */
1971        tr->current_trace = saved_tracer;
1972        if (ret) {
1973                printk(KERN_CONT "FAILED!\n");
1974                /* Add the warning after printing 'FAILED' */
1975                WARN_ON(1);
1976                return -1;
1977        }
1978        /* Only reset on passing, to avoid touching corrupted buffers */
1979        tracing_reset_online_cpus(&tr->array_buffer);
1980
1981#ifdef CONFIG_TRACER_MAX_TRACE
1982        if (type->use_max_tr) {
1983                tr->allocated_snapshot = false;
1984
1985                /* Shrink the max buffer again */
1986                if (ring_buffer_expanded)
1987                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                           RING_BUFFER_ALL_CPUS);
1989        }
1990#endif
1991
1992        printk(KERN_CONT "PASSED\n");
1993        return 0;
1994}
1995
1996static __init int init_trace_selftests(void)
1997{
1998        struct trace_selftests *p, *n;
1999        struct tracer *t, **last;
2000        int ret;
2001
2002        selftests_can_run = true;
2003
2004        mutex_lock(&trace_types_lock);
2005
2006        if (list_empty(&postponed_selftests))
2007                goto out;
2008
2009        pr_info("Running postponed tracer tests:\n");
2010
2011        tracing_selftest_running = true;
2012        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                /* This loop can take minutes when sanitizers are enabled, so
2014                 * lets make sure we allow RCU processing.
2015                 */
2016                cond_resched();
2017                ret = run_tracer_selftest(p->type);
2018                /* If the test fails, then warn and remove from available_tracers */
2019                if (ret < 0) {
2020                        WARN(1, "tracer: %s failed selftest, disabling\n",
2021                             p->type->name);
2022                        last = &trace_types;
2023                        for (t = trace_types; t; t = t->next) {
2024                                if (t == p->type) {
2025                                        *last = t->next;
2026                                        break;
2027                                }
2028                                last = &t->next;
2029                        }
2030                }
2031                list_del(&p->list);
2032                kfree(p);
2033        }
2034        tracing_selftest_running = false;
2035
2036 out:
2037        mutex_unlock(&trace_types_lock);
2038
2039        return 0;
2040}
2041core_initcall(init_trace_selftests);
2042#else
2043static inline int run_tracer_selftest(struct tracer *type)
2044{
2045        return 0;
2046}
2047#endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
2049static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050
2051static void __init apply_trace_boot_options(void);
2052
2053/**
2054 * register_tracer - register a tracer with the ftrace system.
2055 * @type: the plugin for the tracer
2056 *
2057 * Register a new plugin tracer.
2058 */
2059int __init register_tracer(struct tracer *type)
2060{
2061        struct tracer *t;
2062        int ret = 0;
2063
2064        if (!type->name) {
2065                pr_info("Tracer must have a name\n");
2066                return -1;
2067        }
2068
2069        if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                return -1;
2072        }
2073
2074        if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                pr_warn("Can not register tracer %s due to lockdown\n",
2076                           type->name);
2077                return -EPERM;
2078        }
2079
2080        mutex_lock(&trace_types_lock);
2081
2082        tracing_selftest_running = true;
2083
2084        for (t = trace_types; t; t = t->next) {
2085                if (strcmp(type->name, t->name) == 0) {
2086                        /* already found */
2087                        pr_info("Tracer %s already registered\n",
2088                                type->name);
2089                        ret = -1;
2090                        goto out;
2091                }
2092        }
2093
2094        if (!type->set_flag)
2095                type->set_flag = &dummy_set_flag;
2096        if (!type->flags) {
2097                /*allocate a dummy tracer_flags*/
2098                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                if (!type->flags) {
2100                        ret = -ENOMEM;
2101                        goto out;
2102                }
2103                type->flags->val = 0;
2104                type->flags->opts = dummy_tracer_opt;
2105        } else
2106                if (!type->flags->opts)
2107                        type->flags->opts = dummy_tracer_opt;
2108
2109        /* store the tracer for __set_tracer_option */
2110        type->flags->trace = type;
2111
2112        ret = run_tracer_selftest(type);
2113        if (ret < 0)
2114                goto out;
2115
2116        type->next = trace_types;
2117        trace_types = type;
2118        add_tracer_options(&global_trace, type);
2119
2120 out:
2121        tracing_selftest_running = false;
2122        mutex_unlock(&trace_types_lock);
2123
2124        if (ret || !default_bootup_tracer)
2125                goto out_unlock;
2126
2127        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                goto out_unlock;
2129
2130        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131        /* Do we want this tracer to start on bootup? */
2132        tracing_set_tracer(&global_trace, type->name);
2133        default_bootup_tracer = NULL;
2134
2135        apply_trace_boot_options();
2136
2137        /* disable other selftests, since this will break it. */
2138        disable_tracing_selftest("running a tracer");
2139
2140 out_unlock:
2141        return ret;
2142}
2143
2144static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145{
2146        struct trace_buffer *buffer = buf->buffer;
2147
2148        if (!buffer)
2149                return;
2150
2151        ring_buffer_record_disable(buffer);
2152
2153        /* Make sure all commits have finished */
2154        synchronize_rcu();
2155        ring_buffer_reset_cpu(buffer, cpu);
2156
2157        ring_buffer_record_enable(buffer);
2158}
2159
2160void tracing_reset_online_cpus(struct array_buffer *buf)
2161{
2162        struct trace_buffer *buffer = buf->buffer;
2163
2164        if (!buffer)
2165                return;
2166
2167        ring_buffer_record_disable(buffer);
2168
2169        /* Make sure all commits have finished */
2170        synchronize_rcu();
2171
2172        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174        ring_buffer_reset_online_cpus(buffer);
2175
2176        ring_buffer_record_enable(buffer);
2177}
2178
2179/* Must have trace_types_lock held */
2180void tracing_reset_all_online_cpus(void)
2181{
2182        struct trace_array *tr;
2183
2184        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                if (!tr->clear_trace)
2186                        continue;
2187                tr->clear_trace = false;
2188                tracing_reset_online_cpus(&tr->array_buffer);
2189#ifdef CONFIG_TRACER_MAX_TRACE
2190                tracing_reset_online_cpus(&tr->max_buffer);
2191#endif
2192        }
2193}
2194
2195/*
2196 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197 * is the tgid last observed corresponding to pid=i.
2198 */
2199static int *tgid_map;
2200
2201/* The maximum valid index into tgid_map. */
2202static size_t tgid_map_max;
2203
2204#define SAVED_CMDLINES_DEFAULT 128
2205#define NO_CMDLINE_MAP UINT_MAX
2206static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207struct saved_cmdlines_buffer {
2208        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209        unsigned *map_cmdline_to_pid;
2210        unsigned cmdline_num;
2211        int cmdline_idx;
2212        char *saved_cmdlines;
2213};
2214static struct saved_cmdlines_buffer *savedcmd;
2215
2216static inline char *get_saved_cmdlines(int idx)
2217{
2218        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219}
2220
2221static inline void set_cmdline(int idx, const char *cmdline)
2222{
2223        strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224}
2225
2226static int allocate_cmdlines_buffer(unsigned int val,
2227                                    struct saved_cmdlines_buffer *s)
2228{
2229        s->map_cmdline_to_pid = kmalloc_array(val,
2230                                              sizeof(*s->map_cmdline_to_pid),
2231                                              GFP_KERNEL);
2232        if (!s->map_cmdline_to_pid)
2233                return -ENOMEM;
2234
2235        s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236        if (!s->saved_cmdlines) {
2237                kfree(s->map_cmdline_to_pid);
2238                return -ENOMEM;
2239        }
2240
2241        s->cmdline_idx = 0;
2242        s->cmdline_num = val;
2243        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244               sizeof(s->map_pid_to_cmdline));
2245        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246               val * sizeof(*s->map_cmdline_to_pid));
2247
2248        return 0;
2249}
2250
2251static int trace_create_savedcmd(void)
2252{
2253        int ret;
2254
2255        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256        if (!savedcmd)
2257                return -ENOMEM;
2258
2259        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260        if (ret < 0) {
2261                kfree(savedcmd);
2262                savedcmd = NULL;
2263                return -ENOMEM;
2264        }
2265
2266        return 0;
2267}
2268
2269int is_tracing_stopped(void)
2270{
2271        return global_trace.stop_count;
2272}
2273
2274/**
2275 * tracing_start - quick start of the tracer
2276 *
2277 * If tracing is enabled but was stopped by tracing_stop,
2278 * this will start the tracer back up.
2279 */
2280void tracing_start(void)
2281{
2282        struct trace_buffer *buffer;
2283        unsigned long flags;
2284
2285        if (tracing_disabled)
2286                return;
2287
2288        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289        if (--global_trace.stop_count) {
2290                if (global_trace.stop_count < 0) {
2291                        /* Someone screwed up their debugging */
2292                        WARN_ON_ONCE(1);
2293                        global_trace.stop_count = 0;
2294                }
2295                goto out;
2296        }
2297
2298        /* Prevent the buffers from switching */
2299        arch_spin_lock(&global_trace.max_lock);
2300
2301        buffer = global_trace.array_buffer.buffer;
2302        if (buffer)
2303                ring_buffer_record_enable(buffer);
2304
2305#ifdef CONFIG_TRACER_MAX_TRACE
2306        buffer = global_trace.max_buffer.buffer;
2307        if (buffer)
2308                ring_buffer_record_enable(buffer);
2309#endif
2310
2311        arch_spin_unlock(&global_trace.max_lock);
2312
2313 out:
2314        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315}
2316
2317static void tracing_start_tr(struct trace_array *tr)
2318{
2319        struct trace_buffer *buffer;
2320        unsigned long flags;
2321
2322        if (tracing_disabled)
2323                return;
2324
2325        /* If global, we need to also start the max tracer */
2326        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327                return tracing_start();
2328
2329        raw_spin_lock_irqsave(&tr->start_lock, flags);
2330
2331        if (--tr->stop_count) {
2332                if (tr->stop_count < 0) {
2333                        /* Someone screwed up their debugging */
2334                        WARN_ON_ONCE(1);
2335                        tr->stop_count = 0;
2336                }
2337                goto out;
2338        }
2339
2340        buffer = tr->array_buffer.buffer;
2341        if (buffer)
2342                ring_buffer_record_enable(buffer);
2343
2344 out:
2345        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346}
2347
2348/**
2349 * tracing_stop - quick stop of the tracer
2350 *
2351 * Light weight way to stop tracing. Use in conjunction with
2352 * tracing_start.
2353 */
2354void tracing_stop(void)
2355{
2356        struct trace_buffer *buffer;
2357        unsigned long flags;
2358
2359        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360        if (global_trace.stop_count++)
2361                goto out;
2362
2363        /* Prevent the buffers from switching */
2364        arch_spin_lock(&global_trace.max_lock);
2365
2366        buffer = global_trace.array_buffer.buffer;
2367        if (buffer)
2368                ring_buffer_record_disable(buffer);
2369
2370#ifdef CONFIG_TRACER_MAX_TRACE
2371        buffer = global_trace.max_buffer.buffer;
2372        if (buffer)
2373                ring_buffer_record_disable(buffer);
2374#endif
2375
2376        arch_spin_unlock(&global_trace.max_lock);
2377
2378 out:
2379        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380}
2381
2382static void tracing_stop_tr(struct trace_array *tr)
2383{
2384        struct trace_buffer *buffer;
2385        unsigned long flags;
2386
2387        /* If global, we need to also stop the max tracer */
2388        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                return tracing_stop();
2390
2391        raw_spin_lock_irqsave(&tr->start_lock, flags);
2392        if (tr->stop_count++)
2393                goto out;
2394
2395        buffer = tr->array_buffer.buffer;
2396        if (buffer)
2397                ring_buffer_record_disable(buffer);
2398
2399 out:
2400        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401}
2402
2403static int trace_save_cmdline(struct task_struct *tsk)
2404{
2405        unsigned tpid, idx;
2406
2407        /* treat recording of idle task as a success */
2408        if (!tsk->pid)
2409                return 1;
2410
2411        tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413        /*
2414         * It's not the end of the world if we don't get
2415         * the lock, but we also don't want to spin
2416         * nor do we want to disable interrupts,
2417         * so if we miss here, then better luck next time.
2418         */
2419        if (!arch_spin_trylock(&trace_cmdline_lock))
2420                return 0;
2421
2422        idx = savedcmd->map_pid_to_cmdline[tpid];
2423        if (idx == NO_CMDLINE_MAP) {
2424                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425
2426                savedcmd->map_pid_to_cmdline[tpid] = idx;
2427                savedcmd->cmdline_idx = idx;
2428        }
2429
2430        savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431        set_cmdline(idx, tsk->comm);
2432
2433        arch_spin_unlock(&trace_cmdline_lock);
2434
2435        return 1;
2436}
2437
2438static void __trace_find_cmdline(int pid, char comm[])
2439{
2440        unsigned map;
2441        int tpid;
2442
2443        if (!pid) {
2444                strcpy(comm, "<idle>");
2445                return;
2446        }
2447
2448        if (WARN_ON_ONCE(pid < 0)) {
2449                strcpy(comm, "<XXX>");
2450                return;
2451        }
2452
2453        tpid = pid & (PID_MAX_DEFAULT - 1);
2454        map = savedcmd->map_pid_to_cmdline[tpid];
2455        if (map != NO_CMDLINE_MAP) {
2456                tpid = savedcmd->map_cmdline_to_pid[map];
2457                if (tpid == pid) {
2458                        strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459                        return;
2460                }
2461        }
2462        strcpy(comm, "<...>");
2463}
2464
2465void trace_find_cmdline(int pid, char comm[])
2466{
2467        preempt_disable();
2468        arch_spin_lock(&trace_cmdline_lock);
2469
2470        __trace_find_cmdline(pid, comm);
2471
2472        arch_spin_unlock(&trace_cmdline_lock);
2473        preempt_enable();
2474}
2475
2476static int *trace_find_tgid_ptr(int pid)
2477{
2478        /*
2479         * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480         * if we observe a non-NULL tgid_map then we also observe the correct
2481         * tgid_map_max.
2482         */
2483        int *map = smp_load_acquire(&tgid_map);
2484
2485        if (unlikely(!map || pid > tgid_map_max))
2486                return NULL;
2487
2488        return &map[pid];
2489}
2490
2491int trace_find_tgid(int pid)
2492{
2493        int *ptr = trace_find_tgid_ptr(pid);
2494
2495        return ptr ? *ptr : 0;
2496}
2497
2498static int trace_save_tgid(struct task_struct *tsk)
2499{
2500        int *ptr;
2501
2502        /* treat recording of idle task as a success */
2503        if (!tsk->pid)
2504                return 1;
2505
2506        ptr = trace_find_tgid_ptr(tsk->pid);
2507        if (!ptr)
2508                return 0;
2509
2510        *ptr = tsk->tgid;
2511        return 1;
2512}
2513
2514static bool tracing_record_taskinfo_skip(int flags)
2515{
2516        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517                return true;
2518        if (!__this_cpu_read(trace_taskinfo_save))
2519                return true;
2520        return false;
2521}
2522
2523/**
2524 * tracing_record_taskinfo - record the task info of a task
2525 *
2526 * @task:  task to record
2527 * @flags: TRACE_RECORD_CMDLINE for recording comm
2528 *         TRACE_RECORD_TGID for recording tgid
2529 */
2530void tracing_record_taskinfo(struct task_struct *task, int flags)
2531{
2532        bool done;
2533
2534        if (tracing_record_taskinfo_skip(flags))
2535                return;
2536
2537        /*
2538         * Record as much task information as possible. If some fail, continue
2539         * to try to record the others.
2540         */
2541        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543
2544        /* If recording any information failed, retry again soon. */
2545        if (!done)
2546                return;
2547
2548        __this_cpu_write(trace_taskinfo_save, false);
2549}
2550
2551/**
2552 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553 *
2554 * @prev: previous task during sched_switch
2555 * @next: next task during sched_switch
2556 * @flags: TRACE_RECORD_CMDLINE for recording comm
2557 *         TRACE_RECORD_TGID for recording tgid
2558 */
2559void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560                                          struct task_struct *next, int flags)
2561{
2562        bool done;
2563
2564        if (tracing_record_taskinfo_skip(flags))
2565                return;
2566
2567        /*
2568         * Record as much task information as possible. If some fail, continue
2569         * to try to record the others.
2570         */
2571        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575
2576        /* If recording any information failed, retry again soon. */
2577        if (!done)
2578                return;
2579
2580        __this_cpu_write(trace_taskinfo_save, false);
2581}
2582
2583/* Helpers to record a specific task information */
2584void tracing_record_cmdline(struct task_struct *task)
2585{
2586        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587}
2588
2589void tracing_record_tgid(struct task_struct *task)
2590{
2591        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592}
2593
2594/*
2595 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597 * simplifies those functions and keeps them in sync.
2598 */
2599enum print_line_t trace_handle_return(struct trace_seq *s)
2600{
2601        return trace_seq_has_overflowed(s) ?
2602                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603}
2604EXPORT_SYMBOL_GPL(trace_handle_return);
2605
2606unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607{
2608        unsigned int trace_flags = irqs_status;
2609        unsigned int pc;
2610
2611        pc = preempt_count();
2612
2613        if (pc & NMI_MASK)
2614                trace_flags |= TRACE_FLAG_NMI;
2615        if (pc & HARDIRQ_MASK)
2616                trace_flags |= TRACE_FLAG_HARDIRQ;
2617        if (in_serving_softirq())
2618                trace_flags |= TRACE_FLAG_SOFTIRQ;
2619
2620        if (tif_need_resched())
2621                trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622        if (test_preempt_need_resched())
2623                trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624        return (trace_flags << 16) | (pc & 0xff);
2625}
2626
2627struct ring_buffer_event *
2628trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629                          int type,
2630                          unsigned long len,
2631                          unsigned int trace_ctx)
2632{
2633        return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634}
2635
2636DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638static int trace_buffered_event_ref;
2639
2640/**
2641 * trace_buffered_event_enable - enable buffering events
2642 *
2643 * When events are being filtered, it is quicker to use a temporary
2644 * buffer to write the event data into if there's a likely chance
2645 * that it will not be committed. The discard of the ring buffer
2646 * is not as fast as committing, and is much slower than copying
2647 * a commit.
2648 *
2649 * When an event is to be filtered, allocate per cpu buffers to
2650 * write the event data into, and if the event is filtered and discarded
2651 * it is simply dropped, otherwise, the entire data is to be committed
2652 * in one shot.
2653 */
2654void trace_buffered_event_enable(void)
2655{
2656        struct ring_buffer_event *event;
2657        struct page *page;
2658        int cpu;
2659
2660        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661
2662        if (trace_buffered_event_ref++)
2663                return;
2664
2665        for_each_tracing_cpu(cpu) {
2666                page = alloc_pages_node(cpu_to_node(cpu),
2667                                        GFP_KERNEL | __GFP_NORETRY, 0);
2668                if (!page)
2669                        goto failed;
2670
2671                event = page_address(page);
2672                memset(event, 0, sizeof(*event));
2673
2674                per_cpu(trace_buffered_event, cpu) = event;
2675
2676                preempt_disable();
2677                if (cpu == smp_processor_id() &&
2678                    __this_cpu_read(trace_buffered_event) !=
2679                    per_cpu(trace_buffered_event, cpu))
2680                        WARN_ON_ONCE(1);
2681                preempt_enable();
2682        }
2683
2684        return;
2685 failed:
2686        trace_buffered_event_disable();
2687}
2688
2689static void enable_trace_buffered_event(void *data)
2690{
2691        /* Probably not needed, but do it anyway */
2692        smp_rmb();
2693        this_cpu_dec(trace_buffered_event_cnt);
2694}
2695
2696static void disable_trace_buffered_event(void *data)
2697{
2698        this_cpu_inc(trace_buffered_event_cnt);
2699}
2700
2701/**
2702 * trace_buffered_event_disable - disable buffering events
2703 *
2704 * When a filter is removed, it is faster to not use the buffered
2705 * events, and to commit directly into the ring buffer. Free up
2706 * the temp buffers when there are no more users. This requires
2707 * special synchronization with current events.
2708 */
2709void trace_buffered_event_disable(void)
2710{
2711        int cpu;
2712
2713        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714
2715        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716                return;
2717
2718        if (--trace_buffered_event_ref)
2719                return;
2720
2721        preempt_disable();
2722        /* For each CPU, set the buffer as used. */
2723        smp_call_function_many(tracing_buffer_mask,
2724                               disable_trace_buffered_event, NULL, 1);
2725        preempt_enable();
2726
2727        /* Wait for all current users to finish */
2728        synchronize_rcu();
2729
2730        for_each_tracing_cpu(cpu) {
2731                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732                per_cpu(trace_buffered_event, cpu) = NULL;
2733        }
2734        /*
2735         * Make sure trace_buffered_event is NULL before clearing
2736         * trace_buffered_event_cnt.
2737         */
2738        smp_wmb();
2739
2740        preempt_disable();
2741        /* Do the work on each cpu */
2742        smp_call_function_many(tracing_buffer_mask,
2743                               enable_trace_buffered_event, NULL, 1);
2744        preempt_enable();
2745}
2746
2747static struct trace_buffer *temp_buffer;
2748
2749struct ring_buffer_event *
2750trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751                          struct trace_event_file *trace_file,
2752                          int type, unsigned long len,
2753                          unsigned int trace_ctx)
2754{
2755        struct ring_buffer_event *entry;
2756        struct trace_array *tr = trace_file->tr;
2757        int val;
2758
2759        *current_rb = tr->array_buffer.buffer;
2760
2761        if (!tr->no_filter_buffering_ref &&
2762            (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763            (entry = this_cpu_read(trace_buffered_event))) {
2764                /*
2765                 * Filtering is on, so try to use the per cpu buffer first.
2766                 * This buffer will simulate a ring_buffer_event,
2767                 * where the type_len is zero and the array[0] will
2768                 * hold the full length.
2769                 * (see include/linux/ring-buffer.h for details on
2770                 *  how the ring_buffer_event is structured).
2771                 *
2772                 * Using a temp buffer during filtering and copying it
2773                 * on a matched filter is quicker than writing directly
2774                 * into the ring buffer and then discarding it when
2775                 * it doesn't match. That is because the discard
2776                 * requires several atomic operations to get right.
2777                 * Copying on match and doing nothing on a failed match
2778                 * is still quicker than no copy on match, but having
2779                 * to discard out of the ring buffer on a failed match.
2780                 */
2781                int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782
2783                val = this_cpu_inc_return(trace_buffered_event_cnt);
2784
2785                /*
2786                 * Preemption is disabled, but interrupts and NMIs
2787                 * can still come in now. If that happens after
2788                 * the above increment, then it will have to go
2789                 * back to the old method of allocating the event
2790                 * on the ring buffer, and if the filter fails, it
2791                 * will have to call ring_buffer_discard_commit()
2792                 * to remove it.
2793                 *
2794                 * Need to also check the unlikely case that the
2795                 * length is bigger than the temp buffer size.
2796                 * If that happens, then the reserve is pretty much
2797                 * guaranteed to fail, as the ring buffer currently
2798                 * only allows events less than a page. But that may
2799                 * change in the future, so let the ring buffer reserve
2800                 * handle the failure in that case.
2801                 */
2802                if (val == 1 && likely(len <= max_len)) {
2803                        trace_event_setup(entry, type, trace_ctx);
2804                        entry->array[0] = len;
2805                        return entry;
2806                }
2807                this_cpu_dec(trace_buffered_event_cnt);
2808        }
2809
2810        entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811                                            trace_ctx);
2812        /*
2813         * If tracing is off, but we have triggers enabled
2814         * we still need to look at the event data. Use the temp_buffer
2815         * to store the trace event for the trigger to use. It's recursive
2816         * safe and will not be recorded anywhere.
2817         */
2818        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819                *current_rb = temp_buffer;
2820                entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821                                                    trace_ctx);
2822        }
2823        return entry;
2824}
2825EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826
2827static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828static DEFINE_MUTEX(tracepoint_printk_mutex);
2829
2830static void output_printk(struct trace_event_buffer *fbuffer)
2831{
2832        struct trace_event_call *event_call;
2833        struct trace_event_file *file;
2834        struct trace_event *event;
2835        unsigned long flags;
2836        struct trace_iterator *iter = tracepoint_print_iter;
2837
2838        /* We should never get here if iter is NULL */
2839        if (WARN_ON_ONCE(!iter))
2840                return;
2841
2842        event_call = fbuffer->trace_file->event_call;
2843        if (!event_call || !event_call->event.funcs ||
2844            !event_call->event.funcs->trace)
2845                return;
2846
2847        file = fbuffer->trace_file;
2848        if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849            (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850             !filter_match_preds(file->filter, fbuffer->entry)))
2851                return;
2852
2853        event = &fbuffer->trace_file->event_call->event;
2854
2855        spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856        trace_seq_init(&iter->seq);
2857        iter->ent = fbuffer->entry;
2858        event_call->event.funcs->trace(iter, 0, event);
2859        trace_seq_putc(&iter->seq, 0);
2860        printk("%s", iter->seq.buffer);
2861
2862        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863}
2864
2865int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866                             void *buffer, size_t *lenp,
2867                             loff_t *ppos)
2868{
2869        int save_tracepoint_printk;
2870        int ret;
2871
2872        mutex_lock(&tracepoint_printk_mutex);
2873        save_tracepoint_printk = tracepoint_printk;
2874
2875        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876
2877        /*
2878         * This will force exiting early, as tracepoint_printk
2879         * is always zero when tracepoint_printk_iter is not allocated
2880         */
2881        if (!tracepoint_print_iter)
2882                tracepoint_printk = 0;
2883
2884        if (save_tracepoint_printk == tracepoint_printk)
2885                goto out;
2886
2887        if (tracepoint_printk)
2888                static_key_enable(&tracepoint_printk_key.key);
2889        else
2890                static_key_disable(&tracepoint_printk_key.key);
2891
2892 out:
2893        mutex_unlock(&tracepoint_printk_mutex);
2894
2895        return ret;
2896}
2897
2898void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899{
2900        enum event_trigger_type tt = ETT_NONE;
2901        struct trace_event_file *file = fbuffer->trace_file;
2902
2903        if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2904                        fbuffer->entry, &tt))
2905                goto discard;
2906
2907        if (static_key_false(&tracepoint_printk_key.key))
2908                output_printk(fbuffer);
2909
2910        if (static_branch_unlikely(&trace_event_exports_enabled))
2911                ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2912
2913        trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2914                        fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2915
2916discard:
2917        if (tt)
2918                event_triggers_post_call(file, tt);
2919
2920}
2921EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2922
2923/*
2924 * Skip 3:
2925 *
2926 *   trace_buffer_unlock_commit_regs()
2927 *   trace_event_buffer_commit()
2928 *   trace_event_raw_event_xxx()
2929 */
2930# define STACK_SKIP 3
2931
2932void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2933                                     struct trace_buffer *buffer,
2934                                     struct ring_buffer_event *event,
2935                                     unsigned int trace_ctx,
2936                                     struct pt_regs *regs)
2937{
2938        __buffer_unlock_commit(buffer, event);
2939
2940        /*
2941         * If regs is not set, then skip the necessary functions.
2942         * Note, we can still get here via blktrace, wakeup tracer
2943         * and mmiotrace, but that's ok if they lose a function or
2944         * two. They are not that meaningful.
2945         */
2946        ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2947        ftrace_trace_userstack(tr, buffer, trace_ctx);
2948}
2949
2950/*
2951 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2952 */
2953void
2954trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2955                                   struct ring_buffer_event *event)
2956{
2957        __buffer_unlock_commit(buffer, event);
2958}
2959
2960void
2961trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2962               parent_ip, unsigned int trace_ctx)
2963{
2964        struct trace_event_call *call = &event_function;
2965        struct trace_buffer *buffer = tr->array_buffer.buffer;
2966        struct ring_buffer_event *event;
2967        struct ftrace_entry *entry;
2968
2969        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2970                                            trace_ctx);
2971        if (!event)
2972                return;
2973        entry   = ring_buffer_event_data(event);
2974        entry->ip                       = ip;
2975        entry->parent_ip                = parent_ip;
2976
2977        if (!call_filter_check_discard(call, entry, buffer, event)) {
2978                if (static_branch_unlikely(&trace_function_exports_enabled))
2979                        ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2980                __buffer_unlock_commit(buffer, event);
2981        }
2982}
2983
2984#ifdef CONFIG_STACKTRACE
2985
2986/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2987#define FTRACE_KSTACK_NESTING   4
2988
2989#define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2990
2991struct ftrace_stack {
2992        unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2993};
2994
2995
2996struct ftrace_stacks {
2997        struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2998};
2999
3000static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3001static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3002
3003static void __ftrace_trace_stack(struct trace_buffer *buffer,
3004                                 unsigned int trace_ctx,
3005                                 int skip, struct pt_regs *regs)
3006{
3007        struct trace_event_call *call = &event_kernel_stack;
3008        struct ring_buffer_event *event;
3009        unsigned int size, nr_entries;
3010        struct ftrace_stack *fstack;
3011        struct stack_entry *entry;
3012        int stackidx;
3013
3014        /*
3015         * Add one, for this function and the call to save_stack_trace()
3016         * If regs is set, then these functions will not be in the way.
3017         */
3018#ifndef CONFIG_UNWINDER_ORC
3019        if (!regs)
3020                skip++;
3021#endif
3022
3023        preempt_disable_notrace();
3024
3025        stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3026
3027        /* This should never happen. If it does, yell once and skip */
3028        if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3029                goto out;
3030
3031        /*
3032         * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3033         * interrupt will either see the value pre increment or post
3034         * increment. If the interrupt happens pre increment it will have
3035         * restored the counter when it returns.  We just need a barrier to
3036         * keep gcc from moving things around.
3037         */
3038        barrier();
3039
3040        fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3041        size = ARRAY_SIZE(fstack->calls);
3042
3043        if (regs) {
3044                nr_entries = stack_trace_save_regs(regs, fstack->calls,
3045                                                   size, skip);
3046        } else {
3047                nr_entries = stack_trace_save(fstack->calls, size, skip);
3048        }
3049
3050        size = nr_entries * sizeof(unsigned long);
3051        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3052                                    (sizeof(*entry) - sizeof(entry->caller)) + size,
3053                                    trace_ctx);
3054        if (!event)
3055                goto out;
3056        entry = ring_buffer_event_data(event);
3057
3058        memcpy(&entry->caller, fstack->calls, size);
3059        entry->size = nr_entries;
3060
3061        if (!call_filter_check_discard(call, entry, buffer, event))
3062                __buffer_unlock_commit(buffer, event);
3063
3064 out:
3065        /* Again, don't let gcc optimize things here */
3066        barrier();
3067        __this_cpu_dec(ftrace_stack_reserve);
3068        preempt_enable_notrace();
3069
3070}
3071
3072static inline void ftrace_trace_stack(struct trace_array *tr,
3073                                      struct trace_buffer *buffer,
3074                                      unsigned int trace_ctx,
3075                                      int skip, struct pt_regs *regs)
3076{
3077        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078                return;
3079
3080        __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3081}
3082
3083void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084                   int skip)
3085{
3086        struct trace_buffer *buffer = tr->array_buffer.buffer;
3087
3088        if (rcu_is_watching()) {
3089                __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090                return;
3091        }
3092
3093        /*
3094         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3095         * but if the above rcu_is_watching() failed, then the NMI
3096         * triggered someplace critical, and rcu_irq_enter() should
3097         * not be called from NMI.
3098         */
3099        if (unlikely(in_nmi()))
3100                return;
3101
3102        rcu_irq_enter_irqson();
3103        __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104        rcu_irq_exit_irqson();
3105}
3106
3107/**
3108 * trace_dump_stack - record a stack back trace in the trace buffer
3109 * @skip: Number of functions to skip (helper handlers)
3110 */
3111void trace_dump_stack(int skip)
3112{
3113        if (tracing_disabled || tracing_selftest_running)
3114                return;
3115
3116#ifndef CONFIG_UNWINDER_ORC
3117        /* Skip 1 to skip this function. */
3118        skip++;
3119#endif
3120        __ftrace_trace_stack(global_trace.array_buffer.buffer,
3121                             tracing_gen_ctx(), skip, NULL);
3122}
3123EXPORT_SYMBOL_GPL(trace_dump_stack);
3124
3125#ifdef CONFIG_USER_STACKTRACE_SUPPORT
3126static DEFINE_PER_CPU(int, user_stack_count);
3127
3128static void
3129ftrace_trace_userstack(struct trace_array *tr,
3130                       struct trace_buffer *buffer, unsigned int trace_ctx)
3131{
3132        struct trace_event_call *call = &event_user_stack;
3133        struct ring_buffer_event *event;
3134        struct userstack_entry *entry;
3135
3136        if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3137                return;
3138
3139        /*
3140         * NMIs can not handle page faults, even with fix ups.
3141         * The save user stack can (and often does) fault.
3142         */
3143        if (unlikely(in_nmi()))
3144                return;
3145
3146        /*
3147         * prevent recursion, since the user stack tracing may
3148         * trigger other kernel events.
3149         */
3150        preempt_disable();
3151        if (__this_cpu_read(user_stack_count))
3152                goto out;
3153
3154        __this_cpu_inc(user_stack_count);
3155
3156        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3157                                            sizeof(*entry), trace_ctx);
3158        if (!event)
3159                goto out_drop_count;
3160        entry   = ring_buffer_event_data(event);
3161
3162        entry->tgid             = current->tgid;
3163        memset(&entry->caller, 0, sizeof(entry->caller));
3164
3165        stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3166        if (!call_filter_check_discard(call, entry, buffer, event))
3167                __buffer_unlock_commit(buffer, event);
3168
3169 out_drop_count:
3170        __this_cpu_dec(user_stack_count);
3171 out:
3172        preempt_enable();
3173}
3174#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3175static void ftrace_trace_userstack(struct trace_array *tr,
3176                                   struct trace_buffer *buffer,
3177                                   unsigned int trace_ctx)
3178{
3179}
3180#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3181
3182#endif /* CONFIG_STACKTRACE */
3183
3184static inline void
3185func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3186                          unsigned long long delta)
3187{
3188        entry->bottom_delta_ts = delta & U32_MAX;
3189        entry->top_delta_ts = (delta >> 32);
3190}
3191
3192void trace_last_func_repeats(struct trace_array *tr,
3193                             struct trace_func_repeats *last_info,
3194                             unsigned int trace_ctx)
3195{
3196        struct trace_buffer *buffer = tr->array_buffer.buffer;
3197        struct func_repeats_entry *entry;
3198        struct ring_buffer_event *event;
3199        u64 delta;
3200
3201        event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3202                                            sizeof(*entry), trace_ctx);
3203        if (!event)
3204                return;
3205
3206        delta = ring_buffer_event_time_stamp(buffer, event) -
3207                last_info->ts_last_call;
3208
3209        entry = ring_buffer_event_data(event);
3210        entry->ip = last_info->ip;
3211        entry->parent_ip = last_info->parent_ip;
3212        entry->count = last_info->count;
3213        func_repeats_set_delta_ts(entry, delta);
3214
3215        __buffer_unlock_commit(buffer, event);
3216}
3217
3218/* created for use with alloc_percpu */
3219struct trace_buffer_struct {
3220        int nesting;
3221        char buffer[4][TRACE_BUF_SIZE];
3222};
3223
3224static struct trace_buffer_struct *trace_percpu_buffer;
3225
3226/*
3227 * This allows for lockless recording.  If we're nested too deeply, then
3228 * this returns NULL.
3229 */
3230static char *get_trace_buf(void)
3231{
3232        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3233
3234        if (!buffer || buffer->nesting >= 4)
3235                return NULL;
3236
3237        buffer->nesting++;
3238
3239        /* Interrupts must see nesting incremented before we use the buffer */
3240        barrier();
3241        return &buffer->buffer[buffer->nesting - 1][0];
3242}
3243
3244static void put_trace_buf(void)
3245{
3246        /* Don't let the decrement of nesting leak before this */
3247        barrier();
3248        this_cpu_dec(trace_percpu_buffer->nesting);
3249}
3250
3251static int alloc_percpu_trace_buffer(void)
3252{
3253        struct trace_buffer_struct *buffers;
3254
3255        if (trace_percpu_buffer)
3256                return 0;
3257
3258        buffers = alloc_percpu(struct trace_buffer_struct);
3259        if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3260                return -ENOMEM;
3261
3262        trace_percpu_buffer = buffers;
3263        return 0;
3264}
3265
3266static int buffers_allocated;
3267
3268void trace_printk_init_buffers(void)
3269{
3270        if (buffers_allocated)
3271                return;
3272
3273        if (alloc_percpu_trace_buffer())
3274                return;
3275
3276        /* trace_printk() is for debug use only. Don't use it in production. */
3277
3278        pr_warn("\n");
3279        pr_warn("**********************************************************\n");
3280        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3281        pr_warn("**                                                      **\n");
3282        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3283        pr_warn("**                                                      **\n");
3284        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3285        pr_warn("** unsafe for production use.                           **\n");
3286        pr_warn("**                                                      **\n");
3287        pr_warn("** If you see this message and you are not debugging    **\n");
3288        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3289        pr_warn("**                                                      **\n");
3290        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291        pr_warn("**********************************************************\n");
3292
3293        /* Expand the buffers to set size */
3294        tracing_update_buffers();
3295
3296        buffers_allocated = 1;
3297
3298        /*
3299         * trace_printk_init_buffers() can be called by modules.
3300         * If that happens, then we need to start cmdline recording
3301         * directly here. If the global_trace.buffer is already
3302         * allocated here, then this was called by module code.
3303         */
3304        if (global_trace.array_buffer.buffer)
3305                tracing_start_cmdline_record();
3306}
3307EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3308
3309void trace_printk_start_comm(void)
3310{
3311        /* Start tracing comms if trace printk is set */
3312        if (!buffers_allocated)
3313                return;
3314        tracing_start_cmdline_record();
3315}
3316
3317static void trace_printk_start_stop_comm(int enabled)
3318{
3319        if (!buffers_allocated)
3320                return;
3321
3322        if (enabled)
3323                tracing_start_cmdline_record();
3324        else
3325                tracing_stop_cmdline_record();
3326}
3327
3328/**
3329 * trace_vbprintk - write binary msg to tracing buffer
3330 * @ip:    The address of the caller
3331 * @fmt:   The string format to write to the buffer
3332 * @args:  Arguments for @fmt
3333 */
3334int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3335{
3336        struct trace_event_call *call = &event_bprint;
3337        struct ring_buffer_event *event;
3338        struct trace_buffer *buffer;
3339        struct trace_array *tr = &global_trace;
3340        struct bprint_entry *entry;
3341        unsigned int trace_ctx;
3342        char *tbuffer;
3343        int len = 0, size;
3344
3345        if (unlikely(tracing_selftest_running || tracing_disabled))
3346                return 0;
3347
3348        /* Don't pollute graph traces with trace_vprintk internals */
3349        pause_graph_tracing();
3350
3351        trace_ctx = tracing_gen_ctx();
3352        preempt_disable_notrace();
3353
3354        tbuffer = get_trace_buf();
3355        if (!tbuffer) {
3356                len = 0;
3357                goto out_nobuffer;
3358        }
3359
3360        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3361
3362        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3363                goto out_put;
3364
3365        size = sizeof(*entry) + sizeof(u32) * len;
3366        buffer = tr->array_buffer.buffer;
3367        ring_buffer_nest_start(buffer);
3368        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3369                                            trace_ctx);
3370        if (!event)
3371                goto out;
3372        entry = ring_buffer_event_data(event);
3373        entry->ip                       = ip;
3374        entry->fmt                      = fmt;
3375
3376        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3377        if (!call_filter_check_discard(call, entry, buffer, event)) {
3378                __buffer_unlock_commit(buffer, event);
3379                ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380        }
3381
3382out:
3383        ring_buffer_nest_end(buffer);
3384out_put:
3385        put_trace_buf();
3386
3387out_nobuffer:
3388        preempt_enable_notrace();
3389        unpause_graph_tracing();
3390
3391        return len;
3392}
3393EXPORT_SYMBOL_GPL(trace_vbprintk);
3394
3395__printf(3, 0)
3396static int
3397__trace_array_vprintk(struct trace_buffer *buffer,
3398                      unsigned long ip, const char *fmt, va_list args)
3399{
3400        struct trace_event_call *call = &event_print;
3401        struct ring_buffer_event *event;
3402        int len = 0, size;
3403        struct print_entry *entry;
3404        unsigned int trace_ctx;
3405        char *tbuffer;
3406
3407        if (tracing_disabled || tracing_selftest_running)
3408                return 0;
3409
3410        /* Don't pollute graph traces with trace_vprintk internals */
3411        pause_graph_tracing();
3412
3413        trace_ctx = tracing_gen_ctx();
3414        preempt_disable_notrace();
3415
3416
3417        tbuffer = get_trace_buf();
3418        if (!tbuffer) {
3419                len = 0;
3420                goto out_nobuffer;
3421        }
3422
3423        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3424
3425        size = sizeof(*entry) + len + 1;
3426        ring_buffer_nest_start(buffer);
3427        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3428                                            trace_ctx);
3429        if (!event)
3430                goto out;
3431        entry = ring_buffer_event_data(event);
3432        entry->ip = ip;
3433
3434        memcpy(&entry->buf, tbuffer, len + 1);
3435        if (!call_filter_check_discard(call, entry, buffer, event)) {
3436                __buffer_unlock_commit(buffer, event);
3437                ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3438        }
3439
3440out:
3441        ring_buffer_nest_end(buffer);
3442        put_trace_buf();
3443
3444out_nobuffer:
3445        preempt_enable_notrace();
3446        unpause_graph_tracing();
3447
3448        return len;
3449}
3450
3451__printf(3, 0)
3452int trace_array_vprintk(struct trace_array *tr,
3453                        unsigned long ip, const char *fmt, va_list args)
3454{
3455        return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3456}
3457
3458/**
3459 * trace_array_printk - Print a message to a specific instance
3460 * @tr: The instance trace_array descriptor
3461 * @ip: The instruction pointer that this is called from.
3462 * @fmt: The format to print (printf format)
3463 *
3464 * If a subsystem sets up its own instance, they have the right to
3465 * printk strings into their tracing instance buffer using this
3466 * function. Note, this function will not write into the top level
3467 * buffer (use trace_printk() for that), as writing into the top level
3468 * buffer should only have events that can be individually disabled.
3469 * trace_printk() is only used for debugging a kernel, and should not
3470 * be ever incorporated in normal use.
3471 *
3472 * trace_array_printk() can be used, as it will not add noise to the
3473 * top level tracing buffer.
3474 *
3475 * Note, trace_array_init_printk() must be called on @tr before this
3476 * can be used.
3477 */
3478__printf(3, 0)
3479int trace_array_printk(struct trace_array *tr,
3480                       unsigned long ip, const char *fmt, ...)
3481{
3482        int ret;
3483        va_list ap;
3484
3485        if (!tr)
3486                return -ENOENT;
3487
3488        /* This is only allowed for created instances */
3489        if (tr == &global_trace)
3490                return 0;
3491
3492        if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3493                return 0;
3494
3495        va_start(ap, fmt);
3496        ret = trace_array_vprintk(tr, ip, fmt, ap);
3497        va_end(ap);
3498        return ret;
3499}
3500EXPORT_SYMBOL_GPL(trace_array_printk);
3501
3502/**
3503 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3504 * @tr: The trace array to initialize the buffers for
3505 *
3506 * As trace_array_printk() only writes into instances, they are OK to
3507 * have in the kernel (unlike trace_printk()). This needs to be called
3508 * before trace_array_printk() can be used on a trace_array.
3509 */
3510int trace_array_init_printk(struct trace_array *tr)
3511{
3512        if (!tr)
3513                return -ENOENT;
3514
3515        /* This is only allowed for created instances */
3516        if (tr == &global_trace)
3517                return -EINVAL;
3518
3519        return alloc_percpu_trace_buffer();
3520}
3521EXPORT_SYMBOL_GPL(trace_array_init_printk);
3522
3523__printf(3, 4)
3524int trace_array_printk_buf(struct trace_buffer *buffer,
3525                           unsigned long ip, const char *fmt, ...)
3526{
3527        int ret;
3528        va_list ap;
3529
3530        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3531                return 0;
3532
3533        va_start(ap, fmt);
3534        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3535        va_end(ap);
3536        return ret;
3537}
3538
3539__printf(2, 0)
3540int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3541{
3542        return trace_array_vprintk(&global_trace, ip, fmt, args);
3543}
3544EXPORT_SYMBOL_GPL(trace_vprintk);
3545
3546static void trace_iterator_increment(struct trace_iterator *iter)
3547{
3548        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3549
3550        iter->idx++;
3551        if (buf_iter)
3552                ring_buffer_iter_advance(buf_iter);
3553}
3554
3555static struct trace_entry *
3556peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3557                unsigned long *lost_events)
3558{
3559        struct ring_buffer_event *event;
3560        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3561
3562        if (buf_iter) {
3563                event = ring_buffer_iter_peek(buf_iter, ts);
3564                if (lost_events)
3565                        *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3566                                (unsigned long)-1 : 0;
3567        } else {
3568                event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3569                                         lost_events);
3570        }
3571
3572        if (event) {
3573                iter->ent_size = ring_buffer_event_length(event);
3574                return ring_buffer_event_data(event);
3575        }
3576        iter->ent_size = 0;
3577        return NULL;
3578}
3579
3580static struct trace_entry *
3581__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3582                  unsigned long *missing_events, u64 *ent_ts)
3583{
3584        struct trace_buffer *buffer = iter->array_buffer->buffer;
3585        struct trace_entry *ent, *next = NULL;
3586        unsigned long lost_events = 0, next_lost = 0;
3587        int cpu_file = iter->cpu_file;
3588        u64 next_ts = 0, ts;
3589        int next_cpu = -1;
3590        int next_size = 0;
3591        int cpu;
3592
3593        /*
3594         * If we are in a per_cpu trace file, don't bother by iterating over
3595         * all cpu and peek directly.
3596         */
3597        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3598                if (ring_buffer_empty_cpu(buffer, cpu_file))
3599                        return NULL;
3600                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3601                if (ent_cpu)
3602                        *ent_cpu = cpu_file;
3603
3604                return ent;
3605        }
3606
3607        for_each_tracing_cpu(cpu) {
3608
3609                if (ring_buffer_empty_cpu(buffer, cpu))
3610                        continue;
3611
3612                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3613
3614                /*
3615                 * Pick the entry with the smallest timestamp:
3616                 */
3617                if (ent && (!next || ts < next_ts)) {
3618                        next = ent;
3619                        next_cpu = cpu;
3620                        next_ts = ts;
3621                        next_lost = lost_events;
3622                        next_size = iter->ent_size;
3623                }
3624        }
3625
3626        iter->ent_size = next_size;
3627
3628        if (ent_cpu)
3629                *ent_cpu = next_cpu;
3630
3631        if (ent_ts)
3632                *ent_ts = next_ts;
3633
3634        if (missing_events)
3635                *missing_events = next_lost;
3636
3637        return next;
3638}
3639
3640#define STATIC_FMT_BUF_SIZE     128
3641static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3642
3643static char *trace_iter_expand_format(struct trace_iterator *iter)
3644{
3645        char *tmp;
3646
3647        /*
3648         * iter->tr is NULL when used with tp_printk, which makes
3649         * this get called where it is not safe to call krealloc().
3650         */
3651        if (!iter->tr || iter->fmt == static_fmt_buf)
3652                return NULL;
3653
3654        tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3655                       GFP_KERNEL);
3656        if (tmp) {
3657                iter->fmt_size += STATIC_FMT_BUF_SIZE;
3658                iter->fmt = tmp;
3659        }
3660
3661        return tmp;
3662}
3663
3664/* Returns true if the string is safe to dereference from an event */
3665static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3666{
3667        unsigned long addr = (unsigned long)str;
3668        struct trace_event *trace_event;
3669        struct trace_event_call *event;
3670
3671        /* OK if part of the event data */
3672        if ((addr >= (unsigned long)iter->ent) &&
3673            (addr < (unsigned long)iter->ent + iter->ent_size))
3674                return true;
3675
3676        /* OK if part of the temp seq buffer */
3677        if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3678            (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3679                return true;
3680
3681        /* Core rodata can not be freed */
3682        if (is_kernel_rodata(addr))
3683                return true;
3684
3685        if (trace_is_tracepoint_string(str))
3686                return true;
3687
3688        /*
3689         * Now this could be a module event, referencing core module
3690         * data, which is OK.
3691         */
3692        if (!iter->ent)
3693                return false;
3694
3695        trace_event = ftrace_find_event(iter->ent->type);
3696        if (!trace_event)
3697                return false;
3698
3699        event = container_of(trace_event, struct trace_event_call, event);
3700        if (!event->mod)
3701                return false;
3702
3703        /* Would rather have rodata, but this will suffice */
3704        if (within_module_core(addr, event->mod))
3705                return true;
3706
3707        return false;
3708}
3709
3710static const char *show_buffer(struct trace_seq *s)
3711{
3712        struct seq_buf *seq = &s->seq;
3713
3714        seq_buf_terminate(seq);
3715
3716        return seq->buffer;
3717}
3718
3719static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3720
3721static int test_can_verify_check(const char *fmt, ...)
3722{
3723        char buf[16];
3724        va_list ap;
3725        int ret;
3726
3727        /*
3728         * The verifier is dependent on vsnprintf() modifies the va_list
3729         * passed to it, where it is sent as a reference. Some architectures
3730         * (like x86_32) passes it by value, which means that vsnprintf()
3731         * does not modify the va_list passed to it, and the verifier
3732         * would then need to be able to understand all the values that
3733         * vsnprintf can use. If it is passed by value, then the verifier
3734         * is disabled.
3735         */
3736        va_start(ap, fmt);
3737        vsnprintf(buf, 16, "%d", ap);
3738        ret = va_arg(ap, int);
3739        va_end(ap);
3740
3741        return ret;
3742}
3743
3744static void test_can_verify(void)
3745{
3746        if (!test_can_verify_check("%d %d", 0, 1)) {
3747                pr_info("trace event string verifier disabled\n");
3748                static_branch_inc(&trace_no_verify);
3749        }
3750}
3751
3752/**
3753 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3754 * @iter: The iterator that holds the seq buffer and the event being printed
3755 * @fmt: The format used to print the event
3756 * @ap: The va_list holding the data to print from @fmt.
3757 *
3758 * This writes the data into the @iter->seq buffer using the data from
3759 * @fmt and @ap. If the format has a %s, then the source of the string
3760 * is examined to make sure it is safe to print, otherwise it will
3761 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3762 * pointer.
3763 */
3764void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3765                         va_list ap)
3766{
3767        const char *p = fmt;
3768        const char *str;
3769        int i, j;
3770
3771        if (WARN_ON_ONCE(!fmt))
3772                return;
3773
3774        if (static_branch_unlikely(&trace_no_verify))
3775                goto print;
3776
3777        /* Don't bother checking when doing a ftrace_dump() */
3778        if (iter->fmt == static_fmt_buf)
3779                goto print;
3780
3781        while (*p) {
3782                bool star = false;
3783                int len = 0;
3784
3785                j = 0;
3786
3787                /* We only care about %s and variants */
3788                for (i = 0; p[i]; i++) {
3789                        if (i + 1 >= iter->fmt_size) {
3790                                /*
3791                                 * If we can't expand the copy buffer,
3792                                 * just print it.
3793                                 */
3794                                if (!trace_iter_expand_format(iter))
3795                                        goto print;
3796                        }
3797
3798                        if (p[i] == '\\' && p[i+1]) {
3799                                i++;
3800                                continue;
3801                        }
3802                        if (p[i] == '%') {
3803                                /* Need to test cases like %08.*s */
3804                                for (j = 1; p[i+j]; j++) {
3805                                        if (isdigit(p[i+j]) ||
3806                                            p[i+j] == '.')
3807                                                continue;
3808                                        if (p[i+j] == '*') {
3809                                                star = true;
3810                                                continue;
3811                                        }
3812                                        break;
3813                                }
3814                                if (p[i+j] == 's')
3815                                        break;
3816                                star = false;
3817                        }
3818                        j = 0;
3819                }
3820                /* If no %s found then just print normally */
3821                if (!p[i])
3822                        break;
3823
3824                /* Copy up to the %s, and print that */
3825                strncpy(iter->fmt, p, i);
3826                iter->fmt[i] = '\0';
3827                trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3828
3829                if (star)
3830                        len = va_arg(ap, int);
3831
3832                /* The ap now points to the string data of the %s */
3833                str = va_arg(ap, const char *);
3834
3835                /*
3836                 * If you hit this warning, it is likely that the
3837                 * trace event in question used %s on a string that
3838                 * was saved at the time of the event, but may not be
3839                 * around when the trace is read. Use __string(),
3840                 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3841                 * instead. See samples/trace_events/trace-events-sample.h
3842                 * for reference.
3843                 */
3844                if (WARN_ONCE(!trace_safe_str(iter, str),
3845                              "fmt: '%s' current_buffer: '%s'",
3846                              fmt, show_buffer(&iter->seq))) {
3847                        int ret;
3848
3849                        /* Try to safely read the string */
3850                        if (star) {
3851                                if (len + 1 > iter->fmt_size)
3852                                        len = iter->fmt_size - 1;
3853                                if (len < 0)
3854                                        len = 0;
3855                                ret = copy_from_kernel_nofault(iter->fmt, str, len);
3856                                iter->fmt[len] = 0;
3857                                star = false;
3858                        } else {
3859                                ret = strncpy_from_kernel_nofault(iter->fmt, str,
3860                                                                  iter->fmt_size);
3861                        }
3862                        if (ret < 0)
3863                                trace_seq_printf(&iter->seq, "(0x%px)", str);
3864                        else
3865                                trace_seq_printf(&iter->seq, "(0x%px:%s)",
3866                                                 str, iter->fmt);
3867                        str = "[UNSAFE-MEMORY]";
3868                        strcpy(iter->fmt, "%s");
3869                } else {
3870                        strncpy(iter->fmt, p + i, j + 1);
3871                        iter->fmt[j+1] = '\0';
3872                }
3873                if (star)
3874                        trace_seq_printf(&iter->seq, iter->fmt, len, str);
3875                else
3876                        trace_seq_printf(&iter->seq, iter->fmt, str);
3877
3878                p += i + j + 1;
3879        }
3880 print:
3881        if (*p)
3882                trace_seq_vprintf(&iter->seq, p, ap);
3883}
3884
3885const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3886{
3887        const char *p, *new_fmt;
3888        char *q;
3889
3890        if (WARN_ON_ONCE(!fmt))
3891                return fmt;
3892
3893        if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3894                return fmt;
3895
3896        p = fmt;
3897        new_fmt = q = iter->fmt;
3898        while (*p) {
3899                if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3900                        if (!trace_iter_expand_format(iter))
3901                                return fmt;
3902
3903                        q += iter->fmt - new_fmt;
3904                        new_fmt = iter->fmt;
3905                }
3906
3907                *q++ = *p++;
3908
3909                /* Replace %p with %px */
3910                if (p[-1] == '%') {
3911                        if (p[0] == '%') {
3912                                *q++ = *p++;
3913                        } else if (p[0] == 'p' && !isalnum(p[1])) {
3914                                *q++ = *p++;
3915                                *q++ = 'x';
3916                        }
3917                }
3918        }
3919        *q = '\0';
3920
3921        return new_fmt;
3922}
3923
3924#define STATIC_TEMP_BUF_SIZE    128
3925static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3926
3927/* Find the next real entry, without updating the iterator itself */
3928struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3929                                          int *ent_cpu, u64 *ent_ts)
3930{
3931        /* __find_next_entry will reset ent_size */
3932        int ent_size = iter->ent_size;
3933        struct trace_entry *entry;
3934
3935        /*
3936         * If called from ftrace_dump(), then the iter->temp buffer
3937         * will be the static_temp_buf and not created from kmalloc.
3938         * If the entry size is greater than the buffer, we can
3939         * not save it. Just return NULL in that case. This is only
3940         * used to add markers when two consecutive events' time
3941         * stamps have a large delta. See trace_print_lat_context()
3942         */
3943        if (iter->temp == static_temp_buf &&
3944            STATIC_TEMP_BUF_SIZE < ent_size)
3945                return NULL;
3946
3947        /*
3948         * The __find_next_entry() may call peek_next_entry(), which may
3949         * call ring_buffer_peek() that may make the contents of iter->ent
3950         * undefined. Need to copy iter->ent now.
3951         */
3952        if (iter->ent && iter->ent != iter->temp) {
3953                if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3954                    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3955                        void *temp;
3956                        temp = kmalloc(iter->ent_size, GFP_KERNEL);
3957                        if (!temp)
3958                                return NULL;
3959                        kfree(iter->temp);
3960                        iter->temp = temp;
3961                        iter->temp_size = iter->ent_size;
3962                }
3963                memcpy(iter->temp, iter->ent, iter->ent_size);
3964                iter->ent = iter->temp;
3965        }
3966        entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3967        /* Put back the original ent_size */
3968        iter->ent_size = ent_size;
3969
3970        return entry;
3971}
3972
3973/* Find the next real entry, and increment the iterator to the next entry */
3974void *trace_find_next_entry_inc(struct trace_iterator *iter)
3975{
3976        iter->ent = __find_next_entry(iter, &iter->cpu,
3977                                      &iter->lost_events, &iter->ts);
3978
3979        if (iter->ent)
3980                trace_iterator_increment(iter);
3981
3982        return iter->ent ? iter : NULL;
3983}
3984
3985static void trace_consume(struct trace_iterator *iter)
3986{
3987        ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3988                            &iter->lost_events);
3989}
3990
3991static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3992{
3993        struct trace_iterator *iter = m->private;
3994        int i = (int)*pos;
3995        void *ent;
3996
3997        WARN_ON_ONCE(iter->leftover);
3998
3999        (*pos)++;
4000
4001        /* can't go backwards */
4002        if (iter->idx > i)
4003                return NULL;
4004
4005        if (iter->idx < 0)
4006                ent = trace_find_next_entry_inc(iter);
4007        else
4008                ent = iter;
4009
4010        while (ent && iter->idx < i)
4011                ent = trace_find_next_entry_inc(iter);
4012
4013        iter->pos = *pos;
4014
4015        return ent;
4016}
4017
4018void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4019{
4020        struct ring_buffer_iter *buf_iter;
4021        unsigned long entries = 0;
4022        u64 ts;
4023
4024        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4025
4026        buf_iter = trace_buffer_iter(iter, cpu);
4027        if (!buf_iter)
4028                return;
4029
4030        ring_buffer_iter_reset(buf_iter);
4031
4032        /*
4033         * We could have the case with the max latency tracers
4034         * that a reset never took place on a cpu. This is evident
4035         * by the timestamp being before the start of the buffer.
4036         */
4037        while (ring_buffer_iter_peek(buf_iter, &ts)) {
4038                if (ts >= iter->array_buffer->time_start)
4039                        break;
4040                entries++;
4041                ring_buffer_iter_advance(buf_iter);
4042        }
4043
4044        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4045}
4046
4047/*
4048 * The current tracer is copied to avoid a global locking
4049 * all around.
4050 */
4051static void *s_start(struct seq_file *m, loff_t *pos)
4052{
4053        struct trace_iterator *iter = m->private;
4054        struct trace_array *tr = iter->tr;
4055        int cpu_file = iter->cpu_file;
4056        void *p = NULL;
4057        loff_t l = 0;
4058        int cpu;
4059
4060        /*
4061         * copy the tracer to avoid using a global lock all around.
4062         * iter->trace is a copy of current_trace, the pointer to the
4063         * name may be used instead of a strcmp(), as iter->trace->name
4064         * will point to the same string as current_trace->name.
4065         */
4066        mutex_lock(&trace_types_lock);
4067        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4068                *iter->trace = *tr->current_trace;
4069        mutex_unlock(&trace_types_lock);
4070
4071#ifdef CONFIG_TRACER_MAX_TRACE
4072        if (iter->snapshot && iter->trace->use_max_tr)
4073                return ERR_PTR(-EBUSY);
4074#endif
4075
4076        if (*pos != iter->pos) {
4077                iter->ent = NULL;
4078                iter->cpu = 0;
4079                iter->idx = -1;
4080
4081                if (cpu_file == RING_BUFFER_ALL_CPUS) {
4082                        for_each_tracing_cpu(cpu)
4083                                tracing_iter_reset(iter, cpu);
4084                } else
4085                        tracing_iter_reset(iter, cpu_file);
4086
4087                iter->leftover = 0;
4088                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4089                        ;
4090
4091        } else {
4092                /*
4093                 * If we overflowed the seq_file before, then we want
4094                 * to just reuse the trace_seq buffer again.
4095                 */
4096                if (iter->leftover)
4097                        p = iter;
4098                else {
4099                        l = *pos - 1;
4100                        p = s_next(m, p, &l);
4101                }
4102        }
4103
4104        trace_event_read_lock();
4105        trace_access_lock(cpu_file);
4106        return p;
4107}
4108
4109static void s_stop(struct seq_file *m, void *p)
4110{
4111        struct trace_iterator *iter = m->private;
4112
4113#ifdef CONFIG_TRACER_MAX_TRACE
4114        if (iter->snapshot && iter->trace->use_max_tr)
4115                return;
4116#endif
4117
4118        trace_access_unlock(iter->cpu_file);
4119        trace_event_read_unlock();
4120}
4121
4122static void
4123get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4124                      unsigned long *entries, int cpu)
4125{
4126        unsigned long count;
4127
4128        count = ring_buffer_entries_cpu(buf->buffer, cpu);
4129        /*
4130         * If this buffer has skipped entries, then we hold all
4131         * entries for the trace and we need to ignore the
4132         * ones before the time stamp.
4133         */
4134        if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4135                count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4136                /* total is the same as the entries */
4137                *total = count;
4138        } else
4139                *total = count +
4140                        ring_buffer_overrun_cpu(buf->buffer, cpu);
4141        *entries = count;
4142}
4143
4144static void
4145get_total_entries(struct array_buffer *buf,
4146                  unsigned long *total, unsigned long *entries)
4147{
4148        unsigned long t, e;
4149        int cpu;
4150
4151        *total = 0;
4152        *entries = 0;
4153
4154        for_each_tracing_cpu(cpu) {
4155                get_total_entries_cpu(buf, &t, &e, cpu);
4156                *total += t;
4157                *entries += e;
4158        }
4159}
4160
4161unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4162{
4163        unsigned long total, entries;
4164
4165        if (!tr)
4166                tr = &global_trace;
4167
4168        get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4169
4170        return entries;
4171}
4172
4173unsigned long trace_total_entries(struct trace_array *tr)
4174{
4175        unsigned long total, entries;
4176
4177        if (!tr)
4178                tr = &global_trace;
4179
4180        get_total_entries(&tr->array_buffer, &total, &entries);
4181
4182        return entries;
4183}
4184
4185static void print_lat_help_header(struct seq_file *m)
4186{
4187        seq_puts(m, "#                    _------=> CPU#            \n"
4188                    "#                   / _-----=> irqs-off        \n"
4189                    "#                  | / _----=> need-resched    \n"
4190                    "#                  || / _---=> hardirq/softirq \n"
4191                    "#                  ||| / _--=> preempt-depth   \n"
4192                    "#                  |||| /     delay            \n"
4193                    "#  cmd     pid     ||||| time  |   caller      \n"
4194                    "#     \\   /        |||||  \\    |   /         \n");
4195}
4196
4197static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4198{
4199        unsigned long total;
4200        unsigned long entries;
4201
4202        get_total_entries(buf, &total, &entries);
4203        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4204                   entries, total, num_online_cpus());
4205        seq_puts(m, "#\n");
4206}
4207
4208static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4209                                   unsigned int flags)
4210{
4211        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4212
4213        print_event_info(buf, m);
4214
4215        seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4216        seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4217}
4218
4219static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4220                                       unsigned int flags)
4221{
4222        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223        const char *space = "            ";
4224        int prec = tgid ? 12 : 2;
4225
4226        print_event_info(buf, m);
4227
4228        seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4229        seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4230        seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4231        seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4232        seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4233        seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234        seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4235}
4236
4237void
4238print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239{
4240        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241        struct array_buffer *buf = iter->array_buffer;
4242        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243        struct tracer *type = iter->trace;
4244        unsigned long entries;
4245        unsigned long total;
4246        const char *name = "preemption";
4247
4248        name = type->name;
4249
4250        get_total_entries(buf, &total, &entries);
4251
4252        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253                   name, UTS_RELEASE);
4254        seq_puts(m, "# -----------------------------------"
4255                 "---------------------------------\n");
4256        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258                   nsecs_to_usecs(data->saved_latency),
4259                   entries,
4260                   total,
4261                   buf->cpu,
4262#if defined(CONFIG_PREEMPT_NONE)
4263                   "server",
4264#elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265                   "desktop",
4266#elif defined(CONFIG_PREEMPT)
4267                   "preempt",
4268#elif defined(CONFIG_PREEMPT_RT)
4269                   "preempt_rt",
4270#else
4271                   "unknown",
4272#endif
4273                   /* These are reserved for later use */
4274                   0, 0, 0, 0);
4275#ifdef CONFIG_SMP
4276        seq_printf(m, " #P:%d)\n", num_online_cpus());
4277#else
4278        seq_puts(m, ")\n");
4279#endif
4280        seq_puts(m, "#    -----------------\n");
4281        seq_printf(m, "#    | task: %.16s-%d "
4282                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283                   data->comm, data->pid,
4284                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285                   data->policy, data->rt_priority);
4286        seq_puts(m, "#    -----------------\n");
4287
4288        if (data->critical_start) {
4289                seq_puts(m, "#  => started at: ");
4290                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291                trace_print_seq(m, &iter->seq);
4292                seq_puts(m, "\n#  => ended at:   ");
4293                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294                trace_print_seq(m, &iter->seq);
4295                seq_puts(m, "\n#\n");
4296        }
4297
4298        seq_puts(m, "#\n");
4299}
4300
4301static void test_cpu_buff_start(struct trace_iterator *iter)
4302{
4303        struct trace_seq *s = &iter->seq;
4304        struct trace_array *tr = iter->tr;
4305
4306        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307                return;
4308
4309        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310                return;
4311
4312        if (cpumask_available(iter->started) &&
4313            cpumask_test_cpu(iter->cpu, iter->started))
4314                return;
4315
4316        if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317                return;
4318
4319        if (cpumask_available(iter->started))
4320                cpumask_set_cpu(iter->cpu, iter->started);
4321
4322        /* Don't print started cpu buffer for the first entry of the trace */
4323        if (iter->idx > 1)
4324                trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325                                iter->cpu);
4326}
4327
4328static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329{
4330        struct trace_array *tr = iter->tr;
4331        struct trace_seq *s = &iter->seq;
4332        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333        struct trace_entry *entry;
4334        struct trace_event *event;
4335
4336        entry = iter->ent;
4337
4338        test_cpu_buff_start(iter);
4339
4340        event = ftrace_find_event(entry->type);
4341
4342        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344                        trace_print_lat_context(iter);
4345                else
4346                        trace_print_context(iter);
4347        }
4348
4349        if (trace_seq_has_overflowed(s))
4350                return TRACE_TYPE_PARTIAL_LINE;
4351
4352        if (event)
4353                return event->funcs->trace(iter, sym_flags, event);
4354
4355        trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356
4357        return trace_handle_return(s);
4358}
4359
4360static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361{
4362        struct trace_array *tr = iter->tr;
4363        struct trace_seq *s = &iter->seq;
4364        struct trace_entry *entry;
4365        struct trace_event *event;
4366
4367        entry = iter->ent;
4368
4369        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370                trace_seq_printf(s, "%d %d %llu ",
4371                                 entry->pid, iter->cpu, iter->ts);
4372
4373        if (trace_seq_has_overflowed(s))
4374                return TRACE_TYPE_PARTIAL_LINE;
4375
4376        event = ftrace_find_event(entry->type);
4377        if (event)
4378                return event->funcs->raw(iter, 0, event);
4379
4380        trace_seq_printf(s, "%d ?\n", entry->type);
4381
4382        return trace_handle_return(s);
4383}
4384
4385static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386{
4387        struct trace_array *tr = iter->tr;
4388        struct trace_seq *s = &iter->seq;
4389        unsigned char newline = '\n';
4390        struct trace_entry *entry;
4391        struct trace_event *event;
4392
4393        entry = iter->ent;
4394
4395        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396                SEQ_PUT_HEX_FIELD(s, entry->pid);
4397                SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398                SEQ_PUT_HEX_FIELD(s, iter->ts);
4399                if (trace_seq_has_overflowed(s))
4400                        return TRACE_TYPE_PARTIAL_LINE;
4401        }
4402
4403        event = ftrace_find_event(entry->type);
4404        if (event) {
4405                enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406                if (ret != TRACE_TYPE_HANDLED)
4407                        return ret;
4408        }
4409
4410        SEQ_PUT_FIELD(s, newline);
4411
4412        return trace_handle_return(s);
4413}
4414
4415static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416{
4417        struct trace_array *tr = iter->tr;
4418        struct trace_seq *s = &iter->seq;
4419        struct trace_entry *entry;
4420        struct trace_event *event;
4421
4422        entry = iter->ent;
4423
4424        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425                SEQ_PUT_FIELD(s, entry->pid);
4426                SEQ_PUT_FIELD(s, iter->cpu);
4427                SEQ_PUT_FIELD(s, iter->ts);
4428                if (trace_seq_has_overflowed(s))
4429                        return TRACE_TYPE_PARTIAL_LINE;
4430        }
4431
4432        event = ftrace_find_event(entry->type);
4433        return event ? event->funcs->binary(iter, 0, event) :
4434                TRACE_TYPE_HANDLED;
4435}
4436
4437int trace_empty(struct trace_iterator *iter)
4438{
4439        struct ring_buffer_iter *buf_iter;
4440        int cpu;
4441
4442        /* If we are looking at one CPU buffer, only check that one */
4443        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444                cpu = iter->cpu_file;
4445                buf_iter = trace_buffer_iter(iter, cpu);
4446                if (buf_iter) {
4447                        if (!ring_buffer_iter_empty(buf_iter))
4448                                return 0;
4449                } else {
4450                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                return 0;
4452                }
4453                return 1;
4454        }
4455
4456        for_each_tracing_cpu(cpu) {
4457                buf_iter = trace_buffer_iter(iter, cpu);
4458                if (buf_iter) {
4459                        if (!ring_buffer_iter_empty(buf_iter))
4460                                return 0;
4461                } else {
4462                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463                                return 0;
4464                }
4465        }
4466
4467        return 1;
4468}
4469
4470/*  Called with trace_event_read_lock() held. */
4471enum print_line_t print_trace_line(struct trace_iterator *iter)
4472{
4473        struct trace_array *tr = iter->tr;
4474        unsigned long trace_flags = tr->trace_flags;
4475        enum print_line_t ret;
4476
4477        if (iter->lost_events) {
4478                if (iter->lost_events == (unsigned long)-1)
4479                        trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480                                         iter->cpu);
4481                else
4482                        trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483                                         iter->cpu, iter->lost_events);
4484                if (trace_seq_has_overflowed(&iter->seq))
4485                        return TRACE_TYPE_PARTIAL_LINE;
4486        }
4487
4488        if (iter->trace && iter->trace->print_line) {
4489                ret = iter->trace->print_line(iter);
4490                if (ret != TRACE_TYPE_UNHANDLED)
4491                        return ret;
4492        }
4493
4494        if (iter->ent->type == TRACE_BPUTS &&
4495                        trace_flags & TRACE_ITER_PRINTK &&
4496                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497                return trace_print_bputs_msg_only(iter);
4498
4499        if (iter->ent->type == TRACE_BPRINT &&
4500                        trace_flags & TRACE_ITER_PRINTK &&
4501                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502                return trace_print_bprintk_msg_only(iter);
4503
4504        if (iter->ent->type == TRACE_PRINT &&
4505                        trace_flags & TRACE_ITER_PRINTK &&
4506                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507                return trace_print_printk_msg_only(iter);
4508
4509        if (trace_flags & TRACE_ITER_BIN)
4510                return print_bin_fmt(iter);
4511
4512        if (trace_flags & TRACE_ITER_HEX)
4513                return print_hex_fmt(iter);
4514
4515        if (trace_flags & TRACE_ITER_RAW)
4516                return print_raw_fmt(iter);
4517
4518        return print_trace_fmt(iter);
4519}
4520
4521void trace_latency_header(struct seq_file *m)
4522{
4523        struct trace_iterator *iter = m->private;
4524        struct trace_array *tr = iter->tr;
4525
4526        /* print nothing if the buffers are empty */
4527        if (trace_empty(iter))
4528                return;
4529
4530        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531                print_trace_header(m, iter);
4532
4533        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534                print_lat_help_header(m);
4535}
4536
4537void trace_default_header(struct seq_file *m)
4538{
4539        struct trace_iterator *iter = m->private;
4540        struct trace_array *tr = iter->tr;
4541        unsigned long trace_flags = tr->trace_flags;
4542
4543        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544                return;
4545
4546        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547                /* print nothing if the buffers are empty */
4548                if (trace_empty(iter))
4549                        return;
4550                print_trace_header(m, iter);
4551                if (!(trace_flags & TRACE_ITER_VERBOSE))
4552                        print_lat_help_header(m);
4553        } else {
4554                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555                        if (trace_flags & TRACE_ITER_IRQ_INFO)
4556                                print_func_help_header_irq(iter->array_buffer,
4557                                                           m, trace_flags);
4558                        else
4559                                print_func_help_header(iter->array_buffer, m,
4560                                                       trace_flags);
4561                }
4562        }
4563}
4564
4565static void test_ftrace_alive(struct seq_file *m)
4566{
4567        if (!ftrace_is_dead())
4568                return;
4569        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570                    "#          MAY BE MISSING FUNCTION EVENTS\n");
4571}
4572
4573#ifdef CONFIG_TRACER_MAX_TRACE
4574static void show_snapshot_main_help(struct seq_file *m)
4575{
4576        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578                    "#                      Takes a snapshot of the main buffer.\n"
4579                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580                    "#                      (Doesn't have to be '2' works with any number that\n"
4581                    "#                       is not a '0' or '1')\n");
4582}
4583
4584static void show_snapshot_percpu_help(struct seq_file *m)
4585{
4586        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590#else
4591        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592                    "#                     Must use main snapshot file to allocate.\n");
4593#endif
4594        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595                    "#                      (Doesn't have to be '2' works with any number that\n"
4596                    "#                       is not a '0' or '1')\n");
4597}
4598
4599static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600{
4601        if (iter->tr->allocated_snapshot)
4602                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603        else
4604                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605
4606        seq_puts(m, "# Snapshot commands:\n");
4607        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608                show_snapshot_main_help(m);
4609        else
4610                show_snapshot_percpu_help(m);
4611}
4612#else
4613/* Should never be called */
4614static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615#endif
4616
4617static int s_show(struct seq_file *m, void *v)
4618{
4619        struct trace_iterator *iter = v;
4620        int ret;
4621
4622        if (iter->ent == NULL) {
4623                if (iter->tr) {
4624                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625                        seq_puts(m, "#\n");
4626                        test_ftrace_alive(m);
4627                }
4628                if (iter->snapshot && trace_empty(iter))
4629                        print_snapshot_help(m, iter);
4630                else if (iter->trace && iter->trace->print_header)
4631                        iter->trace->print_header(m);
4632                else
4633                        trace_default_header(m);
4634
4635        } else if (iter->leftover) {
4636                /*
4637                 * If we filled the seq_file buffer earlier, we
4638                 * want to just show it now.
4639                 */
4640                ret = trace_print_seq(m, &iter->seq);
4641
4642                /* ret should this time be zero, but you never know */
4643                iter->leftover = ret;
4644
4645        } else {
4646                print_trace_line(iter);
4647                ret = trace_print_seq(m, &iter->seq);
4648                /*
4649                 * If we overflow the seq_file buffer, then it will
4650                 * ask us for this data again at start up.
4651                 * Use that instead.
4652                 *  ret is 0 if seq_file write succeeded.
4653                 *        -1 otherwise.
4654                 */
4655                iter->leftover = ret;
4656        }
4657
4658        return 0;
4659}
4660
4661/*
4662 * Should be used after trace_array_get(), trace_types_lock
4663 * ensures that i_cdev was already initialized.
4664 */
4665static inline int tracing_get_cpu(struct inode *inode)
4666{
4667        if (inode->i_cdev) /* See trace_create_cpu_file() */
4668                return (long)inode->i_cdev - 1;
4669        return RING_BUFFER_ALL_CPUS;
4670}
4671
4672static const struct seq_operations tracer_seq_ops = {
4673        .start          = s_start,
4674        .next           = s_next,
4675        .stop           = s_stop,
4676        .show           = s_show,
4677};
4678
4679static struct trace_iterator *
4680__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681{
4682        struct trace_array *tr = inode->i_private;
4683        struct trace_iterator *iter;
4684        int cpu;
4685
4686        if (tracing_disabled)
4687                return ERR_PTR(-ENODEV);
4688
4689        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690        if (!iter)
4691                return ERR_PTR(-ENOMEM);
4692
4693        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694                                    GFP_KERNEL);
4695        if (!iter->buffer_iter)
4696                goto release;
4697
4698        /*
4699         * trace_find_next_entry() may need to save off iter->ent.
4700         * It will place it into the iter->temp buffer. As most
4701         * events are less than 128, allocate a buffer of that size.
4702         * If one is greater, then trace_find_next_entry() will
4703         * allocate a new buffer to adjust for the bigger iter->ent.
4704         * It's not critical if it fails to get allocated here.
4705         */
4706        iter->temp = kmalloc(128, GFP_KERNEL);
4707        if (iter->temp)
4708                iter->temp_size = 128;
4709
4710        /*
4711         * trace_event_printf() may need to modify given format
4712         * string to replace %p with %px so that it shows real address
4713         * instead of hash value. However, that is only for the event
4714         * tracing, other tracer may not need. Defer the allocation
4715         * until it is needed.
4716         */
4717        iter->fmt = NULL;
4718        iter->fmt_size = 0;
4719
4720        /*
4721         * We make a copy of the current tracer to avoid concurrent
4722         * changes on it while we are reading.
4723         */
4724        mutex_lock(&trace_types_lock);
4725        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726        if (!iter->trace)
4727                goto fail;
4728
4729        *iter->trace = *tr->current_trace;
4730
4731        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732                goto fail;
4733
4734        iter->tr = tr;
4735
4736#ifdef CONFIG_TRACER_MAX_TRACE
4737        /* Currently only the top directory has a snapshot */
4738        if (tr->current_trace->print_max || snapshot)
4739                iter->array_buffer = &tr->max_buffer;
4740        else
4741#endif
4742                iter->array_buffer = &tr->array_buffer;
4743        iter->snapshot = snapshot;
4744        iter->pos = -1;
4745        iter->cpu_file = tracing_get_cpu(inode);
4746        mutex_init(&iter->mutex);
4747
4748        /* Notify the tracer early; before we stop tracing. */
4749        if (iter->trace->open)
4750                iter->trace->open(iter);
4751
4752        /* Annotate start of buffers if we had overruns */
4753        if (ring_buffer_overruns(iter->array_buffer->buffer))
4754                iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755
4756        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757        if (trace_clocks[tr->clock_id].in_ns)
4758                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759
4760        /*
4761         * If pause-on-trace is enabled, then stop the trace while
4762         * dumping, unless this is the "snapshot" file
4763         */
4764        if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765                tracing_stop_tr(tr);
4766
4767        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768                for_each_tracing_cpu(cpu) {
4769                        iter->buffer_iter[cpu] =
4770                                ring_buffer_read_prepare(iter->array_buffer->buffer,
4771                                                         cpu, GFP_KERNEL);
4772                }
4773                ring_buffer_read_prepare_sync();
4774                for_each_tracing_cpu(cpu) {
4775                        ring_buffer_read_start(iter->buffer_iter[cpu]);
4776                        tracing_iter_reset(iter, cpu);
4777                }
4778        } else {
4779                cpu = iter->cpu_file;
4780                iter->buffer_iter[cpu] =
4781                        ring_buffer_read_prepare(iter->array_buffer->buffer,
4782                                                 cpu, GFP_KERNEL);
4783                ring_buffer_read_prepare_sync();
4784                ring_buffer_read_start(iter->buffer_iter[cpu]);
4785                tracing_iter_reset(iter, cpu);
4786        }
4787
4788        mutex_unlock(&trace_types_lock);
4789
4790        return iter;
4791
4792 fail:
4793        mutex_unlock(&trace_types_lock);
4794        kfree(iter->trace);
4795        kfree(iter->temp);
4796        kfree(iter->buffer_iter);
4797release:
4798        seq_release_private(inode, file);
4799        return ERR_PTR(-ENOMEM);
4800}
4801
4802int tracing_open_generic(struct inode *inode, struct file *filp)
4803{
4804        int ret;
4805
4806        ret = tracing_check_open_get_tr(NULL);
4807        if (ret)
4808                return ret;
4809
4810        filp->private_data = inode->i_private;
4811        return 0;
4812}
4813
4814bool tracing_is_disabled(void)
4815{
4816        return (tracing_disabled) ? true: false;
4817}
4818
4819/*
4820 * Open and update trace_array ref count.
4821 * Must have the current trace_array passed to it.
4822 */
4823int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824{
4825        struct trace_array *tr = inode->i_private;
4826        int ret;
4827
4828        ret = tracing_check_open_get_tr(tr);
4829        if (ret)
4830                return ret;
4831
4832        filp->private_data = inode->i_private;
4833
4834        return 0;
4835}
4836
4837static int tracing_release(struct inode *inode, struct file *file)
4838{
4839        struct trace_array *tr = inode->i_private;
4840        struct seq_file *m = file->private_data;
4841        struct trace_iterator *iter;
4842        int cpu;
4843
4844        if (!(file->f_mode & FMODE_READ)) {
4845                trace_array_put(tr);
4846                return 0;
4847        }
4848
4849        /* Writes do not use seq_file */
4850        iter = m->private;
4851        mutex_lock(&trace_types_lock);
4852
4853        for_each_tracing_cpu(cpu) {
4854                if (iter->buffer_iter[cpu])
4855                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856        }
4857
4858        if (iter->trace && iter->trace->close)
4859                iter->trace->close(iter);
4860
4861        if (!iter->snapshot && tr->stop_count)
4862                /* reenable tracing if it was previously enabled */
4863                tracing_start_tr(tr);
4864
4865        __trace_array_put(tr);
4866
4867        mutex_unlock(&trace_types_lock);
4868
4869        mutex_destroy(&iter->mutex);
4870        free_cpumask_var(iter->started);
4871        kfree(iter->fmt);
4872        kfree(iter->temp);
4873        kfree(iter->trace);
4874        kfree(iter->buffer_iter);
4875        seq_release_private(inode, file);
4876
4877        return 0;
4878}
4879
4880static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881{
4882        struct trace_array *tr = inode->i_private;
4883
4884        trace_array_put(tr);
4885        return 0;
4886}
4887
4888static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889{
4890        struct trace_array *tr = inode->i_private;
4891
4892        trace_array_put(tr);
4893
4894        return single_release(inode, file);
4895}
4896
4897static int tracing_open(struct inode *inode, struct file *file)
4898{
4899        struct trace_array *tr = inode->i_private;
4900        struct trace_iterator *iter;
4901        int ret;
4902
4903        ret = tracing_check_open_get_tr(tr);
4904        if (ret)
4905                return ret;
4906
4907        /* If this file was open for write, then erase contents */
4908        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909                int cpu = tracing_get_cpu(inode);
4910                struct array_buffer *trace_buf = &tr->array_buffer;
4911
4912#ifdef CONFIG_TRACER_MAX_TRACE
4913                if (tr->current_trace->print_max)
4914                        trace_buf = &tr->max_buffer;
4915#endif
4916
4917                if (cpu == RING_BUFFER_ALL_CPUS)
4918                        tracing_reset_online_cpus(trace_buf);
4919                else
4920                        tracing_reset_cpu(trace_buf, cpu);
4921        }
4922
4923        if (file->f_mode & FMODE_READ) {
4924                iter = __tracing_open(inode, file, false);
4925                if (IS_ERR(iter))
4926                        ret = PTR_ERR(iter);
4927                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929        }
4930
4931        if (ret < 0)
4932                trace_array_put(tr);
4933
4934        return ret;
4935}
4936
4937/*
4938 * Some tracers are not suitable for instance buffers.
4939 * A tracer is always available for the global array (toplevel)
4940 * or if it explicitly states that it is.
4941 */
4942static bool
4943trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944{
4945        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946}
4947
4948/* Find the next tracer that this trace array may use */
4949static struct tracer *
4950get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951{
4952        while (t && !trace_ok_for_array(t, tr))
4953                t = t->next;
4954
4955        return t;
4956}
4957
4958static void *
4959t_next(struct seq_file *m, void *v, loff_t *pos)
4960{
4961        struct trace_array *tr = m->private;
4962        struct tracer *t = v;
4963
4964        (*pos)++;
4965
4966        if (t)
4967                t = get_tracer_for_array(tr, t->next);
4968
4969        return t;
4970}
4971
4972static void *t_start(struct seq_file *m, loff_t *pos)
4973{
4974        struct trace_array *tr = m->private;
4975        struct tracer *t;
4976        loff_t l = 0;
4977
4978        mutex_lock(&trace_types_lock);
4979
4980        t = get_tracer_for_array(tr, trace_types);
4981        for (; t && l < *pos; t = t_next(m, t, &l))
4982                        ;
4983
4984        return t;
4985}
4986
4987static void t_stop(struct seq_file *m, void *p)
4988{
4989        mutex_unlock(&trace_types_lock);
4990}
4991
4992static int t_show(struct seq_file *m, void *v)
4993{
4994        struct tracer *t = v;
4995
4996        if (!t)
4997                return 0;
4998
4999        seq_puts(m, t->name);
5000        if (t->next)
5001                seq_putc(m, ' ');
5002        else
5003                seq_putc(m, '\n');
5004
5005        return 0;
5006}
5007
5008static const struct seq_operations show_traces_seq_ops = {
5009        .start          = t_start,
5010        .next           = t_next,
5011        .stop           = t_stop,
5012        .show           = t_show,
5013};
5014
5015static int show_traces_open(struct inode *inode, struct file *file)
5016{
5017        struct trace_array *tr = inode->i_private;
5018        struct seq_file *m;
5019        int ret;
5020
5021        ret = tracing_check_open_get_tr(tr);
5022        if (ret)
5023                return ret;
5024
5025        ret = seq_open(file, &show_traces_seq_ops);
5026        if (ret) {
5027                trace_array_put(tr);
5028                return ret;
5029        }
5030
5031        m = file->private_data;
5032        m->private = tr;
5033
5034        return 0;
5035}
5036
5037static int show_traces_release(struct inode *inode, struct file *file)
5038{
5039        struct trace_array *tr = inode->i_private;
5040
5041        trace_array_put(tr);
5042        return seq_release(inode, file);
5043}
5044
5045static ssize_t
5046tracing_write_stub(struct file *filp, const char __user *ubuf,
5047                   size_t count, loff_t *ppos)
5048{
5049        return count;
5050}
5051
5052loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053{
5054        int ret;
5055
5056        if (file->f_mode & FMODE_READ)
5057                ret = seq_lseek(file, offset, whence);
5058        else
5059                file->f_pos = ret = 0;
5060
5061        return ret;
5062}
5063
5064static const struct file_operations tracing_fops = {
5065        .open           = tracing_open,
5066        .read           = seq_read,
5067        .write          = tracing_write_stub,
5068        .llseek         = tracing_lseek,
5069        .release        = tracing_release,
5070};
5071
5072static const struct file_operations show_traces_fops = {
5073        .open           = show_traces_open,
5074        .read           = seq_read,
5075        .llseek         = seq_lseek,
5076        .release        = show_traces_release,
5077};
5078
5079static ssize_t
5080tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081                     size_t count, loff_t *ppos)
5082{
5083        struct trace_array *tr = file_inode(filp)->i_private;
5084        char *mask_str;
5085        int len;
5086
5087        len = snprintf(NULL, 0, "%*pb\n",
5088                       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089        mask_str = kmalloc(len, GFP_KERNEL);
5090        if (!mask_str)
5091                return -ENOMEM;
5092
5093        len = snprintf(mask_str, len, "%*pb\n",
5094                       cpumask_pr_args(tr->tracing_cpumask));
5095        if (len >= count) {
5096                count = -EINVAL;
5097                goto out_err;
5098        }
5099        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100
5101out_err:
5102        kfree(mask_str);
5103
5104        return count;
5105}
5106
5107int tracing_set_cpumask(struct trace_array *tr,
5108                        cpumask_var_t tracing_cpumask_new)
5109{
5110        int cpu;
5111
5112        if (!tr)
5113                return -EINVAL;
5114
5115        local_irq_disable();
5116        arch_spin_lock(&tr->max_lock);
5117        for_each_tracing_cpu(cpu) {
5118                /*
5119                 * Increase/decrease the disabled counter if we are
5120                 * about to flip a bit in the cpumask:
5121                 */
5122                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124                        atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125                        ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126                }
5127                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129                        atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130                        ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131                }
5132        }
5133        arch_spin_unlock(&tr->max_lock);
5134        local_irq_enable();
5135
5136        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137
5138        return 0;
5139}
5140
5141static ssize_t
5142tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143                      size_t count, loff_t *ppos)
5144{
5145        struct trace_array *tr = file_inode(filp)->i_private;
5146        cpumask_var_t tracing_cpumask_new;
5147        int err;
5148
5149        if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150                return -ENOMEM;
5151
5152        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153        if (err)
5154                goto err_free;
5155
5156        err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157        if (err)
5158                goto err_free;
5159
5160        free_cpumask_var(tracing_cpumask_new);
5161
5162        return count;
5163
5164err_free:
5165        free_cpumask_var(tracing_cpumask_new);
5166
5167        return err;
5168}
5169
5170static const struct file_operations tracing_cpumask_fops = {
5171        .open           = tracing_open_generic_tr,
5172        .read           = tracing_cpumask_read,
5173        .write          = tracing_cpumask_write,
5174        .release        = tracing_release_generic_tr,
5175        .llseek         = generic_file_llseek,
5176};
5177
5178static int tracing_trace_options_show(struct seq_file *m, void *v)
5179{
5180        struct tracer_opt *trace_opts;
5181        struct trace_array *tr = m->private;
5182        u32 tracer_flags;
5183        int i;
5184
5185        mutex_lock(&trace_types_lock);
5186        tracer_flags = tr->current_trace->flags->val;
5187        trace_opts = tr->current_trace->flags->opts;
5188
5189        for (i = 0; trace_options[i]; i++) {
5190                if (tr->trace_flags & (1 << i))
5191                        seq_printf(m, "%s\n", trace_options[i]);
5192                else
5193                        seq_printf(m, "no%s\n", trace_options[i]);
5194        }
5195
5196        for (i = 0; trace_opts[i].name; i++) {
5197                if (tracer_flags & trace_opts[i].bit)
5198                        seq_printf(m, "%s\n", trace_opts[i].name);
5199                else
5200                        seq_printf(m, "no%s\n", trace_opts[i].name);
5201        }
5202        mutex_unlock(&trace_types_lock);
5203
5204        return 0;
5205}
5206
5207static int __set_tracer_option(struct trace_array *tr,
5208                               struct tracer_flags *tracer_flags,
5209                               struct tracer_opt *opts, int neg)
5210{
5211        struct tracer *trace = tracer_flags->trace;
5212        int ret;
5213
5214        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215        if (ret)
5216                return ret;
5217
5218        if (neg)
5219                tracer_flags->val &= ~opts->bit;
5220        else
5221                tracer_flags->val |= opts->bit;
5222        return 0;
5223}
5224
5225/* Try to assign a tracer specific option */
5226static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227{
5228        struct tracer *trace = tr->current_trace;
5229        struct tracer_flags *tracer_flags = trace->flags;
5230        struct tracer_opt *opts = NULL;
5231        int i;
5232
5233        for (i = 0; tracer_flags->opts[i].name; i++) {
5234                opts = &tracer_flags->opts[i];
5235
5236                if (strcmp(cmp, opts->name) == 0)
5237                        return __set_tracer_option(tr, trace->flags, opts, neg);
5238        }
5239
5240        return -EINVAL;
5241}
5242
5243/* Some tracers require overwrite to stay enabled */
5244int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245{
5246        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247                return -1;
5248
5249        return 0;
5250}
5251
5252int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253{
5254        int *map;
5255
5256        if ((mask == TRACE_ITER_RECORD_TGID) ||
5257            (mask == TRACE_ITER_RECORD_CMD))
5258                lockdep_assert_held(&event_mutex);
5259
5260        /* do nothing if flag is already set */
5261        if (!!(tr->trace_flags & mask) == !!enabled)
5262                return 0;
5263
5264        /* Give the tracer a chance to approve the change */
5265        if (tr->current_trace->flag_changed)
5266                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267                        return -EINVAL;
5268
5269        if (enabled)
5270                tr->trace_flags |= mask;
5271        else
5272                tr->trace_flags &= ~mask;
5273
5274        if (mask == TRACE_ITER_RECORD_CMD)
5275                trace_event_enable_cmd_record(enabled);
5276
5277        if (mask == TRACE_ITER_RECORD_TGID) {
5278                if (!tgid_map) {
5279                        tgid_map_max = pid_max;
5280                        map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281                                       GFP_KERNEL);
5282
5283                        /*
5284                         * Pairs with smp_load_acquire() in
5285                         * trace_find_tgid_ptr() to ensure that if it observes
5286                         * the tgid_map we just allocated then it also observes
5287                         * the corresponding tgid_map_max value.
5288                         */
5289                        smp_store_release(&tgid_map, map);
5290                }
5291                if (!tgid_map) {
5292                        tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293                        return -ENOMEM;
5294                }
5295
5296                trace_event_enable_tgid_record(enabled);
5297        }
5298
5299        if (mask == TRACE_ITER_EVENT_FORK)
5300                trace_event_follow_fork(tr, enabled);
5301
5302        if (mask == TRACE_ITER_FUNC_FORK)
5303                ftrace_pid_follow_fork(tr, enabled);
5304
5305        if (mask == TRACE_ITER_OVERWRITE) {
5306                ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307#ifdef CONFIG_TRACER_MAX_TRACE
5308                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309#endif
5310        }
5311
5312        if (mask == TRACE_ITER_PRINTK) {
5313                trace_printk_start_stop_comm(enabled);
5314                trace_printk_control(enabled);
5315        }
5316
5317        return 0;
5318}
5319
5320int trace_set_options(struct trace_array *tr, char *option)
5321{
5322        char *cmp;
5323        int neg = 0;
5324        int ret;
5325        size_t orig_len = strlen(option);
5326        int len;
5327
5328        cmp = strstrip(option);
5329
5330        len = str_has_prefix(cmp, "no");
5331        if (len)
5332                neg = 1;
5333
5334        cmp += len;
5335
5336        mutex_lock(&event_mutex);
5337        mutex_lock(&trace_types_lock);
5338
5339        ret = match_string(trace_options, -1, cmp);
5340        /* If no option could be set, test the specific tracer options */
5341        if (ret < 0)
5342                ret = set_tracer_option(tr, cmp, neg);
5343        else
5344                ret = set_tracer_flag(tr, 1 << ret, !neg);
5345
5346        mutex_unlock(&trace_types_lock);
5347        mutex_unlock(&event_mutex);
5348
5349        /*
5350         * If the first trailing whitespace is replaced with '\0' by strstrip,
5351         * turn it back into a space.
5352         */
5353        if (orig_len > strlen(option))
5354                option[strlen(option)] = ' ';
5355
5356        return ret;
5357}
5358
5359static void __init apply_trace_boot_options(void)
5360{
5361        char *buf = trace_boot_options_buf;
5362        char *option;
5363
5364        while (true) {
5365                option = strsep(&buf, ",");
5366
5367                if (!option)
5368                        break;
5369
5370                if (*option)
5371                        trace_set_options(&global_trace, option);
5372
5373                /* Put back the comma to allow this to be called again */
5374                if (buf)
5375                        *(buf - 1) = ',';
5376        }
5377}
5378
5379static ssize_t
5380tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381                        size_t cnt, loff_t *ppos)
5382{
5383        struct seq_file *m = filp->private_data;
5384        struct trace_array *tr = m->private;
5385        char buf[64];
5386        int ret;
5387
5388        if (cnt >= sizeof(buf))
5389                return -EINVAL;
5390
5391        if (copy_from_user(buf, ubuf, cnt))
5392                return -EFAULT;
5393
5394        buf[cnt] = 0;
5395
5396        ret = trace_set_options(tr, buf);
5397        if (ret < 0)
5398                return ret;
5399
5400        *ppos += cnt;
5401
5402        return cnt;
5403}
5404
5405static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406{
5407        struct trace_array *tr = inode->i_private;
5408        int ret;
5409
5410        ret = tracing_check_open_get_tr(tr);
5411        if (ret)
5412                return ret;
5413
5414        ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415        if (ret < 0)
5416                trace_array_put(tr);
5417
5418        return ret;
5419}
5420
5421static const struct file_operations tracing_iter_fops = {
5422        .open           = tracing_trace_options_open,
5423        .read           = seq_read,
5424        .llseek         = seq_lseek,
5425        .release        = tracing_single_release_tr,
5426        .write          = tracing_trace_options_write,
5427};
5428
5429static const char readme_msg[] =
5430        "tracing mini-HOWTO:\n\n"
5431        "# echo 0 > tracing_on : quick way to disable tracing\n"
5432        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433        " Important files:\n"
5434        "  trace\t\t\t- The static contents of the buffer\n"
5435        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437        "  current_tracer\t- function and latency tracers\n"
5438        "  available_tracers\t- list of configured tracers for current_tracer\n"
5439        "  error_log\t- error log for failed commands (that support it)\n"
5440        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442        "  trace_clock\t\t-change the clock used to order events\n"
5443        "       local:   Per cpu clock but may not be synced across CPUs\n"
5444        "      global:   Synced across CPUs but slows tracing down.\n"
5445        "     counter:   Not a clock, but just an increment\n"
5446        "      uptime:   Jiffy counter from time of boot\n"
5447        "        perf:   Same clock that perf events use\n"
5448#ifdef CONFIG_X86_64
5449        "     x86-tsc:   TSC cycle counter\n"
5450#endif
5451        "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5452        "       delta:   Delta difference against a buffer-wide timestamp\n"
5453        "    absolute:   Absolute (standalone) timestamp\n"
5454        "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5455        "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5456        "  tracing_cpumask\t- Limit which CPUs to trace\n"
5457        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458        "\t\t\t  Remove sub-buffer with rmdir\n"
5459        "  trace_options\t\t- Set format or modify how tracing happens\n"
5460        "\t\t\t  Disable an option by prefixing 'no' to the\n"
5461        "\t\t\t  option name\n"
5462        "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463#ifdef CONFIG_DYNAMIC_FTRACE
5464        "\n  available_filter_functions - list of functions that can be filtered on\n"
5465        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466        "\t\t\t  functions\n"
5467        "\t     accepts: func_full_name or glob-matching-pattern\n"
5468        "\t     modules: Can select a group via module\n"
5469        "\t      Format: :mod:<module-name>\n"
5470        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471        "\t    triggers: a command to perform when function is hit\n"
5472        "\t      Format: <function>:<trigger>[:count]\n"
5473        "\t     trigger: traceon, traceoff\n"
5474        "\t\t      enable_event:<system>:<event>\n"
5475        "\t\t      disable_event:<system>:<event>\n"
5476#ifdef CONFIG_STACKTRACE
5477        "\t\t      stacktrace\n"
5478#endif
5479#ifdef CONFIG_TRACER_SNAPSHOT
5480        "\t\t      snapshot\n"
5481#endif
5482        "\t\t      dump\n"
5483        "\t\t      cpudump\n"
5484        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486        "\t     The first one will disable tracing every time do_fault is hit\n"
5487        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488        "\t       The first time do trap is hit and it disables tracing, the\n"
5489        "\t       counter will decrement to 2. If tracing is already disabled,\n"
5490        "\t       the counter will not decrement. It only decrements when the\n"
5491        "\t       trigger did work\n"
5492        "\t     To remove trigger without count:\n"
5493        "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5494        "\t     To remove trigger with a count:\n"
5495        "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5496        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498        "\t    modules: Can select a group via module command :mod:\n"
5499        "\t    Does not accept triggers\n"
5500#endif /* CONFIG_DYNAMIC_FTRACE */
5501#ifdef CONFIG_FUNCTION_TRACER
5502        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503        "\t\t    (function)\n"
5504        "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505        "\t\t    (function)\n"
5506#endif
5507#ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511#endif
5512#ifdef CONFIG_TRACER_SNAPSHOT
5513        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514        "\t\t\t  snapshot buffer. Read the contents for more\n"
5515        "\t\t\t  information\n"
5516#endif
5517#ifdef CONFIG_STACK_TRACER
5518        "  stack_trace\t\t- Shows the max stack trace when active\n"
5519        "  stack_max_size\t- Shows current max stack size that was traced\n"
5520        "\t\t\t  Write into this file to reset the max size (trigger a\n"
5521        "\t\t\t  new trace)\n"
5522#ifdef CONFIG_DYNAMIC_FTRACE
5523        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524        "\t\t\t  traces\n"
5525#endif
5526#endif /* CONFIG_STACK_TRACER */
5527#ifdef CONFIG_DYNAMIC_EVENTS
5528        "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5530#endif
5531#ifdef CONFIG_KPROBE_EVENTS
5532        "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5534#endif
5535#ifdef CONFIG_UPROBE_EVENTS
5536        "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5538#endif
5539#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540        "\t  accepts: event-definitions (one definition per line)\n"
5541        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543#ifdef CONFIG_HIST_TRIGGERS
5544        "\t           s:[synthetic/]<event> <field> [<field>]\n"
5545#endif
5546        "\t           -:[<group>/]<event>\n"
5547#ifdef CONFIG_KPROBE_EVENTS
5548        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5549  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5550#endif
5551#ifdef CONFIG_UPROBE_EVENTS
5552  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5553#endif
5554        "\t     args: <name>=fetcharg[:type]\n"
5555        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5556#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5557        "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5558#else
5559        "\t           $stack<index>, $stack, $retval, $comm,\n"
5560#endif
5561        "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5562        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5563        "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5564        "\t           <type>\\[<array-size>\\]\n"
5565#ifdef CONFIG_HIST_TRIGGERS
5566        "\t    field: <stype> <name>;\n"
5567        "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5568        "\t           [unsigned] char/int/long\n"
5569#endif
5570#endif
5571        "  events/\t\t- Directory containing all trace event subsystems:\n"
5572        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5573        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5574        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5575        "\t\t\t  events\n"
5576        "      filter\t\t- If set, only events passing filter are traced\n"
5577        "  events/<system>/<event>/\t- Directory containing control files for\n"
5578        "\t\t\t  <event>:\n"
5579        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5580        "      filter\t\t- If set, only events passing filter are traced\n"
5581        "      trigger\t\t- If set, a command to perform when event is hit\n"
5582        "\t    Format: <trigger>[:count][if <filter>]\n"
5583        "\t   trigger: traceon, traceoff\n"
5584        "\t            enable_event:<system>:<event>\n"
5585        "\t            disable_event:<system>:<event>\n"
5586#ifdef CONFIG_HIST_TRIGGERS
5587        "\t            enable_hist:<system>:<event>\n"
5588        "\t            disable_hist:<system>:<event>\n"
5589#endif
5590#ifdef CONFIG_STACKTRACE
5591        "\t\t    stacktrace\n"
5592#endif
5593#ifdef CONFIG_TRACER_SNAPSHOT
5594        "\t\t    snapshot\n"
5595#endif
5596#ifdef CONFIG_HIST_TRIGGERS
5597        "\t\t    hist (see below)\n"
5598#endif
5599        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5600        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5601        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5602        "\t                  events/block/block_unplug/trigger\n"
5603        "\t   The first disables tracing every time block_unplug is hit.\n"
5604        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5605        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5606        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5607        "\t   Like function triggers, the counter is only decremented if it\n"
5608        "\t    enabled or disabled tracing.\n"
5609        "\t   To remove a trigger without a count:\n"
5610        "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5611        "\t   To remove a trigger with a count:\n"
5612        "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5613        "\t   Filters can be ignored when removing a trigger.\n"
5614#ifdef CONFIG_HIST_TRIGGERS
5615        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5616        "\t    Format: hist:keys=<field1[,field2,...]>\n"
5617        "\t            [:values=<field1[,field2,...]>]\n"
5618        "\t            [:sort=<field1[,field2,...]>]\n"
5619        "\t            [:size=#entries]\n"
5620        "\t            [:pause][:continue][:clear]\n"
5621        "\t            [:name=histname1]\n"
5622        "\t            [:<handler>.<action>]\n"
5623        "\t            [if <filter>]\n\n"
5624        "\t    Note, special fields can be used as well:\n"
5625        "\t            common_timestamp - to record current timestamp\n"
5626        "\t            common_cpu - to record the CPU the event happened on\n"
5627        "\n"
5628        "\t    When a matching event is hit, an entry is added to a hash\n"
5629        "\t    table using the key(s) and value(s) named, and the value of a\n"
5630        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5631        "\t    correspond to fields in the event's format description.  Keys\n"
5632        "\t    can be any field, or the special string 'stacktrace'.\n"
5633        "\t    Compound keys consisting of up to two fields can be specified\n"
5634        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5635        "\t    fields.  Sort keys consisting of up to two fields can be\n"
5636        "\t    specified using the 'sort' keyword.  The sort direction can\n"
5637        "\t    be modified by appending '.descending' or '.ascending' to a\n"
5638        "\t    sort field.  The 'size' parameter can be used to specify more\n"
5639        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5640        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5641        "\t    its histogram data will be shared with other triggers of the\n"
5642        "\t    same name, and trigger hits will update this common data.\n\n"
5643        "\t    Reading the 'hist' file for the event will dump the hash\n"
5644        "\t    table in its entirety to stdout.  If there are multiple hist\n"
5645        "\t    triggers attached to an event, there will be a table for each\n"
5646        "\t    trigger in the output.  The table displayed for a named\n"
5647        "\t    trigger will be the same as any other instance having the\n"
5648        "\t    same name.  The default format used to display a given field\n"
5649        "\t    can be modified by appending any of the following modifiers\n"
5650        "\t    to the field name, as applicable:\n\n"
5651        "\t            .hex        display a number as a hex value\n"
5652        "\t            .sym        display an address as a symbol\n"
5653        "\t            .sym-offset display an address as a symbol and offset\n"
5654        "\t            .execname   display a common_pid as a program name\n"
5655        "\t            .syscall    display a syscall id as a syscall name\n"
5656        "\t            .log2       display log2 value rather than raw number\n"
5657        "\t            .usecs      display a common_timestamp in microseconds\n\n"
5658        "\t    The 'pause' parameter can be used to pause an existing hist\n"
5659        "\t    trigger or to start a hist trigger but not log any events\n"
5660        "\t    until told to do so.  'continue' can be used to start or\n"
5661        "\t    restart a paused hist trigger.\n\n"
5662        "\t    The 'clear' parameter will clear the contents of a running\n"
5663        "\t    hist trigger and leave its current paused/active state\n"
5664        "\t    unchanged.\n\n"
5665        "\t    The enable_hist and disable_hist triggers can be used to\n"
5666        "\t    have one event conditionally start and stop another event's\n"
5667        "\t    already-attached hist trigger.  The syntax is analogous to\n"
5668        "\t    the enable_event and disable_event triggers.\n\n"
5669        "\t    Hist trigger handlers and actions are executed whenever a\n"
5670        "\t    a histogram entry is added or updated.  They take the form:\n\n"
5671        "\t        <handler>.<action>\n\n"
5672        "\t    The available handlers are:\n\n"
5673        "\t        onmatch(matching.event)  - invoke on addition or update\n"
5674        "\t        onmax(var)               - invoke if var exceeds current max\n"
5675        "\t        onchange(var)            - invoke action if var changes\n\n"
5676        "\t    The available actions are:\n\n"
5677        "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5678        "\t        save(field,...)                      - save current event fields\n"
5679#ifdef CONFIG_TRACER_SNAPSHOT
5680        "\t        snapshot()                           - snapshot the trace buffer\n\n"
5681#endif
5682#ifdef CONFIG_SYNTH_EVENTS
5683        "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5684        "\t  Write into this file to define/undefine new synthetic events.\n"
5685        "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5686#endif
5687#endif
5688;
5689
5690static ssize_t
5691tracing_readme_read(struct file *filp, char __user *ubuf,
5692                       size_t cnt, loff_t *ppos)
5693{
5694        return simple_read_from_buffer(ubuf, cnt, ppos,
5695                                        readme_msg, strlen(readme_msg));
5696}
5697
5698static const struct file_operations tracing_readme_fops = {
5699        .open           = tracing_open_generic,
5700        .read           = tracing_readme_read,
5701        .llseek         = generic_file_llseek,
5702};
5703
5704static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5705{
5706        int pid = ++(*pos);
5707
5708        return trace_find_tgid_ptr(pid);
5709}
5710
5711static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5712{
5713        int pid = *pos;
5714
5715        return trace_find_tgid_ptr(pid);
5716}
5717
5718static void saved_tgids_stop(struct seq_file *m, void *v)
5719{
5720}
5721
5722static int saved_tgids_show(struct seq_file *m, void *v)
5723{
5724        int *entry = (int *)v;
5725        int pid = entry - tgid_map;
5726        int tgid = *entry;
5727
5728        if (tgid == 0)
5729                return SEQ_SKIP;
5730
5731        seq_printf(m, "%d %d\n", pid, tgid);
5732        return 0;
5733}
5734
5735static const struct seq_operations tracing_saved_tgids_seq_ops = {
5736        .start          = saved_tgids_start,
5737        .stop           = saved_tgids_stop,
5738        .next           = saved_tgids_next,
5739        .show           = saved_tgids_show,
5740};
5741
5742static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5743{
5744        int ret;
5745
5746        ret = tracing_check_open_get_tr(NULL);
5747        if (ret)
5748                return ret;
5749
5750        return seq_open(filp, &tracing_saved_tgids_seq_ops);
5751}
5752
5753
5754static const struct file_operations tracing_saved_tgids_fops = {
5755        .open           = tracing_saved_tgids_open,
5756        .read           = seq_read,
5757        .llseek         = seq_lseek,
5758        .release        = seq_release,
5759};
5760
5761static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5762{
5763        unsigned int *ptr = v;
5764
5765        if (*pos || m->count)
5766                ptr++;
5767
5768        (*pos)++;
5769
5770        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5771             ptr++) {
5772                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5773                        continue;
5774
5775                return ptr;
5776        }
5777
5778        return NULL;
5779}
5780
5781static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5782{
5783        void *v;
5784        loff_t l = 0;
5785
5786        preempt_disable();
5787        arch_spin_lock(&trace_cmdline_lock);
5788
5789        v = &savedcmd->map_cmdline_to_pid[0];
5790        while (l <= *pos) {
5791                v = saved_cmdlines_next(m, v, &l);
5792                if (!v)
5793                        return NULL;
5794        }
5795
5796        return v;
5797}
5798
5799static void saved_cmdlines_stop(struct seq_file *m, void *v)
5800{
5801        arch_spin_unlock(&trace_cmdline_lock);
5802        preempt_enable();
5803}
5804
5805static int saved_cmdlines_show(struct seq_file *m, void *v)
5806{
5807        char buf[TASK_COMM_LEN];
5808        unsigned int *pid = v;
5809
5810        __trace_find_cmdline(*pid, buf);
5811        seq_printf(m, "%d %s\n", *pid, buf);
5812        return 0;
5813}
5814
5815static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5816        .start          = saved_cmdlines_start,
5817        .next           = saved_cmdlines_next,
5818        .stop           = saved_cmdlines_stop,
5819        .show           = saved_cmdlines_show,
5820};
5821
5822static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5823{
5824        int ret;
5825
5826        ret = tracing_check_open_get_tr(NULL);
5827        if (ret)
5828                return ret;
5829
5830        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5831}
5832
5833static const struct file_operations tracing_saved_cmdlines_fops = {
5834        .open           = tracing_saved_cmdlines_open,
5835        .read           = seq_read,
5836        .llseek         = seq_lseek,
5837        .release        = seq_release,
5838};
5839
5840static ssize_t
5841tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5842                                 size_t cnt, loff_t *ppos)
5843{
5844        char buf[64];
5845        int r;
5846
5847        arch_spin_lock(&trace_cmdline_lock);
5848        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5849        arch_spin_unlock(&trace_cmdline_lock);
5850
5851        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5852}
5853
5854static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5855{
5856        kfree(s->saved_cmdlines);
5857        kfree(s->map_cmdline_to_pid);
5858        kfree(s);
5859}
5860
5861static int tracing_resize_saved_cmdlines(unsigned int val)
5862{
5863        struct saved_cmdlines_buffer *s, *savedcmd_temp;
5864
5865        s = kmalloc(sizeof(*s), GFP_KERNEL);
5866        if (!s)
5867                return -ENOMEM;
5868
5869        if (allocate_cmdlines_buffer(val, s) < 0) {
5870                kfree(s);
5871                return -ENOMEM;
5872        }
5873
5874        arch_spin_lock(&trace_cmdline_lock);
5875        savedcmd_temp = savedcmd;
5876        savedcmd = s;
5877        arch_spin_unlock(&trace_cmdline_lock);
5878        free_saved_cmdlines_buffer(savedcmd_temp);
5879
5880        return 0;
5881}
5882
5883static ssize_t
5884tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5885                                  size_t cnt, loff_t *ppos)
5886{
5887        unsigned long val;
5888        int ret;
5889
5890        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5891        if (ret)
5892                return ret;
5893
5894        /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5895        if (!val || val > PID_MAX_DEFAULT)
5896                return -EINVAL;
5897
5898        ret = tracing_resize_saved_cmdlines((unsigned int)val);
5899        if (ret < 0)
5900                return ret;
5901
5902        *ppos += cnt;
5903
5904        return cnt;
5905}
5906
5907static const struct file_operations tracing_saved_cmdlines_size_fops = {
5908        .open           = tracing_open_generic,
5909        .read           = tracing_saved_cmdlines_size_read,
5910        .write          = tracing_saved_cmdlines_size_write,
5911};
5912
5913#ifdef CONFIG_TRACE_EVAL_MAP_FILE
5914static union trace_eval_map_item *
5915update_eval_map(union trace_eval_map_item *ptr)
5916{
5917        if (!ptr->map.eval_string) {
5918                if (ptr->tail.next) {
5919                        ptr = ptr->tail.next;
5920                        /* Set ptr to the next real item (skip head) */
5921                        ptr++;
5922                } else
5923                        return NULL;
5924        }
5925        return ptr;
5926}
5927
5928static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5929{
5930        union trace_eval_map_item *ptr = v;
5931
5932        /*
5933         * Paranoid! If ptr points to end, we don't want to increment past it.
5934         * This really should never happen.
5935         */
5936        (*pos)++;
5937        ptr = update_eval_map(ptr);
5938        if (WARN_ON_ONCE(!ptr))
5939                return NULL;
5940
5941        ptr++;
5942        ptr = update_eval_map(ptr);
5943
5944        return ptr;
5945}
5946
5947static void *eval_map_start(struct seq_file *m, loff_t *pos)
5948{
5949        union trace_eval_map_item *v;
5950        loff_t l = 0;
5951
5952        mutex_lock(&trace_eval_mutex);
5953
5954        v = trace_eval_maps;
5955        if (v)
5956                v++;
5957
5958        while (v && l < *pos) {
5959                v = eval_map_next(m, v, &l);
5960        }
5961
5962        return v;
5963}
5964
5965static void eval_map_stop(struct seq_file *m, void *v)
5966{
5967        mutex_unlock(&trace_eval_mutex);
5968}
5969
5970static int eval_map_show(struct seq_file *m, void *v)
5971{
5972        union trace_eval_map_item *ptr = v;
5973
5974        seq_printf(m, "%s %ld (%s)\n",
5975                   ptr->map.eval_string, ptr->map.eval_value,
5976                   ptr->map.system);
5977
5978        return 0;
5979}
5980
5981static const struct seq_operations tracing_eval_map_seq_ops = {
5982        .start          = eval_map_start,
5983        .next           = eval_map_next,
5984        .stop           = eval_map_stop,
5985        .show           = eval_map_show,
5986};
5987
5988static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5989{
5990        int ret;
5991
5992        ret = tracing_check_open_get_tr(NULL);
5993        if (ret)
5994                return ret;
5995
5996        return seq_open(filp, &tracing_eval_map_seq_ops);
5997}
5998
5999static const struct file_operations tracing_eval_map_fops = {
6000        .open           = tracing_eval_map_open,
6001        .read           = seq_read,
6002        .llseek         = seq_lseek,
6003        .release        = seq_release,
6004};
6005
6006static inline union trace_eval_map_item *
6007trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6008{
6009        /* Return tail of array given the head */
6010        return ptr + ptr->head.length + 1;
6011}
6012
6013static void
6014trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6015                           int len)
6016{
6017        struct trace_eval_map **stop;
6018        struct trace_eval_map **map;
6019        union trace_eval_map_item *map_array;
6020        union trace_eval_map_item *ptr;
6021
6022        stop = start + len;
6023
6024        /*
6025         * The trace_eval_maps contains the map plus a head and tail item,
6026         * where the head holds the module and length of array, and the
6027         * tail holds a pointer to the next list.
6028         */
6029        map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6030        if (!map_array) {
6031                pr_warn("Unable to allocate trace eval mapping\n");
6032                return;
6033        }
6034
6035        mutex_lock(&trace_eval_mutex);
6036
6037        if (!trace_eval_maps)
6038                trace_eval_maps = map_array;
6039        else {
6040                ptr = trace_eval_maps;
6041                for (;;) {
6042                        ptr = trace_eval_jmp_to_tail(ptr);
6043                        if (!ptr->tail.next)
6044                                break;
6045                        ptr = ptr->tail.next;
6046
6047                }
6048                ptr->tail.next = map_array;
6049        }
6050        map_array->head.mod = mod;
6051        map_array->head.length = len;
6052        map_array++;
6053
6054        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6055                map_array->map = **map;
6056                map_array++;
6057        }
6058        memset(map_array, 0, sizeof(*map_array));
6059
6060        mutex_unlock(&trace_eval_mutex);
6061}
6062
6063static void trace_create_eval_file(struct dentry *d_tracer)
6064{
6065        trace_create_file("eval_map", 0444, d_tracer,
6066                          NULL, &tracing_eval_map_fops);
6067}
6068
6069#else /* CONFIG_TRACE_EVAL_MAP_FILE */
6070static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6071static inline void trace_insert_eval_map_file(struct module *mod,
6072                              struct trace_eval_map **start, int len) { }
6073#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6074
6075static void trace_insert_eval_map(struct module *mod,
6076                                  struct trace_eval_map **start, int len)
6077{
6078        struct trace_eval_map **map;
6079
6080        if (len <= 0)
6081                return;
6082
6083        map = start;
6084
6085        trace_event_eval_update(map, len);
6086
6087        trace_insert_eval_map_file(mod, start, len);
6088}
6089
6090static ssize_t
6091tracing_set_trace_read(struct file *filp, char __user *ubuf,
6092                       size_t cnt, loff_t *ppos)
6093{
6094        struct trace_array *tr = filp->private_data;
6095        char buf[MAX_TRACER_SIZE+2];
6096        int r;
6097
6098        mutex_lock(&trace_types_lock);
6099        r = sprintf(buf, "%s\n", tr->current_trace->name);
6100        mutex_unlock(&trace_types_lock);
6101
6102        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6103}
6104
6105int tracer_init(struct tracer *t, struct trace_array *tr)
6106{
6107        tracing_reset_online_cpus(&tr->array_buffer);
6108        return t->init(tr);
6109}
6110
6111static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6112{
6113        int cpu;
6114
6115        for_each_tracing_cpu(cpu)
6116                per_cpu_ptr(buf->data, cpu)->entries = val;
6117}
6118
6119#ifdef CONFIG_TRACER_MAX_TRACE
6120/* resize @tr's buffer to the size of @size_tr's entries */
6121static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6122                                        struct array_buffer *size_buf, int cpu_id)
6123{
6124        int cpu, ret = 0;
6125
6126        if (cpu_id == RING_BUFFER_ALL_CPUS) {
6127                for_each_tracing_cpu(cpu) {
6128                        ret = ring_buffer_resize(trace_buf->buffer,
6129                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6130                        if (ret < 0)
6131                                break;
6132                        per_cpu_ptr(trace_buf->data, cpu)->entries =
6133                                per_cpu_ptr(size_buf->data, cpu)->entries;
6134                }
6135        } else {
6136                ret = ring_buffer_resize(trace_buf->buffer,
6137                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6138                if (ret == 0)
6139                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6140                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
6141        }
6142
6143        return ret;
6144}
6145#endif /* CONFIG_TRACER_MAX_TRACE */
6146
6147static int __tracing_resize_ring_buffer(struct trace_array *tr,
6148                                        unsigned long size, int cpu)
6149{
6150        int ret;
6151
6152        /*
6153         * If kernel or user changes the size of the ring buffer
6154         * we use the size that was given, and we can forget about
6155         * expanding it later.
6156         */
6157        ring_buffer_expanded = true;
6158
6159        /* May be called before buffers are initialized */
6160        if (!tr->array_buffer.buffer)
6161                return 0;
6162
6163        ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6164        if (ret < 0)
6165                return ret;
6166
6167#ifdef CONFIG_TRACER_MAX_TRACE
6168        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6169            !tr->current_trace->use_max_tr)
6170                goto out;
6171
6172        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6173        if (ret < 0) {
6174                int r = resize_buffer_duplicate_size(&tr->array_buffer,
6175                                                     &tr->array_buffer, cpu);
6176                if (r < 0) {
6177                        /*
6178                         * AARGH! We are left with different
6179                         * size max buffer!!!!
6180                         * The max buffer is our "snapshot" buffer.
6181                         * When a tracer needs a snapshot (one of the
6182                         * latency tracers), it swaps the max buffer
6183                         * with the saved snap shot. We succeeded to
6184                         * update the size of the main buffer, but failed to
6185                         * update the size of the max buffer. But when we tried
6186                         * to reset the main buffer to the original size, we
6187                         * failed there too. This is very unlikely to
6188                         * happen, but if it does, warn and kill all
6189                         * tracing.
6190                         */
6191                        WARN_ON(1);
6192                        tracing_disabled = 1;
6193                }
6194                return ret;
6195        }
6196
6197        if (cpu == RING_BUFFER_ALL_CPUS)
6198                set_buffer_entries(&tr->max_buffer, size);
6199        else
6200                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6201
6202 out:
6203#endif /* CONFIG_TRACER_MAX_TRACE */
6204
6205        if (cpu == RING_BUFFER_ALL_CPUS)
6206                set_buffer_entries(&tr->array_buffer, size);
6207        else
6208                per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6209
6210        return ret;
6211}
6212
6213ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6214                                  unsigned long size, int cpu_id)
6215{
6216        int ret;
6217
6218        mutex_lock(&trace_types_lock);
6219
6220        if (cpu_id != RING_BUFFER_ALL_CPUS) {
6221                /* make sure, this cpu is enabled in the mask */
6222                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6223                        ret = -EINVAL;
6224                        goto out;
6225                }
6226        }
6227
6228        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6229        if (ret < 0)
6230                ret = -ENOMEM;
6231
6232out:
6233        mutex_unlock(&trace_types_lock);
6234
6235        return ret;
6236}
6237
6238
6239/**
6240 * tracing_update_buffers - used by tracing facility to expand ring buffers
6241 *
6242 * To save on memory when the tracing is never used on a system with it
6243 * configured in. The ring buffers are set to a minimum size. But once
6244 * a user starts to use the tracing facility, then they need to grow
6245 * to their default size.
6246 *
6247 * This function is to be called when a tracer is about to be used.
6248 */
6249int tracing_update_buffers(void)
6250{
6251        int ret = 0;
6252
6253        mutex_lock(&trace_types_lock);
6254        if (!ring_buffer_expanded)
6255                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6256                                                RING_BUFFER_ALL_CPUS);
6257        mutex_unlock(&trace_types_lock);
6258
6259        return ret;
6260}
6261
6262struct trace_option_dentry;
6263
6264static void
6265create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6266
6267/*
6268 * Used to clear out the tracer before deletion of an instance.
6269 * Must have trace_types_lock held.
6270 */
6271static void tracing_set_nop(struct trace_array *tr)
6272{
6273        if (tr->current_trace == &nop_trace)
6274                return;
6275        
6276        tr->current_trace->enabled--;
6277
6278        if (tr->current_trace->reset)
6279                tr->current_trace->reset(tr);
6280
6281        tr->current_trace = &nop_trace;
6282}
6283
6284static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6285{
6286        /* Only enable if the directory has been created already. */
6287        if (!tr->dir)
6288                return;
6289
6290        create_trace_option_files(tr, t);
6291}
6292
6293int tracing_set_tracer(struct trace_array *tr, const char *buf)
6294{
6295        struct tracer *t;
6296#ifdef CONFIG_TRACER_MAX_TRACE
6297        bool had_max_tr;
6298#endif
6299        int ret = 0;
6300
6301        mutex_lock(&trace_types_lock);
6302
6303        if (!ring_buffer_expanded) {
6304                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6305                                                RING_BUFFER_ALL_CPUS);
6306                if (ret < 0)
6307                        goto out;
6308                ret = 0;
6309        }
6310
6311        for (t = trace_types; t; t = t->next) {
6312                if (strcmp(t->name, buf) == 0)
6313                        break;
6314        }
6315        if (!t) {
6316                ret = -EINVAL;
6317                goto out;
6318        }
6319        if (t == tr->current_trace)
6320                goto out;
6321
6322#ifdef CONFIG_TRACER_SNAPSHOT
6323        if (t->use_max_tr) {
6324                arch_spin_lock(&tr->max_lock);
6325                if (tr->cond_snapshot)
6326                        ret = -EBUSY;
6327                arch_spin_unlock(&tr->max_lock);
6328                if (ret)
6329                        goto out;
6330        }
6331#endif
6332        /* Some tracers won't work on kernel command line */
6333        if (system_state < SYSTEM_RUNNING && t->noboot) {
6334                pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6335                        t->name);
6336                goto out;
6337        }
6338
6339        /* Some tracers are only allowed for the top level buffer */
6340        if (!trace_ok_for_array(t, tr)) {
6341                ret = -EINVAL;
6342                goto out;
6343        }
6344
6345        /* If trace pipe files are being read, we can't change the tracer */
6346        if (tr->trace_ref) {
6347                ret = -EBUSY;
6348                goto out;
6349        }
6350
6351        trace_branch_disable();
6352
6353        tr->current_trace->enabled--;
6354
6355        if (tr->current_trace->reset)
6356                tr->current_trace->reset(tr);
6357
6358        /* Current trace needs to be nop_trace before synchronize_rcu */
6359        tr->current_trace = &nop_trace;
6360
6361#ifdef CONFIG_TRACER_MAX_TRACE
6362        had_max_tr = tr->allocated_snapshot;
6363
6364        if (had_max_tr && !t->use_max_tr) {
6365                /*
6366                 * We need to make sure that the update_max_tr sees that
6367                 * current_trace changed to nop_trace to keep it from
6368                 * swapping the buffers after we resize it.
6369                 * The update_max_tr is called from interrupts disabled
6370                 * so a synchronized_sched() is sufficient.
6371                 */
6372                synchronize_rcu();
6373                free_snapshot(tr);
6374        }
6375#endif
6376
6377#ifdef CONFIG_TRACER_MAX_TRACE
6378        if (t->use_max_tr && !had_max_tr) {
6379                ret = tracing_alloc_snapshot_instance(tr);
6380                if (ret < 0)
6381                        goto out;
6382        }
6383#endif
6384
6385        if (t->init) {
6386                ret = tracer_init(t, tr);
6387                if (ret)
6388                        goto out;
6389        }
6390
6391        tr->current_trace = t;
6392        tr->current_trace->enabled++;
6393        trace_branch_enable(tr);
6394 out:
6395        mutex_unlock(&trace_types_lock);
6396
6397        return ret;
6398}
6399
6400static ssize_t
6401tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6402                        size_t cnt, loff_t *ppos)
6403{
6404        struct trace_array *tr = filp->private_data;
6405        char buf[MAX_TRACER_SIZE+1];
6406        int i;
6407        size_t ret;
6408        int err;
6409
6410        ret = cnt;
6411
6412        if (cnt > MAX_TRACER_SIZE)
6413                cnt = MAX_TRACER_SIZE;
6414
6415        if (copy_from_user(buf, ubuf, cnt))
6416                return -EFAULT;
6417
6418        buf[cnt] = 0;
6419
6420        /* strip ending whitespace. */
6421        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6422                buf[i] = 0;
6423
6424        err = tracing_set_tracer(tr, buf);
6425        if (err)
6426                return err;
6427
6428        *ppos += ret;
6429
6430        return ret;
6431}
6432
6433static ssize_t
6434tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6435                   size_t cnt, loff_t *ppos)
6436{
6437        char buf[64];
6438        int r;
6439
6440        r = snprintf(buf, sizeof(buf), "%ld\n",
6441                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6442        if (r > sizeof(buf))
6443                r = sizeof(buf);
6444        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6445}
6446
6447static ssize_t
6448tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6449                    size_t cnt, loff_t *ppos)
6450{
6451        unsigned long val;
6452        int ret;
6453
6454        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6455        if (ret)
6456                return ret;
6457
6458        *ptr = val * 1000;
6459
6460        return cnt;
6461}
6462
6463static ssize_t
6464tracing_thresh_read(struct file *filp, char __user *ubuf,
6465                    size_t cnt, loff_t *ppos)
6466{
6467        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6468}
6469
6470static ssize_t
6471tracing_thresh_write(struct file *filp, const char __user *ubuf,
6472                     size_t cnt, loff_t *ppos)
6473{
6474        struct trace_array *tr = filp->private_data;
6475        int ret;
6476
6477        mutex_lock(&trace_types_lock);
6478        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6479        if (ret < 0)
6480                goto out;
6481
6482        if (tr->current_trace->update_thresh) {
6483                ret = tr->current_trace->update_thresh(tr);
6484                if (ret < 0)
6485                        goto out;
6486        }
6487
6488        ret = cnt;
6489out:
6490        mutex_unlock(&trace_types_lock);
6491
6492        return ret;
6493}
6494
6495#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6496
6497static ssize_t
6498tracing_max_lat_read(struct file *filp, char __user *ubuf,
6499                     size_t cnt, loff_t *ppos)
6500{
6501        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6502}
6503
6504static ssize_t
6505tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6506                      size_t cnt, loff_t *ppos)
6507{
6508        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6509}
6510
6511#endif
6512
6513static int tracing_open_pipe(struct inode *inode, struct file *filp)
6514{
6515        struct trace_array *tr = inode->i_private;
6516        struct trace_iterator *iter;
6517        int ret;
6518
6519        ret = tracing_check_open_get_tr(tr);
6520        if (ret)
6521                return ret;
6522
6523        mutex_lock(&trace_types_lock);
6524
6525        /* create a buffer to store the information to pass to userspace */
6526        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6527        if (!iter) {
6528                ret = -ENOMEM;
6529                __trace_array_put(tr);
6530                goto out;
6531        }
6532
6533        trace_seq_init(&iter->seq);
6534        iter->trace = tr->current_trace;
6535
6536        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6537                ret = -ENOMEM;
6538                goto fail;
6539        }
6540
6541        /* trace pipe does not show start of buffer */
6542        cpumask_setall(iter->started);
6543
6544        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6545                iter->iter_flags |= TRACE_FILE_LAT_FMT;
6546
6547        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6548        if (trace_clocks[tr->clock_id].in_ns)
6549                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6550
6551        iter->tr = tr;
6552        iter->array_buffer = &tr->array_buffer;
6553        iter->cpu_file = tracing_get_cpu(inode);
6554        mutex_init(&iter->mutex);
6555        filp->private_data = iter;
6556
6557        if (iter->trace->pipe_open)
6558                iter->trace->pipe_open(iter);
6559
6560        nonseekable_open(inode, filp);
6561
6562        tr->trace_ref++;
6563out:
6564        mutex_unlock(&trace_types_lock);
6565        return ret;
6566
6567fail:
6568        kfree(iter);
6569        __trace_array_put(tr);
6570        mutex_unlock(&trace_types_lock);
6571        return ret;
6572}
6573
6574static int tracing_release_pipe(struct inode *inode, struct file *file)
6575{
6576        struct trace_iterator *iter = file->private_data;
6577        struct trace_array *tr = inode->i_private;
6578
6579        mutex_lock(&trace_types_lock);
6580
6581        tr->trace_ref--;
6582
6583        if (iter->trace->pipe_close)
6584                iter->trace->pipe_close(iter);
6585
6586        mutex_unlock(&trace_types_lock);
6587
6588        free_cpumask_var(iter->started);
6589        mutex_destroy(&iter->mutex);
6590        kfree(iter);
6591
6592        trace_array_put(tr);
6593
6594        return 0;
6595}
6596
6597static __poll_t
6598trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6599{
6600        struct trace_array *tr = iter->tr;
6601
6602        /* Iterators are static, they should be filled or empty */
6603        if (trace_buffer_iter(iter, iter->cpu_file))
6604                return EPOLLIN | EPOLLRDNORM;
6605
6606        if (tr->trace_flags & TRACE_ITER_BLOCK)
6607                /*
6608                 * Always select as readable when in blocking mode
6609                 */
6610                return EPOLLIN | EPOLLRDNORM;
6611        else
6612                return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6613                                             filp, poll_table);
6614}
6615
6616static __poll_t
6617tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6618{
6619        struct trace_iterator *iter = filp->private_data;
6620
6621        return trace_poll(iter, filp, poll_table);
6622}
6623
6624/* Must be called with iter->mutex held. */
6625static int tracing_wait_pipe(struct file *filp)
6626{
6627        struct trace_iterator *iter = filp->private_data;
6628        int ret;
6629
6630        while (trace_empty(iter)) {
6631
6632                if ((filp->f_flags & O_NONBLOCK)) {
6633                        return -EAGAIN;
6634                }
6635
6636                /*
6637                 * We block until we read something and tracing is disabled.
6638                 * We still block if tracing is disabled, but we have never
6639                 * read anything. This allows a user to cat this file, and
6640                 * then enable tracing. But after we have read something,
6641                 * we give an EOF when tracing is again disabled.
6642                 *
6643                 * iter->pos will be 0 if we haven't read anything.
6644                 */
6645                if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6646                        break;
6647
6648                mutex_unlock(&iter->mutex);
6649
6650                ret = wait_on_pipe(iter, 0);
6651
6652                mutex_lock(&iter->mutex);
6653
6654                if (ret)
6655                        return ret;
6656        }
6657
6658        return 1;
6659}
6660
6661/*
6662 * Consumer reader.
6663 */
6664static ssize_t
6665tracing_read_pipe(struct file *filp, char __user *ubuf,
6666                  size_t cnt, loff_t *ppos)
6667{
6668        struct trace_iterator *iter = filp->private_data;
6669        ssize_t sret;
6670
6671        /*
6672         * Avoid more than one consumer on a single file descriptor
6673         * This is just a matter of traces coherency, the ring buffer itself
6674         * is protected.
6675         */
6676        mutex_lock(&iter->mutex);
6677
6678        /* return any leftover data */
6679        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6680        if (sret != -EBUSY)
6681                goto out;
6682
6683        trace_seq_init(&iter->seq);
6684
6685        if (iter->trace->read) {
6686                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6687                if (sret)
6688                        goto out;
6689        }
6690
6691waitagain:
6692        sret = tracing_wait_pipe(filp);
6693        if (sret <= 0)
6694                goto out;
6695
6696        /* stop when tracing is finished */
6697        if (trace_empty(iter)) {
6698                sret = 0;
6699                goto out;
6700        }
6701
6702        if (cnt >= PAGE_SIZE)
6703                cnt = PAGE_SIZE - 1;
6704
6705        /* reset all but tr, trace, and overruns */
6706        memset(&iter->seq, 0,
6707               sizeof(struct trace_iterator) -
6708               offsetof(struct trace_iterator, seq));
6709        cpumask_clear(iter->started);
6710        trace_seq_init(&iter->seq);
6711        iter->pos = -1;
6712
6713        trace_event_read_lock();
6714        trace_access_lock(iter->cpu_file);
6715        while (trace_find_next_entry_inc(iter) != NULL) {
6716                enum print_line_t ret;
6717                int save_len = iter->seq.seq.len;
6718
6719                ret = print_trace_line(iter);
6720                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6721                        /* don't print partial lines */
6722                        iter->seq.seq.len = save_len;
6723                        break;
6724                }
6725                if (ret != TRACE_TYPE_NO_CONSUME)
6726                        trace_consume(iter);
6727
6728                if (trace_seq_used(&iter->seq) >= cnt)
6729                        break;
6730
6731                /*
6732                 * Setting the full flag means we reached the trace_seq buffer
6733                 * size and we should leave by partial output condition above.
6734                 * One of the trace_seq_* functions is not used properly.
6735                 */
6736                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6737                          iter->ent->type);
6738        }
6739        trace_access_unlock(iter->cpu_file);
6740        trace_event_read_unlock();
6741
6742        /* Now copy what we have to the user */
6743        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6744        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6745                trace_seq_init(&iter->seq);
6746
6747        /*
6748         * If there was nothing to send to user, in spite of consuming trace
6749         * entries, go back to wait for more entries.
6750         */
6751        if (sret == -EBUSY)
6752                goto waitagain;
6753
6754out:
6755        mutex_unlock(&iter->mutex);
6756
6757        return sret;
6758}
6759
6760static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6761                                     unsigned int idx)
6762{
6763        __free_page(spd->pages[idx]);
6764}
6765
6766static size_t
6767tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6768{
6769        size_t count;
6770        int save_len;
6771        int ret;
6772
6773        /* Seq buffer is page-sized, exactly what we need. */
6774        for (;;) {
6775                save_len = iter->seq.seq.len;
6776                ret = print_trace_line(iter);
6777
6778                if (trace_seq_has_overflowed(&iter->seq)) {
6779                        iter->seq.seq.len = save_len;
6780                        break;
6781                }
6782
6783                /*
6784                 * This should not be hit, because it should only
6785                 * be set if the iter->seq overflowed. But check it
6786                 * anyway to be safe.
6787                 */
6788                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6789                        iter->seq.seq.len = save_len;
6790                        break;
6791                }
6792
6793                count = trace_seq_used(&iter->seq) - save_len;
6794                if (rem < count) {
6795                        rem = 0;
6796                        iter->seq.seq.len = save_len;
6797                        break;
6798                }
6799
6800                if (ret != TRACE_TYPE_NO_CONSUME)
6801                        trace_consume(iter);
6802                rem -= count;
6803                if (!trace_find_next_entry_inc(iter))   {
6804                        rem = 0;
6805                        iter->ent = NULL;
6806                        break;
6807                }
6808        }
6809
6810        return rem;
6811}
6812
6813static ssize_t tracing_splice_read_pipe(struct file *filp,
6814                                        loff_t *ppos,
6815                                        struct pipe_inode_info *pipe,
6816                                        size_t len,
6817                                        unsigned int flags)
6818{
6819        struct page *pages_def[PIPE_DEF_BUFFERS];
6820        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6821        struct trace_iterator *iter = filp->private_data;
6822        struct splice_pipe_desc spd = {
6823                .pages          = pages_def,
6824                .partial        = partial_def,
6825                .nr_pages       = 0, /* This gets updated below. */
6826                .nr_pages_max   = PIPE_DEF_BUFFERS,
6827                .ops            = &default_pipe_buf_ops,
6828                .spd_release    = tracing_spd_release_pipe,
6829        };
6830        ssize_t ret;
6831        size_t rem;
6832        unsigned int i;
6833
6834        if (splice_grow_spd(pipe, &spd))
6835                return -ENOMEM;
6836
6837        mutex_lock(&iter->mutex);
6838
6839        if (iter->trace->splice_read) {
6840                ret = iter->trace->splice_read(iter, filp,
6841                                               ppos, pipe, len, flags);
6842                if (ret)
6843                        goto out_err;
6844        }
6845
6846        ret = tracing_wait_pipe(filp);
6847        if (ret <= 0)
6848                goto out_err;
6849
6850        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6851                ret = -EFAULT;
6852                goto out_err;
6853        }
6854
6855        trace_event_read_lock();
6856        trace_access_lock(iter->cpu_file);
6857
6858        /* Fill as many pages as possible. */
6859        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6860                spd.pages[i] = alloc_page(GFP_KERNEL);
6861                if (!spd.pages[i])
6862                        break;
6863
6864                rem = tracing_fill_pipe_page(rem, iter);
6865
6866                /* Copy the data into the page, so we can start over. */
6867                ret = trace_seq_to_buffer(&iter->seq,
6868                                          page_address(spd.pages[i]),
6869                                          trace_seq_used(&iter->seq));
6870                if (ret < 0) {
6871                        __free_page(spd.pages[i]);
6872                        break;
6873                }
6874                spd.partial[i].offset = 0;
6875                spd.partial[i].len = trace_seq_used(&iter->seq);
6876
6877                trace_seq_init(&iter->seq);
6878        }
6879
6880        trace_access_unlock(iter->cpu_file);
6881        trace_event_read_unlock();
6882        mutex_unlock(&iter->mutex);
6883
6884        spd.nr_pages = i;
6885
6886        if (i)
6887                ret = splice_to_pipe(pipe, &spd);
6888        else
6889                ret = 0;
6890out:
6891        splice_shrink_spd(&spd);
6892        return ret;
6893
6894out_err:
6895        mutex_unlock(&iter->mutex);
6896        goto out;
6897}
6898
6899static ssize_t
6900tracing_entries_read(struct file *filp, char __user *ubuf,
6901                     size_t cnt, loff_t *ppos)
6902{
6903        struct inode *inode = file_inode(filp);
6904        struct trace_array *tr = inode->i_private;
6905        int cpu = tracing_get_cpu(inode);
6906        char buf[64];
6907        int r = 0;
6908        ssize_t ret;
6909
6910        mutex_lock(&trace_types_lock);
6911
6912        if (cpu == RING_BUFFER_ALL_CPUS) {
6913                int cpu, buf_size_same;
6914                unsigned long size;
6915
6916                size = 0;
6917                buf_size_same = 1;
6918                /* check if all cpu sizes are same */
6919                for_each_tracing_cpu(cpu) {
6920                        /* fill in the size from first enabled cpu */
6921                        if (size == 0)
6922                                size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6923                        if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6924                                buf_size_same = 0;
6925                                break;
6926                        }
6927                }
6928
6929                if (buf_size_same) {
6930                        if (!ring_buffer_expanded)
6931                                r = sprintf(buf, "%lu (expanded: %lu)\n",
6932                                            size >> 10,
6933                                            trace_buf_size >> 10);
6934                        else
6935                                r = sprintf(buf, "%lu\n", size >> 10);
6936                } else
6937                        r = sprintf(buf, "X\n");
6938        } else
6939                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6940
6941        mutex_unlock(&trace_types_lock);
6942
6943        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6944        return ret;
6945}
6946
6947static ssize_t
6948tracing_entries_write(struct file *filp, const char __user *ubuf,
6949                      size_t cnt, loff_t *ppos)
6950{
6951        struct inode *inode = file_inode(filp);
6952        struct trace_array *tr = inode->i_private;
6953        unsigned long val;
6954        int ret;
6955
6956        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6957        if (ret)
6958                return ret;
6959
6960        /* must have at least 1 entry */
6961        if (!val)
6962                return -EINVAL;
6963
6964        /* value is in KB */
6965        val <<= 10;
6966        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6967        if (ret < 0)
6968                return ret;
6969
6970        *ppos += cnt;
6971
6972        return cnt;
6973}
6974
6975static ssize_t
6976tracing_total_entries_read(struct file *filp, char __user *ubuf,
6977                                size_t cnt, loff_t *ppos)
6978{
6979        struct trace_array *tr = filp->private_data;
6980        char buf[64];
6981        int r, cpu;
6982        unsigned long size = 0, expanded_size = 0;
6983
6984        mutex_lock(&trace_types_lock);
6985        for_each_tracing_cpu(cpu) {
6986                size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6987                if (!ring_buffer_expanded)
6988                        expanded_size += trace_buf_size >> 10;
6989        }
6990        if (ring_buffer_expanded)
6991                r = sprintf(buf, "%lu\n", size);
6992        else
6993                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6994        mutex_unlock(&trace_types_lock);
6995
6996        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6997}
6998
6999static ssize_t
7000tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7001                          size_t cnt, loff_t *ppos)
7002{
7003        /*
7004         * There is no need to read what the user has written, this function
7005         * is just to make sure that there is no error when "echo" is used
7006         */
7007
7008        *ppos += cnt;
7009
7010        return cnt;
7011}
7012
7013static int
7014tracing_free_buffer_release(struct inode *inode, struct file *filp)
7015{
7016        struct trace_array *tr = inode->i_private;
7017
7018        /* disable tracing ? */
7019        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7020                tracer_tracing_off(tr);
7021        /* resize the ring buffer to 0 */
7022        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7023
7024        trace_array_put(tr);
7025
7026        return 0;
7027}
7028
7029static ssize_t
7030tracing_mark_write(struct file *filp, const char __user *ubuf,
7031                                        size_t cnt, loff_t *fpos)
7032{
7033        struct trace_array *tr = filp->private_data;
7034        struct ring_buffer_event *event;
7035        enum event_trigger_type tt = ETT_NONE;
7036        struct trace_buffer *buffer;
7037        struct print_entry *entry;
7038        ssize_t written;
7039        int size;
7040        int len;
7041
7042/* Used in tracing_mark_raw_write() as well */
7043#define FAULTED_STR "<faulted>"
7044#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7045
7046        if (tracing_disabled)
7047                return -EINVAL;
7048
7049        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7050                return -EINVAL;
7051
7052        if (cnt > TRACE_BUF_SIZE)
7053                cnt = TRACE_BUF_SIZE;
7054
7055        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7056
7057        size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7058
7059        /* If less than "<faulted>", then make sure we can still add that */
7060        if (cnt < FAULTED_SIZE)
7061                size += FAULTED_SIZE - cnt;
7062
7063        buffer = tr->array_buffer.buffer;
7064        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7065                                            tracing_gen_ctx());
7066        if (unlikely(!event))
7067                /* Ring buffer disabled, return as if not open for write */
7068                return -EBADF;
7069
7070        entry = ring_buffer_event_data(event);
7071        entry->ip = _THIS_IP_;
7072
7073        len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7074        if (len) {
7075                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7076                cnt = FAULTED_SIZE;
7077                written = -EFAULT;
7078        } else
7079                written = cnt;
7080
7081        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7082                /* do not add \n before testing triggers, but add \0 */
7083                entry->buf[cnt] = '\0';
7084                tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7085        }
7086
7087        if (entry->buf[cnt - 1] != '\n') {
7088                entry->buf[cnt] = '\n';
7089                entry->buf[cnt + 1] = '\0';
7090        } else
7091                entry->buf[cnt] = '\0';
7092
7093        if (static_branch_unlikely(&trace_marker_exports_enabled))
7094                ftrace_exports(event, TRACE_EXPORT_MARKER);
7095        __buffer_unlock_commit(buffer, event);
7096
7097        if (tt)
7098                event_triggers_post_call(tr->trace_marker_file, tt);
7099
7100        if (written > 0)
7101                *fpos += written;
7102
7103        return written;
7104}
7105
7106/* Limit it for now to 3K (including tag) */
7107#define RAW_DATA_MAX_SIZE (1024*3)
7108
7109static ssize_t
7110tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7111                                        size_t cnt, loff_t *fpos)
7112{
7113        struct trace_array *tr = filp->private_data;
7114        struct ring_buffer_event *event;
7115        struct trace_buffer *buffer;
7116        struct raw_data_entry *entry;
7117        ssize_t written;
7118        int size;
7119        int len;
7120
7121#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7122
7123        if (tracing_disabled)
7124                return -EINVAL;
7125
7126        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7127                return -EINVAL;
7128
7129        /* The marker must at least have a tag id */
7130        if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7131                return -EINVAL;
7132
7133        if (cnt > TRACE_BUF_SIZE)
7134                cnt = TRACE_BUF_SIZE;
7135
7136        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7137
7138        size = sizeof(*entry) + cnt;
7139        if (cnt < FAULT_SIZE_ID)
7140                size += FAULT_SIZE_ID - cnt;
7141
7142        buffer = tr->array_buffer.buffer;
7143        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7144                                            tracing_gen_ctx());
7145        if (!event)
7146                /* Ring buffer disabled, return as if not open for write */
7147                return -EBADF;
7148
7149        entry = ring_buffer_event_data(event);
7150
7151        len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7152        if (len) {
7153                entry->id = -1;
7154                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7155                written = -EFAULT;
7156        } else
7157                written = cnt;
7158
7159        __buffer_unlock_commit(buffer, event);
7160
7161        if (written > 0)
7162                *fpos += written;
7163
7164        return written;
7165}
7166
7167static int tracing_clock_show(struct seq_file *m, void *v)
7168{
7169        struct trace_array *tr = m->private;
7170        int i;
7171
7172        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7173                seq_printf(m,
7174                        "%s%s%s%s", i ? " " : "",
7175                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7176                        i == tr->clock_id ? "]" : "");
7177        seq_putc(m, '\n');
7178
7179        return 0;
7180}
7181
7182int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7183{
7184        int i;
7185
7186        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7187                if (strcmp(trace_clocks[i].name, clockstr) == 0)
7188                        break;
7189        }
7190        if (i == ARRAY_SIZE(trace_clocks))
7191                return -EINVAL;
7192
7193        mutex_lock(&trace_types_lock);
7194
7195        tr->clock_id = i;
7196
7197        ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7198
7199        /*
7200         * New clock may not be consistent with the previous clock.
7201         * Reset the buffer so that it doesn't have incomparable timestamps.
7202         */
7203        tracing_reset_online_cpus(&tr->array_buffer);
7204
7205#ifdef CONFIG_TRACER_MAX_TRACE
7206        if (tr->max_buffer.buffer)
7207                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7208        tracing_reset_online_cpus(&tr->max_buffer);
7209#endif
7210
7211        mutex_unlock(&trace_types_lock);
7212
7213        return 0;
7214}
7215
7216static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7217                                   size_t cnt, loff_t *fpos)
7218{
7219        struct seq_file *m = filp->private_data;
7220        struct trace_array *tr = m->private;
7221        char buf[64];
7222        const char *clockstr;
7223        int ret;
7224
7225        if (cnt >= sizeof(buf))
7226                return -EINVAL;
7227
7228        if (copy_from_user(buf, ubuf, cnt))
7229                return -EFAULT;
7230
7231        buf[cnt] = 0;
7232
7233        clockstr = strstrip(buf);
7234
7235        ret = tracing_set_clock(tr, clockstr);
7236        if (ret)
7237                return ret;
7238
7239        *fpos += cnt;
7240
7241        return cnt;
7242}
7243
7244static int tracing_clock_open(struct inode *inode, struct file *file)
7245{
7246        struct trace_array *tr = inode->i_private;
7247        int ret;
7248
7249        ret = tracing_check_open_get_tr(tr);
7250        if (ret)
7251                return ret;
7252
7253        ret = single_open(file, tracing_clock_show, inode->i_private);
7254        if (ret < 0)
7255                trace_array_put(tr);
7256
7257        return ret;
7258}
7259
7260static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7261{
7262        struct trace_array *tr = m->private;
7263
7264        mutex_lock(&trace_types_lock);
7265
7266        if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7267                seq_puts(m, "delta [absolute]\n");
7268        else
7269                seq_puts(m, "[delta] absolute\n");
7270
7271        mutex_unlock(&trace_types_lock);
7272
7273        return 0;
7274}
7275
7276static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7277{
7278        struct trace_array *tr = inode->i_private;
7279        int ret;
7280
7281        ret = tracing_check_open_get_tr(tr);
7282        if (ret)
7283                return ret;
7284
7285        ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7286        if (ret < 0)
7287                trace_array_put(tr);
7288
7289        return ret;
7290}
7291
7292u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7293{
7294        if (rbe == this_cpu_read(trace_buffered_event))
7295                return ring_buffer_time_stamp(buffer);
7296
7297        return ring_buffer_event_time_stamp(buffer, rbe);
7298}
7299
7300/*
7301 * Set or disable using the per CPU trace_buffer_event when possible.
7302 */
7303int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7304{
7305        int ret = 0;
7306
7307        mutex_lock(&trace_types_lock);
7308
7309        if (set && tr->no_filter_buffering_ref++)
7310                goto out;
7311
7312        if (!set) {
7313                if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7314                        ret = -EINVAL;
7315                        goto out;
7316                }
7317
7318                --tr->no_filter_buffering_ref;
7319        }
7320 out:
7321        mutex_unlock(&trace_types_lock);
7322
7323        return ret;
7324}
7325
7326struct ftrace_buffer_info {
7327        struct trace_iterator   iter;
7328        void                    *spare;
7329        unsigned int            spare_cpu;
7330        unsigned int            read;
7331};
7332
7333#ifdef CONFIG_TRACER_SNAPSHOT
7334static int tracing_snapshot_open(struct inode *inode, struct file *file)
7335{
7336        struct trace_array *tr = inode->i_private;
7337        struct trace_iterator *iter;
7338        struct seq_file *m;
7339        int ret;
7340
7341        ret = tracing_check_open_get_tr(tr);
7342        if (ret)
7343                return ret;
7344
7345        if (file->f_mode & FMODE_READ) {
7346                iter = __tracing_open(inode, file, true);
7347                if (IS_ERR(iter))
7348                        ret = PTR_ERR(iter);
7349        } else {
7350                /* Writes still need the seq_file to hold the private data */
7351                ret = -ENOMEM;
7352                m = kzalloc(sizeof(*m), GFP_KERNEL);
7353                if (!m)
7354                        goto out;
7355                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7356                if (!iter) {
7357                        kfree(m);
7358                        goto out;
7359                }
7360                ret = 0;
7361
7362                iter->tr = tr;
7363                iter->array_buffer = &tr->max_buffer;
7364                iter->cpu_file = tracing_get_cpu(inode);
7365                m->private = iter;
7366                file->private_data = m;
7367        }
7368out:
7369        if (ret < 0)
7370                trace_array_put(tr);
7371
7372        return ret;
7373}
7374
7375static ssize_t
7376tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7377                       loff_t *ppos)
7378{
7379        struct seq_file *m = filp->private_data;
7380        struct trace_iterator *iter = m->private;
7381        struct trace_array *tr = iter->tr;
7382        unsigned long val;
7383        int ret;
7384
7385        ret = tracing_update_buffers();
7386        if (ret < 0)
7387                return ret;
7388
7389        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7390        if (ret)
7391                return ret;
7392
7393        mutex_lock(&trace_types_lock);
7394
7395        if (tr->current_trace->use_max_tr) {
7396                ret = -EBUSY;
7397                goto out;
7398        }
7399
7400        arch_spin_lock(&tr->max_lock);
7401        if (tr->cond_snapshot)
7402                ret = -EBUSY;
7403        arch_spin_unlock(&tr->max_lock);
7404        if (ret)
7405                goto out;
7406
7407        switch (val) {
7408        case 0:
7409                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7410                        ret = -EINVAL;
7411                        break;
7412                }
7413                if (tr->allocated_snapshot)
7414                        free_snapshot(tr);
7415                break;
7416        case 1:
7417/* Only allow per-cpu swap if the ring buffer supports it */
7418#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7419                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7420                        ret = -EINVAL;
7421                        break;
7422                }
7423#endif
7424                if (tr->allocated_snapshot)
7425                        ret = resize_buffer_duplicate_size(&tr->max_buffer,
7426                                        &tr->array_buffer, iter->cpu_file);
7427                else
7428                        ret = tracing_alloc_snapshot_instance(tr);
7429                if (ret < 0)
7430                        break;
7431                local_irq_disable();
7432                /* Now, we're going to swap */
7433                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7434                        update_max_tr(tr, current, smp_processor_id(), NULL);
7435                else
7436                        update_max_tr_single(tr, current, iter->cpu_file);
7437                local_irq_enable();
7438                break;
7439        default:
7440                if (tr->allocated_snapshot) {
7441                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7442                                tracing_reset_online_cpus(&tr->max_buffer);
7443                        else
7444                                tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7445                }
7446                break;
7447        }
7448
7449        if (ret >= 0) {
7450                *ppos += cnt;
7451                ret = cnt;
7452        }
7453out:
7454        mutex_unlock(&trace_types_lock);
7455        return ret;
7456}
7457
7458static int tracing_snapshot_release(struct inode *inode, struct file *file)
7459{
7460        struct seq_file *m = file->private_data;
7461        int ret;
7462
7463        ret = tracing_release(inode, file);
7464
7465        if (file->f_mode & FMODE_READ)
7466                return ret;
7467
7468        /* If write only, the seq_file is just a stub */
7469        if (m)
7470                kfree(m->private);
7471        kfree(m);
7472
7473        return 0;
7474}
7475
7476static int tracing_buffers_open(struct inode *inode, struct file *filp);
7477static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7478                                    size_t count, loff_t *ppos);
7479static int tracing_buffers_release(struct inode *inode, struct file *file);
7480static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7481                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7482
7483static int snapshot_raw_open(struct inode *inode, struct file *filp)
7484{
7485        struct ftrace_buffer_info *info;
7486        int ret;
7487
7488        /* The following checks for tracefs lockdown */
7489        ret = tracing_buffers_open(inode, filp);
7490        if (ret < 0)
7491                return ret;
7492
7493        info = filp->private_data;
7494
7495        if (info->iter.trace->use_max_tr) {
7496                tracing_buffers_release(inode, filp);
7497                return -EBUSY;
7498        }
7499
7500        info->iter.snapshot = true;
7501        info->iter.array_buffer = &info->iter.tr->max_buffer;
7502
7503        return ret;
7504}
7505
7506#endif /* CONFIG_TRACER_SNAPSHOT */
7507
7508
7509static const struct file_operations tracing_thresh_fops = {
7510        .open           = tracing_open_generic,
7511        .read           = tracing_thresh_read,
7512        .write          = tracing_thresh_write,
7513        .llseek         = generic_file_llseek,
7514};
7515
7516#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7517static const struct file_operations tracing_max_lat_fops = {
7518        .open           = tracing_open_generic,
7519        .read           = tracing_max_lat_read,
7520        .write          = tracing_max_lat_write,
7521        .llseek         = generic_file_llseek,
7522};
7523#endif
7524
7525static const struct file_operations set_tracer_fops = {
7526        .open           = tracing_open_generic,
7527        .read           = tracing_set_trace_read,
7528        .write          = tracing_set_trace_write,
7529        .llseek         = generic_file_llseek,
7530};
7531
7532static const struct file_operations tracing_pipe_fops = {
7533        .open           = tracing_open_pipe,
7534        .poll           = tracing_poll_pipe,
7535        .read           = tracing_read_pipe,
7536        .splice_read    = tracing_splice_read_pipe,
7537        .release        = tracing_release_pipe,
7538        .llseek         = no_llseek,
7539};
7540
7541static const struct file_operations tracing_entries_fops = {
7542        .open           = tracing_open_generic_tr,
7543        .read           = tracing_entries_read,
7544        .write          = tracing_entries_write,
7545        .llseek         = generic_file_llseek,
7546        .release        = tracing_release_generic_tr,
7547};
7548
7549static const struct file_operations tracing_total_entries_fops = {
7550        .open           = tracing_open_generic_tr,
7551        .read           = tracing_total_entries_read,
7552        .llseek         = generic_file_llseek,
7553        .release        = tracing_release_generic_tr,
7554};
7555
7556static const struct file_operations tracing_free_buffer_fops = {
7557        .open           = tracing_open_generic_tr,
7558        .write          = tracing_free_buffer_write,
7559        .release        = tracing_free_buffer_release,
7560};
7561
7562static const struct file_operations tracing_mark_fops = {
7563        .open           = tracing_open_generic_tr,
7564        .write          = tracing_mark_write,
7565        .llseek         = generic_file_llseek,
7566        .release        = tracing_release_generic_tr,
7567};
7568
7569static const struct file_operations tracing_mark_raw_fops = {
7570        .open           = tracing_open_generic_tr,
7571        .write          = tracing_mark_raw_write,
7572        .llseek         = generic_file_llseek,
7573        .release        = tracing_release_generic_tr,
7574};
7575
7576static const struct file_operations trace_clock_fops = {
7577        .open           = tracing_clock_open,
7578        .read           = seq_read,
7579        .llseek         = seq_lseek,
7580        .release        = tracing_single_release_tr,
7581        .write          = tracing_clock_write,
7582};
7583
7584static const struct file_operations trace_time_stamp_mode_fops = {
7585        .open           = tracing_time_stamp_mode_open,
7586        .read           = seq_read,
7587        .llseek         = seq_lseek,
7588        .release        = tracing_single_release_tr,
7589};
7590
7591#ifdef CONFIG_TRACER_SNAPSHOT
7592static const struct file_operations snapshot_fops = {
7593        .open           = tracing_snapshot_open,
7594        .read           = seq_read,
7595        .write          = tracing_snapshot_write,
7596        .llseek         = tracing_lseek,
7597        .release        = tracing_snapshot_release,
7598};
7599
7600static const struct file_operations snapshot_raw_fops = {
7601        .open           = snapshot_raw_open,
7602        .read           = tracing_buffers_read,
7603        .release        = tracing_buffers_release,
7604        .splice_read    = tracing_buffers_splice_read,
7605        .llseek         = no_llseek,
7606};
7607
7608#endif /* CONFIG_TRACER_SNAPSHOT */
7609
7610/*
7611 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7612 * @filp: The active open file structure
7613 * @ubuf: The userspace provided buffer to read value into
7614 * @cnt: The maximum number of bytes to read
7615 * @ppos: The current "file" position
7616 *
7617 * This function implements the write interface for a struct trace_min_max_param.
7618 * The filp->private_data must point to a trace_min_max_param structure that
7619 * defines where to write the value, the min and the max acceptable values,
7620 * and a lock to protect the write.
7621 */
7622static ssize_t
7623trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7624{
7625        struct trace_min_max_param *param = filp->private_data;
7626        u64 val;
7627        int err;
7628
7629        if (!param)
7630                return -EFAULT;
7631
7632        err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7633        if (err)
7634                return err;
7635
7636        if (param->lock)
7637                mutex_lock(param->lock);
7638
7639        if (param->min && val < *param->min)
7640                err = -EINVAL;
7641
7642        if (param->max && val > *param->max)
7643                err = -EINVAL;
7644
7645        if (!err)
7646                *param->val = val;
7647
7648        if (param->lock)
7649                mutex_unlock(param->lock);
7650
7651        if (err)
7652                return err;
7653
7654        return cnt;
7655}
7656
7657/*
7658 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7659 * @filp: The active open file structure
7660 * @ubuf: The userspace provided buffer to read value into
7661 * @cnt: The maximum number of bytes to read
7662 * @ppos: The current "file" position
7663 *
7664 * This function implements the read interface for a struct trace_min_max_param.
7665 * The filp->private_data must point to a trace_min_max_param struct with valid
7666 * data.
7667 */
7668static ssize_t
7669trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7670{
7671        struct trace_min_max_param *param = filp->private_data;
7672        char buf[U64_STR_SIZE];
7673        int len;
7674        u64 val;
7675
7676        if (!param)
7677                return -EFAULT;
7678
7679        val = *param->val;
7680
7681        if (cnt > sizeof(buf))
7682                cnt = sizeof(buf);
7683
7684        len = snprintf(buf, sizeof(buf), "%llu\n", val);
7685
7686        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7687}
7688
7689const struct file_operations trace_min_max_fops = {
7690        .open           = tracing_open_generic,
7691        .read           = trace_min_max_read,
7692        .write          = trace_min_max_write,
7693};
7694
7695#define TRACING_LOG_ERRS_MAX    8
7696#define TRACING_LOG_LOC_MAX     128
7697
7698#define CMD_PREFIX "  Command: "
7699
7700struct err_info {
7701        const char      **errs; /* ptr to loc-specific array of err strings */
7702        u8              type;   /* index into errs -> specific err string */
7703        u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7704        u64             ts;
7705};
7706
7707struct tracing_log_err {
7708        struct list_head        list;
7709        struct err_info         info;
7710        char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7711        char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7712};
7713
7714static DEFINE_MUTEX(tracing_err_log_lock);
7715
7716static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7717{
7718        struct tracing_log_err *err;
7719
7720        if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7721                err = kzalloc(sizeof(*err), GFP_KERNEL);
7722                if (!err)
7723                        err = ERR_PTR(-ENOMEM);
7724                tr->n_err_log_entries++;
7725
7726                return err;
7727        }
7728
7729        err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7730        list_del(&err->list);
7731
7732        return err;
7733}
7734
7735/**
7736 * err_pos - find the position of a string within a command for error careting
7737 * @cmd: The tracing command that caused the error
7738 * @str: The string to position the caret at within @cmd
7739 *
7740 * Finds the position of the first occurrence of @str within @cmd.  The
7741 * return value can be passed to tracing_log_err() for caret placement
7742 * within @cmd.
7743 *
7744 * Returns the index within @cmd of the first occurrence of @str or 0
7745 * if @str was not found.
7746 */
7747unsigned int err_pos(char *cmd, const char *str)
7748{
7749        char *found;
7750
7751        if (WARN_ON(!strlen(cmd)))
7752                return 0;
7753
7754        found = strstr(cmd, str);
7755        if (found)
7756                return found - cmd;
7757
7758        return 0;
7759}
7760
7761/**
7762 * tracing_log_err - write an error to the tracing error log
7763 * @tr: The associated trace array for the error (NULL for top level array)
7764 * @loc: A string describing where the error occurred
7765 * @cmd: The tracing command that caused the error
7766 * @errs: The array of loc-specific static error strings
7767 * @type: The index into errs[], which produces the specific static err string
7768 * @pos: The position the caret should be placed in the cmd
7769 *
7770 * Writes an error into tracing/error_log of the form:
7771 *
7772 * <loc>: error: <text>
7773 *   Command: <cmd>
7774 *              ^
7775 *
7776 * tracing/error_log is a small log file containing the last
7777 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7778 * unless there has been a tracing error, and the error log can be
7779 * cleared and have its memory freed by writing the empty string in
7780 * truncation mode to it i.e. echo > tracing/error_log.
7781 *
7782 * NOTE: the @errs array along with the @type param are used to
7783 * produce a static error string - this string is not copied and saved
7784 * when the error is logged - only a pointer to it is saved.  See
7785 * existing callers for examples of how static strings are typically
7786 * defined for use with tracing_log_err().
7787 */
7788void tracing_log_err(struct trace_array *tr,
7789                     const char *loc, const char *cmd,
7790                     const char **errs, u8 type, u8 pos)
7791{
7792        struct tracing_log_err *err;
7793
7794        if (!tr)
7795                tr = &global_trace;
7796
7797        mutex_lock(&tracing_err_log_lock);
7798        err = get_tracing_log_err(tr);
7799        if (PTR_ERR(err) == -ENOMEM) {
7800                mutex_unlock(&tracing_err_log_lock);
7801                return;
7802        }
7803
7804        snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7805        snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7806
7807        err->info.errs = errs;
7808        err->info.type = type;
7809        err->info.pos = pos;
7810        err->info.ts = local_clock();
7811
7812        list_add_tail(&err->list, &tr->err_log);
7813        mutex_unlock(&tracing_err_log_lock);
7814}
7815
7816static void clear_tracing_err_log(struct trace_array *tr)
7817{
7818        struct tracing_log_err *err, *next;
7819
7820        mutex_lock(&tracing_err_log_lock);
7821        list_for_each_entry_safe(err, next, &tr->err_log, list) {
7822                list_del(&err->list);
7823                kfree(err);
7824        }
7825
7826        tr->n_err_log_entries = 0;
7827        mutex_unlock(&tracing_err_log_lock);
7828}
7829
7830static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7831{
7832        struct trace_array *tr = m->private;
7833
7834        mutex_lock(&tracing_err_log_lock);
7835
7836        return seq_list_start(&tr->err_log, *pos);
7837}
7838
7839static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7840{
7841        struct trace_array *tr = m->private;
7842
7843        return seq_list_next(v, &tr->err_log, pos);
7844}
7845
7846static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7847{
7848        mutex_unlock(&tracing_err_log_lock);
7849}
7850
7851static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7852{
7853        u8 i;
7854
7855        for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7856                seq_putc(m, ' ');
7857        for (i = 0; i < pos; i++)
7858                seq_putc(m, ' ');
7859        seq_puts(m, "^\n");
7860}
7861
7862static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7863{
7864        struct tracing_log_err *err = v;
7865
7866        if (err) {
7867                const char *err_text = err->info.errs[err->info.type];
7868                u64 sec = err->info.ts;
7869                u32 nsec;
7870
7871                nsec = do_div(sec, NSEC_PER_SEC);
7872                seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7873                           err->loc, err_text);
7874                seq_printf(m, "%s", err->cmd);
7875                tracing_err_log_show_pos(m, err->info.pos);
7876        }
7877
7878        return 0;
7879}
7880
7881static const struct seq_operations tracing_err_log_seq_ops = {
7882        .start  = tracing_err_log_seq_start,
7883        .next   = tracing_err_log_seq_next,
7884        .stop   = tracing_err_log_seq_stop,
7885        .show   = tracing_err_log_seq_show
7886};
7887
7888static int tracing_err_log_open(struct inode *inode, struct file *file)
7889{
7890        struct trace_array *tr = inode->i_private;
7891        int ret = 0;
7892
7893        ret = tracing_check_open_get_tr(tr);
7894        if (ret)
7895                return ret;
7896
7897        /* If this file was opened for write, then erase contents */
7898        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7899                clear_tracing_err_log(tr);
7900
7901        if (file->f_mode & FMODE_READ) {
7902                ret = seq_open(file, &tracing_err_log_seq_ops);
7903                if (!ret) {
7904                        struct seq_file *m = file->private_data;
7905                        m->private = tr;
7906                } else {
7907                        trace_array_put(tr);
7908                }
7909        }
7910        return ret;
7911}
7912
7913static ssize_t tracing_err_log_write(struct file *file,
7914                                     const char __user *buffer,
7915                                     size_t count, loff_t *ppos)
7916{
7917        return count;
7918}
7919
7920static int tracing_err_log_release(struct inode *inode, struct file *file)
7921{
7922        struct trace_array *tr = inode->i_private;
7923
7924        trace_array_put(tr);
7925
7926        if (file->f_mode & FMODE_READ)
7927                seq_release(inode, file);
7928
7929        return 0;
7930}
7931
7932static const struct file_operations tracing_err_log_fops = {
7933        .open           = tracing_err_log_open,
7934        .write          = tracing_err_log_write,
7935        .read           = seq_read,
7936        .llseek         = seq_lseek,
7937        .release        = tracing_err_log_release,
7938};
7939
7940static int tracing_buffers_open(struct inode *inode, struct file *filp)
7941{
7942        struct trace_array *tr = inode->i_private;
7943        struct ftrace_buffer_info *info;
7944        int ret;
7945
7946        ret = tracing_check_open_get_tr(tr);
7947        if (ret)
7948                return ret;
7949
7950        info = kvzalloc(sizeof(*info), GFP_KERNEL);
7951        if (!info) {
7952                trace_array_put(tr);
7953                return -ENOMEM;
7954        }
7955
7956        mutex_lock(&trace_types_lock);
7957
7958        info->iter.tr           = tr;
7959        info->iter.cpu_file     = tracing_get_cpu(inode);
7960        info->iter.trace        = tr->current_trace;
7961        info->iter.array_buffer = &tr->array_buffer;
7962        info->spare             = NULL;
7963        /* Force reading ring buffer for first read */
7964        info->read              = (unsigned int)-1;
7965
7966        filp->private_data = info;
7967
7968        tr->trace_ref++;
7969
7970        mutex_unlock(&trace_types_lock);
7971
7972        ret = nonseekable_open(inode, filp);
7973        if (ret < 0)
7974                trace_array_put(tr);
7975
7976        return ret;
7977}
7978
7979static __poll_t
7980tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7981{
7982        struct ftrace_buffer_info *info = filp->private_data;
7983        struct trace_iterator *iter = &info->iter;
7984
7985        return trace_poll(iter, filp, poll_table);
7986}
7987
7988static ssize_t
7989tracing_buffers_read(struct file *filp, char __user *ubuf,
7990                     size_t count, loff_t *ppos)
7991{
7992        struct ftrace_buffer_info *info = filp->private_data;
7993        struct trace_iterator *iter = &info->iter;
7994        ssize_t ret = 0;
7995        ssize_t size;
7996
7997        if (!count)
7998                return 0;
7999
8000#ifdef CONFIG_TRACER_MAX_TRACE
8001        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8002                return -EBUSY;
8003#endif
8004
8005        if (!info->spare) {
8006                info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8007                                                          iter->cpu_file);
8008                if (IS_ERR(info->spare)) {
8009                        ret = PTR_ERR(info->spare);
8010                        info->spare = NULL;
8011                } else {
8012                        info->spare_cpu = iter->cpu_file;
8013                }
8014        }
8015        if (!info->spare)
8016                return ret;
8017
8018        /* Do we have previous read data to read? */
8019        if (info->read < PAGE_SIZE)
8020                goto read;
8021
8022 again:
8023        trace_access_lock(iter->cpu_file);
8024        ret = ring_buffer_read_page(iter->array_buffer->buffer,
8025                                    &info->spare,
8026                                    count,
8027                                    iter->cpu_file, 0);
8028        trace_access_unlock(iter->cpu_file);
8029
8030        if (ret < 0) {
8031                if (trace_empty(iter)) {
8032                        if ((filp->f_flags & O_NONBLOCK))
8033                                return -EAGAIN;
8034
8035                        ret = wait_on_pipe(iter, 0);
8036                        if (ret)
8037                                return ret;
8038
8039                        goto again;
8040                }
8041                return 0;
8042        }
8043
8044        info->read = 0;
8045 read:
8046        size = PAGE_SIZE - info->read;
8047        if (size > count)
8048                size = count;
8049
8050        ret = copy_to_user(ubuf, info->spare + info->read, size);
8051        if (ret == size)
8052                return -EFAULT;
8053
8054        size -= ret;
8055
8056        *ppos += size;
8057        info->read += size;
8058
8059        return size;
8060}
8061
8062static int tracing_buffers_release(struct inode *inode, struct file *file)
8063{
8064        struct ftrace_buffer_info *info = file->private_data;
8065        struct trace_iterator *iter = &info->iter;
8066
8067        mutex_lock(&trace_types_lock);
8068
8069        iter->tr->trace_ref--;
8070
8071        __trace_array_put(iter->tr);
8072
8073        if (info->spare)
8074                ring_buffer_free_read_page(iter->array_buffer->buffer,
8075                                           info->spare_cpu, info->spare);
8076        kvfree(info);
8077
8078        mutex_unlock(&trace_types_lock);
8079
8080        return 0;
8081}
8082
8083struct buffer_ref {
8084        struct trace_buffer     *buffer;
8085        void                    *page;
8086        int                     cpu;
8087        refcount_t              refcount;
8088};
8089
8090static void buffer_ref_release(struct buffer_ref *ref)
8091{
8092        if (!refcount_dec_and_test(&ref->refcount))
8093                return;
8094        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8095        kfree(ref);
8096}
8097
8098static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8099                                    struct pipe_buffer *buf)
8100{
8101        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8102
8103        buffer_ref_release(ref);
8104        buf->private = 0;
8105}
8106
8107static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8108                                struct pipe_buffer *buf)
8109{
8110        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8111
8112        if (refcount_read(&ref->refcount) > INT_MAX/2)
8113                return false;
8114
8115        refcount_inc(&ref->refcount);
8116        return true;
8117}
8118
8119/* Pipe buffer operations for a buffer. */
8120static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8121        .release                = buffer_pipe_buf_release,
8122        .get                    = buffer_pipe_buf_get,
8123};
8124
8125/*
8126 * Callback from splice_to_pipe(), if we need to release some pages
8127 * at the end of the spd in case we error'ed out in filling the pipe.
8128 */
8129static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8130{
8131        struct buffer_ref *ref =
8132                (struct buffer_ref *)spd->partial[i].private;
8133
8134        buffer_ref_release(ref);
8135        spd->partial[i].private = 0;
8136}
8137
8138static ssize_t
8139tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8140                            struct pipe_inode_info *pipe, size_t len,
8141                            unsigned int flags)
8142{
8143        struct ftrace_buffer_info *info = file->private_data;
8144        struct trace_iterator *iter = &info->iter;
8145        struct partial_page partial_def[PIPE_DEF_BUFFERS];
8146        struct page *pages_def[PIPE_DEF_BUFFERS];
8147        struct splice_pipe_desc spd = {
8148                .pages          = pages_def,
8149                .partial        = partial_def,
8150                .nr_pages_max   = PIPE_DEF_BUFFERS,
8151                .ops            = &buffer_pipe_buf_ops,
8152                .spd_release    = buffer_spd_release,
8153        };
8154        struct buffer_ref *ref;
8155        int entries, i;
8156        ssize_t ret = 0;
8157
8158#ifdef CONFIG_TRACER_MAX_TRACE
8159        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8160                return -EBUSY;
8161#endif
8162
8163        if (*ppos & (PAGE_SIZE - 1))
8164                return -EINVAL;
8165
8166        if (len & (PAGE_SIZE - 1)) {
8167                if (len < PAGE_SIZE)
8168                        return -EINVAL;
8169                len &= PAGE_MASK;
8170        }
8171
8172        if (splice_grow_spd(pipe, &spd))
8173                return -ENOMEM;
8174
8175 again:
8176        trace_access_lock(iter->cpu_file);
8177        entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8178
8179        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8180                struct page *page;
8181                int r;
8182
8183                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8184                if (!ref) {
8185                        ret = -ENOMEM;
8186                        break;
8187                }
8188
8189                refcount_set(&ref->refcount, 1);
8190                ref->buffer = iter->array_buffer->buffer;
8191                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8192                if (IS_ERR(ref->page)) {
8193                        ret = PTR_ERR(ref->page);
8194                        ref->page = NULL;
8195                        kfree(ref);
8196                        break;
8197                }
8198                ref->cpu = iter->cpu_file;
8199
8200                r = ring_buffer_read_page(ref->buffer, &ref->page,
8201                                          len, iter->cpu_file, 1);
8202                if (r < 0) {
8203                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
8204                                                   ref->page);
8205                        kfree(ref);
8206                        break;
8207                }
8208
8209                page = virt_to_page(ref->page);
8210
8211                spd.pages[i] = page;
8212                spd.partial[i].len = PAGE_SIZE;
8213                spd.partial[i].offset = 0;
8214                spd.partial[i].private = (unsigned long)ref;
8215                spd.nr_pages++;
8216                *ppos += PAGE_SIZE;
8217
8218                entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8219        }
8220
8221        trace_access_unlock(iter->cpu_file);
8222        spd.nr_pages = i;
8223
8224        /* did we read anything? */
8225        if (!spd.nr_pages) {
8226                if (ret)
8227                        goto out;
8228
8229                ret = -EAGAIN;
8230                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8231                        goto out;
8232
8233                ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8234                if (ret)
8235                        goto out;
8236
8237                goto again;
8238        }
8239
8240        ret = splice_to_pipe(pipe, &spd);
8241out:
8242        splice_shrink_spd(&spd);
8243
8244        return ret;
8245}
8246
8247static const struct file_operations tracing_buffers_fops = {
8248        .open           = tracing_buffers_open,
8249        .read           = tracing_buffers_read,
8250        .poll           = tracing_buffers_poll,
8251        .release        = tracing_buffers_release,
8252        .splice_read    = tracing_buffers_splice_read,
8253        .llseek         = no_llseek,
8254};
8255
8256static ssize_t
8257tracing_stats_read(struct file *filp, char __user *ubuf,
8258                   size_t count, loff_t *ppos)
8259{
8260        struct inode *inode = file_inode(filp);
8261        struct trace_array *tr = inode->i_private;
8262        struct array_buffer *trace_buf = &tr->array_buffer;
8263        int cpu = tracing_get_cpu(inode);
8264        struct trace_seq *s;
8265        unsigned long cnt;
8266        unsigned long long t;
8267        unsigned long usec_rem;
8268
8269        s = kmalloc(sizeof(*s), GFP_KERNEL);
8270        if (!s)
8271                return -ENOMEM;
8272
8273        trace_seq_init(s);
8274
8275        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8276        trace_seq_printf(s, "entries: %ld\n", cnt);
8277
8278        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8279        trace_seq_printf(s, "overrun: %ld\n", cnt);
8280
8281        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8282        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8283
8284        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8285        trace_seq_printf(s, "bytes: %ld\n", cnt);
8286
8287        if (trace_clocks[tr->clock_id].in_ns) {
8288                /* local or global for trace_clock */
8289                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8290                usec_rem = do_div(t, USEC_PER_SEC);
8291                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8292                                                                t, usec_rem);
8293
8294                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8295                usec_rem = do_div(t, USEC_PER_SEC);
8296                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8297        } else {
8298                /* counter or tsc mode for trace_clock */
8299                trace_seq_printf(s, "oldest event ts: %llu\n",
8300                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8301
8302                trace_seq_printf(s, "now ts: %llu\n",
8303                                ring_buffer_time_stamp(trace_buf->buffer));
8304        }
8305
8306        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8307        trace_seq_printf(s, "dropped events: %ld\n", cnt);
8308
8309        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8310        trace_seq_printf(s, "read events: %ld\n", cnt);
8311
8312        count = simple_read_from_buffer(ubuf, count, ppos,
8313                                        s->buffer, trace_seq_used(s));
8314
8315        kfree(s);
8316
8317        return count;
8318}
8319
8320static const struct file_operations tracing_stats_fops = {
8321        .open           = tracing_open_generic_tr,
8322        .read           = tracing_stats_read,
8323        .llseek         = generic_file_llseek,
8324        .release        = tracing_release_generic_tr,
8325};
8326
8327#ifdef CONFIG_DYNAMIC_FTRACE
8328
8329static ssize_t
8330tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8331                  size_t cnt, loff_t *ppos)
8332{
8333        ssize_t ret;
8334        char *buf;
8335        int r;
8336
8337        /* 256 should be plenty to hold the amount needed */
8338        buf = kmalloc(256, GFP_KERNEL);
8339        if (!buf)
8340                return -ENOMEM;
8341
8342        r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8343                      ftrace_update_tot_cnt,
8344                      ftrace_number_of_pages,
8345                      ftrace_number_of_groups);
8346
8347        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8348        kfree(buf);
8349        return ret;
8350}
8351
8352static const struct file_operations tracing_dyn_info_fops = {
8353        .open           = tracing_open_generic,
8354        .read           = tracing_read_dyn_info,
8355        .llseek         = generic_file_llseek,
8356};
8357#endif /* CONFIG_DYNAMIC_FTRACE */
8358
8359#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8360static void
8361ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8362                struct trace_array *tr, struct ftrace_probe_ops *ops,
8363                void *data)
8364{
8365        tracing_snapshot_instance(tr);
8366}
8367
8368static void
8369ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8370                      struct trace_array *tr, struct ftrace_probe_ops *ops,
8371                      void *data)
8372{
8373        struct ftrace_func_mapper *mapper = data;
8374        long *count = NULL;
8375
8376        if (mapper)
8377                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8378
8379        if (count) {
8380
8381                if (*count <= 0)
8382                        return;
8383
8384                (*count)--;
8385        }
8386
8387        tracing_snapshot_instance(tr);
8388}
8389
8390static int
8391ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8392                      struct ftrace_probe_ops *ops, void *data)
8393{
8394        struct ftrace_func_mapper *mapper = data;
8395        long *count = NULL;
8396
8397        seq_printf(m, "%ps:", (void *)ip);
8398
8399        seq_puts(m, "snapshot");
8400
8401        if (mapper)
8402                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8403
8404        if (count)
8405                seq_printf(m, ":count=%ld\n", *count);
8406        else
8407                seq_puts(m, ":unlimited\n");
8408
8409        return 0;
8410}
8411
8412static int
8413ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8414                     unsigned long ip, void *init_data, void **data)
8415{
8416        struct ftrace_func_mapper *mapper = *data;
8417
8418        if (!mapper) {
8419                mapper = allocate_ftrace_func_mapper();
8420                if (!mapper)
8421                        return -ENOMEM;
8422                *data = mapper;
8423        }
8424
8425        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8426}
8427
8428static void
8429ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8430                     unsigned long ip, void *data)
8431{
8432        struct ftrace_func_mapper *mapper = data;
8433
8434        if (!ip) {
8435                if (!mapper)
8436                        return;
8437                free_ftrace_func_mapper(mapper, NULL);
8438                return;
8439        }
8440
8441        ftrace_func_mapper_remove_ip(mapper, ip);
8442}
8443
8444static struct ftrace_probe_ops snapshot_probe_ops = {
8445        .func                   = ftrace_snapshot,
8446        .print                  = ftrace_snapshot_print,
8447};
8448
8449static struct ftrace_probe_ops snapshot_count_probe_ops = {
8450        .func                   = ftrace_count_snapshot,
8451        .print                  = ftrace_snapshot_print,
8452        .init                   = ftrace_snapshot_init,
8453        .free                   = ftrace_snapshot_free,
8454};
8455
8456static int
8457ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8458                               char *glob, char *cmd, char *param, int enable)
8459{
8460        struct ftrace_probe_ops *ops;
8461        void *count = (void *)-1;
8462        char *number;
8463        int ret;
8464
8465        if (!tr)
8466                return -ENODEV;
8467
8468        /* hash funcs only work with set_ftrace_filter */
8469        if (!enable)
8470                return -EINVAL;
8471
8472        ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8473
8474        if (glob[0] == '!')
8475                return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8476
8477        if (!param)
8478                goto out_reg;
8479
8480        number = strsep(&param, ":");
8481
8482        if (!strlen(number))
8483                goto out_reg;
8484
8485        /*
8486         * We use the callback data field (which is a pointer)
8487         * as our counter.
8488         */
8489        ret = kstrtoul(number, 0, (unsigned long *)&count);
8490        if (ret)
8491                return ret;
8492
8493 out_reg:
8494        ret = tracing_alloc_snapshot_instance(tr);
8495        if (ret < 0)
8496                goto out;
8497
8498        ret = register_ftrace_function_probe(glob, tr, ops, count);
8499
8500 out:
8501        return ret < 0 ? ret : 0;
8502}
8503
8504static struct ftrace_func_command ftrace_snapshot_cmd = {
8505        .name                   = "snapshot",
8506        .func                   = ftrace_trace_snapshot_callback,
8507};
8508
8509static __init int register_snapshot_cmd(void)
8510{
8511        return register_ftrace_command(&ftrace_snapshot_cmd);
8512}
8513#else
8514static inline __init int register_snapshot_cmd(void) { return 0; }
8515#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8516
8517static struct dentry *tracing_get_dentry(struct trace_array *tr)
8518{
8519        if (WARN_ON(!tr->dir))
8520                return ERR_PTR(-ENODEV);
8521
8522        /* Top directory uses NULL as the parent */
8523        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8524                return NULL;
8525
8526        /* All sub buffers have a descriptor */
8527        return tr->dir;
8528}
8529
8530static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8531{
8532        struct dentry *d_tracer;
8533
8534        if (tr->percpu_dir)
8535                return tr->percpu_dir;
8536
8537        d_tracer = tracing_get_dentry(tr);
8538        if (IS_ERR(d_tracer))
8539                return NULL;
8540
8541        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8542
8543        MEM_FAIL(!tr->percpu_dir,
8544                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8545
8546        return tr->percpu_dir;
8547}
8548
8549static struct dentry *
8550trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8551                      void *data, long cpu, const struct file_operations *fops)
8552{
8553        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8554
8555        if (ret) /* See tracing_get_cpu() */
8556                d_inode(ret)->i_cdev = (void *)(cpu + 1);
8557        return ret;
8558}
8559
8560static void
8561tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8562{
8563        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8564        struct dentry *d_cpu;
8565        char cpu_dir[30]; /* 30 characters should be more than enough */
8566
8567        if (!d_percpu)
8568                return;
8569
8570        snprintf(cpu_dir, 30, "cpu%ld", cpu);
8571        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8572        if (!d_cpu) {
8573                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8574                return;
8575        }
8576
8577        /* per cpu trace_pipe */
8578        trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8579                                tr, cpu, &tracing_pipe_fops);
8580
8581        /* per cpu trace */
8582        trace_create_cpu_file("trace", 0644, d_cpu,
8583                                tr, cpu, &tracing_fops);
8584
8585        trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8586                                tr, cpu, &tracing_buffers_fops);
8587
8588        trace_create_cpu_file("stats", 0444, d_cpu,
8589                                tr, cpu, &tracing_stats_fops);
8590
8591        trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8592                                tr, cpu, &tracing_entries_fops);
8593
8594#ifdef CONFIG_TRACER_SNAPSHOT
8595        trace_create_cpu_file("snapshot", 0644, d_cpu,
8596                                tr, cpu, &snapshot_fops);
8597
8598        trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8599                                tr, cpu, &snapshot_raw_fops);
8600#endif
8601}
8602
8603#ifdef CONFIG_FTRACE_SELFTEST
8604/* Let selftest have access to static functions in this file */
8605#include "trace_selftest.c"
8606#endif
8607
8608static ssize_t
8609trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8610                        loff_t *ppos)
8611{
8612        struct trace_option_dentry *topt = filp->private_data;
8613        char *buf;
8614
8615        if (topt->flags->val & topt->opt->bit)
8616                buf = "1\n";
8617        else
8618                buf = "0\n";
8619
8620        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8621}
8622
8623static ssize_t
8624trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8625                         loff_t *ppos)
8626{
8627        struct trace_option_dentry *topt = filp->private_data;
8628        unsigned long val;
8629        int ret;
8630
8631        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8632        if (ret)
8633                return ret;
8634
8635        if (val != 0 && val != 1)
8636                return -EINVAL;
8637
8638        if (!!(topt->flags->val & topt->opt->bit) != val) {
8639                mutex_lock(&trace_types_lock);
8640                ret = __set_tracer_option(topt->tr, topt->flags,
8641                                          topt->opt, !val);
8642                mutex_unlock(&trace_types_lock);
8643                if (ret)
8644                        return ret;
8645        }
8646
8647        *ppos += cnt;
8648
8649        return cnt;
8650}
8651
8652
8653static const struct file_operations trace_options_fops = {
8654        .open = tracing_open_generic,
8655        .read = trace_options_read,
8656        .write = trace_options_write,
8657        .llseek = generic_file_llseek,
8658};
8659
8660/*
8661 * In order to pass in both the trace_array descriptor as well as the index
8662 * to the flag that the trace option file represents, the trace_array
8663 * has a character array of trace_flags_index[], which holds the index
8664 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8665 * The address of this character array is passed to the flag option file
8666 * read/write callbacks.
8667 *
8668 * In order to extract both the index and the trace_array descriptor,
8669 * get_tr_index() uses the following algorithm.
8670 *
8671 *   idx = *ptr;
8672 *
8673 * As the pointer itself contains the address of the index (remember
8674 * index[1] == 1).
8675 *
8676 * Then to get the trace_array descriptor, by subtracting that index
8677 * from the ptr, we get to the start of the index itself.
8678 *
8679 *   ptr - idx == &index[0]
8680 *
8681 * Then a simple container_of() from that pointer gets us to the
8682 * trace_array descriptor.
8683 */
8684static void get_tr_index(void *data, struct trace_array **ptr,
8685                         unsigned int *pindex)
8686{
8687        *pindex = *(unsigned char *)data;
8688
8689        *ptr = container_of(data - *pindex, struct trace_array,
8690                            trace_flags_index);
8691}
8692
8693static ssize_t
8694trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8695                        loff_t *ppos)
8696{
8697        void *tr_index = filp->private_data;
8698        struct trace_array *tr;
8699        unsigned int index;
8700        char *buf;
8701
8702        get_tr_index(tr_index, &tr, &index);
8703
8704        if (tr->trace_flags & (1 << index))
8705                buf = "1\n";
8706        else
8707                buf = "0\n";
8708
8709        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8710}
8711
8712static ssize_t
8713trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8714                         loff_t *ppos)
8715{
8716        void *tr_index = filp->private_data;
8717        struct trace_array *tr;
8718        unsigned int index;
8719        unsigned long val;
8720        int ret;
8721
8722        get_tr_index(tr_index, &tr, &index);
8723
8724        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8725        if (ret)
8726                return ret;
8727
8728        if (val != 0 && val != 1)
8729                return -EINVAL;
8730
8731        mutex_lock(&event_mutex);
8732        mutex_lock(&trace_types_lock);
8733        ret = set_tracer_flag(tr, 1 << index, val);
8734        mutex_unlock(&trace_types_lock);
8735        mutex_unlock(&event_mutex);
8736
8737        if (ret < 0)
8738                return ret;
8739
8740        *ppos += cnt;
8741
8742        return cnt;
8743}
8744
8745static const struct file_operations trace_options_core_fops = {
8746        .open = tracing_open_generic,
8747        .read = trace_options_core_read,
8748        .write = trace_options_core_write,
8749        .llseek = generic_file_llseek,
8750};
8751
8752struct dentry *trace_create_file(const char *name,
8753                                 umode_t mode,
8754                                 struct dentry *parent,
8755                                 void *data,
8756                                 const struct file_operations *fops)
8757{
8758        struct dentry *ret;
8759
8760        ret = tracefs_create_file(name, mode, parent, data, fops);
8761        if (!ret)
8762                pr_warn("Could not create tracefs '%s' entry\n", name);
8763
8764        return ret;
8765}
8766
8767
8768static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8769{
8770        struct dentry *d_tracer;
8771
8772        if (tr->options)
8773                return tr->options;
8774
8775        d_tracer = tracing_get_dentry(tr);
8776        if (IS_ERR(d_tracer))
8777                return NULL;
8778
8779        tr->options = tracefs_create_dir("options", d_tracer);
8780        if (!tr->options) {
8781                pr_warn("Could not create tracefs directory 'options'\n");
8782                return NULL;
8783        }
8784
8785        return tr->options;
8786}
8787
8788static void
8789create_trace_option_file(struct trace_array *tr,
8790                         struct trace_option_dentry *topt,
8791                         struct tracer_flags *flags,
8792                         struct tracer_opt *opt)
8793{
8794        struct dentry *t_options;
8795
8796        t_options = trace_options_init_dentry(tr);
8797        if (!t_options)
8798                return;
8799
8800        topt->flags = flags;
8801        topt->opt = opt;
8802        topt->tr = tr;
8803
8804        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8805                                    &trace_options_fops);
8806
8807}
8808
8809static void
8810create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8811{
8812        struct trace_option_dentry *topts;
8813        struct trace_options *tr_topts;
8814        struct tracer_flags *flags;
8815        struct tracer_opt *opts;
8816        int cnt;
8817        int i;
8818
8819        if (!tracer)
8820                return;
8821
8822        flags = tracer->flags;
8823
8824        if (!flags || !flags->opts)
8825                return;
8826
8827        /*
8828         * If this is an instance, only create flags for tracers
8829         * the instance may have.
8830         */
8831        if (!trace_ok_for_array(tracer, tr))
8832                return;
8833
8834        for (i = 0; i < tr->nr_topts; i++) {
8835                /* Make sure there's no duplicate flags. */
8836                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8837                        return;
8838        }
8839
8840        opts = flags->opts;
8841
8842        for (cnt = 0; opts[cnt].name; cnt++)
8843                ;
8844
8845        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8846        if (!topts)
8847                return;
8848
8849        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8850                            GFP_KERNEL);
8851        if (!tr_topts) {
8852                kfree(topts);
8853                return;
8854        }
8855
8856        tr->topts = tr_topts;
8857        tr->topts[tr->nr_topts].tracer = tracer;
8858        tr->topts[tr->nr_topts].topts = topts;
8859        tr->nr_topts++;
8860
8861        for (cnt = 0; opts[cnt].name; cnt++) {
8862                create_trace_option_file(tr, &topts[cnt], flags,
8863                                         &opts[cnt]);
8864                MEM_FAIL(topts[cnt].entry == NULL,
8865                          "Failed to create trace option: %s",
8866                          opts[cnt].name);
8867        }
8868}
8869
8870static struct dentry *
8871create_trace_option_core_file(struct trace_array *tr,
8872                              const char *option, long index)
8873{
8874        struct dentry *t_options;
8875
8876        t_options = trace_options_init_dentry(tr);
8877        if (!t_options)
8878                return NULL;
8879
8880        return trace_create_file(option, 0644, t_options,
8881                                 (void *)&tr->trace_flags_index[index],
8882                                 &trace_options_core_fops);
8883}
8884
8885static void create_trace_options_dir(struct trace_array *tr)
8886{
8887        struct dentry *t_options;
8888        bool top_level = tr == &global_trace;
8889        int i;
8890
8891        t_options = trace_options_init_dentry(tr);
8892        if (!t_options)
8893                return;
8894
8895        for (i = 0; trace_options[i]; i++) {
8896                if (top_level ||
8897                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8898                        create_trace_option_core_file(tr, trace_options[i], i);
8899        }
8900}
8901
8902static ssize_t
8903rb_simple_read(struct file *filp, char __user *ubuf,
8904               size_t cnt, loff_t *ppos)
8905{
8906        struct trace_array *tr = filp->private_data;
8907        char buf[64];
8908        int r;
8909
8910        r = tracer_tracing_is_on(tr);
8911        r = sprintf(buf, "%d\n", r);
8912
8913        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8914}
8915
8916static ssize_t
8917rb_simple_write(struct file *filp, const char __user *ubuf,
8918                size_t cnt, loff_t *ppos)
8919{
8920        struct trace_array *tr = filp->private_data;
8921        struct trace_buffer *buffer = tr->array_buffer.buffer;
8922        unsigned long val;
8923        int ret;
8924
8925        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8926        if (ret)
8927                return ret;
8928
8929        if (buffer) {
8930                mutex_lock(&trace_types_lock);
8931                if (!!val == tracer_tracing_is_on(tr)) {
8932                        val = 0; /* do nothing */
8933                } else if (val) {
8934                        tracer_tracing_on(tr);
8935                        if (tr->current_trace->start)
8936                                tr->current_trace->start(tr);
8937                } else {
8938                        tracer_tracing_off(tr);
8939                        if (tr->current_trace->stop)
8940                                tr->current_trace->stop(tr);
8941                }
8942                mutex_unlock(&trace_types_lock);
8943        }
8944
8945        (*ppos)++;
8946
8947        return cnt;
8948}
8949
8950static const struct file_operations rb_simple_fops = {
8951        .open           = tracing_open_generic_tr,
8952        .read           = rb_simple_read,
8953        .write          = rb_simple_write,
8954        .release        = tracing_release_generic_tr,
8955        .llseek         = default_llseek,
8956};
8957
8958static ssize_t
8959buffer_percent_read(struct file *filp, char __user *ubuf,
8960                    size_t cnt, loff_t *ppos)
8961{
8962        struct trace_array *tr = filp->private_data;
8963        char buf[64];
8964        int r;
8965
8966        r = tr->buffer_percent;
8967        r = sprintf(buf, "%d\n", r);
8968
8969        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8970}
8971
8972static ssize_t
8973buffer_percent_write(struct file *filp, const char __user *ubuf,
8974                     size_t cnt, loff_t *ppos)
8975{
8976        struct trace_array *tr = filp->private_data;
8977        unsigned long val;
8978        int ret;
8979
8980        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8981        if (ret)
8982                return ret;
8983
8984        if (val > 100)
8985                return -EINVAL;
8986
8987        if (!val)
8988                val = 1;
8989
8990        tr->buffer_percent = val;
8991
8992        (*ppos)++;
8993
8994        return cnt;
8995}
8996
8997static const struct file_operations buffer_percent_fops = {
8998        .open           = tracing_open_generic_tr,
8999        .read           = buffer_percent_read,
9000        .write          = buffer_percent_write,
9001        .release        = tracing_release_generic_tr,
9002        .llseek         = default_llseek,
9003};
9004
9005static struct dentry *trace_instance_dir;
9006
9007static void
9008init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9009
9010static int
9011allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9012{
9013        enum ring_buffer_flags rb_flags;
9014
9015        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9016
9017        buf->tr = tr;
9018
9019        buf->buffer = ring_buffer_alloc(size, rb_flags);
9020        if (!buf->buffer)
9021                return -ENOMEM;
9022
9023        buf->data = alloc_percpu(struct trace_array_cpu);
9024        if (!buf->data) {
9025                ring_buffer_free(buf->buffer);
9026                buf->buffer = NULL;
9027                return -ENOMEM;
9028        }
9029
9030        /* Allocate the first page for all buffers */
9031        set_buffer_entries(&tr->array_buffer,
9032                           ring_buffer_size(tr->array_buffer.buffer, 0));
9033
9034        return 0;
9035}
9036
9037static int allocate_trace_buffers(struct trace_array *tr, int size)
9038{
9039        int ret;
9040
9041        ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9042        if (ret)
9043                return ret;
9044
9045#ifdef CONFIG_TRACER_MAX_TRACE
9046        ret = allocate_trace_buffer(tr, &tr->max_buffer,
9047                                    allocate_snapshot ? size : 1);
9048        if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9049                ring_buffer_free(tr->array_buffer.buffer);
9050                tr->array_buffer.buffer = NULL;
9051                free_percpu(tr->array_buffer.data);
9052                tr->array_buffer.data = NULL;
9053                return -ENOMEM;
9054        }
9055        tr->allocated_snapshot = allocate_snapshot;
9056
9057        /*
9058         * Only the top level trace array gets its snapshot allocated
9059         * from the kernel command line.
9060         */
9061        allocate_snapshot = false;
9062#endif
9063
9064        return 0;
9065}
9066
9067static void free_trace_buffer(struct array_buffer *buf)
9068{
9069        if (buf->buffer) {
9070                ring_buffer_free(buf->buffer);
9071                buf->buffer = NULL;
9072                free_percpu(buf->data);
9073                buf->data = NULL;
9074        }
9075}
9076
9077static void free_trace_buffers(struct trace_array *tr)
9078{
9079        if (!tr)
9080                return;
9081
9082        free_trace_buffer(&tr->array_buffer);
9083
9084#ifdef CONFIG_TRACER_MAX_TRACE
9085        free_trace_buffer(&tr->max_buffer);
9086#endif
9087}
9088
9089static void init_trace_flags_index(struct trace_array *tr)
9090{
9091        int i;
9092
9093        /* Used by the trace options files */
9094        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9095                tr->trace_flags_index[i] = i;
9096}
9097
9098static void __update_tracer_options(struct trace_array *tr)
9099{
9100        struct tracer *t;
9101
9102        for (t = trace_types; t; t = t->next)
9103                add_tracer_options(tr, t);
9104}
9105
9106static void update_tracer_options(struct trace_array *tr)
9107{
9108        mutex_lock(&trace_types_lock);
9109        __update_tracer_options(tr);
9110        mutex_unlock(&trace_types_lock);
9111}
9112
9113/* Must have trace_types_lock held */
9114struct trace_array *trace_array_find(const char *instance)
9115{
9116        struct trace_array *tr, *found = NULL;
9117
9118        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9119                if (tr->name && strcmp(tr->name, instance) == 0) {
9120                        found = tr;
9121                        break;
9122                }
9123        }
9124
9125        return found;
9126}
9127
9128struct trace_array *trace_array_find_get(const char *instance)
9129{
9130        struct trace_array *tr;
9131
9132        mutex_lock(&trace_types_lock);
9133        tr = trace_array_find(instance);
9134        if (tr)
9135                tr->ref++;
9136        mutex_unlock(&trace_types_lock);
9137
9138        return tr;
9139}
9140
9141static int trace_array_create_dir(struct trace_array *tr)
9142{
9143        int ret;
9144
9145        tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9146        if (!tr->dir)
9147                return -EINVAL;
9148
9149        ret = event_trace_add_tracer(tr->dir, tr);
9150        if (ret) {
9151                tracefs_remove(tr->dir);
9152                return ret;
9153        }
9154
9155        init_tracer_tracefs(tr, tr->dir);
9156        __update_tracer_options(tr);
9157
9158        return ret;
9159}
9160
9161static struct trace_array *trace_array_create(const char *name)
9162{
9163        struct trace_array *tr;
9164        int ret;
9165
9166        ret = -ENOMEM;
9167        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9168        if (!tr)
9169                return ERR_PTR(ret);
9170
9171        tr->name = kstrdup(name, GFP_KERNEL);
9172        if (!tr->name)
9173                goto out_free_tr;
9174
9175        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9176                goto out_free_tr;
9177
9178        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9179
9180        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9181
9182        raw_spin_lock_init(&tr->start_lock);
9183
9184        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9185
9186        tr->current_trace = &nop_trace;
9187
9188        INIT_LIST_HEAD(&tr->systems);
9189        INIT_LIST_HEAD(&tr->events);
9190        INIT_LIST_HEAD(&tr->hist_vars);
9191        INIT_LIST_HEAD(&tr->err_log);
9192
9193        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9194                goto out_free_tr;
9195
9196        if (ftrace_allocate_ftrace_ops(tr) < 0)
9197                goto out_free_tr;
9198
9199        ftrace_init_trace_array(tr);
9200
9201        init_trace_flags_index(tr);
9202
9203        if (trace_instance_dir) {
9204                ret = trace_array_create_dir(tr);
9205                if (ret)
9206                        goto out_free_tr;
9207        } else
9208                __trace_early_add_events(tr);
9209
9210        list_add(&tr->list, &ftrace_trace_arrays);
9211
9212        tr->ref++;
9213
9214        return tr;
9215
9216 out_free_tr:
9217        ftrace_free_ftrace_ops(tr);
9218        free_trace_buffers(tr);
9219        free_cpumask_var(tr->tracing_cpumask);
9220        kfree(tr->name);
9221        kfree(tr);
9222
9223        return ERR_PTR(ret);
9224}
9225
9226static int instance_mkdir(const char *name)
9227{
9228        struct trace_array *tr;
9229        int ret;
9230
9231        mutex_lock(&event_mutex);
9232        mutex_lock(&trace_types_lock);
9233
9234        ret = -EEXIST;
9235        if (trace_array_find(name))
9236                goto out_unlock;
9237
9238        tr = trace_array_create(name);
9239
9240        ret = PTR_ERR_OR_ZERO(tr);
9241
9242out_unlock:
9243        mutex_unlock(&trace_types_lock);
9244        mutex_unlock(&event_mutex);
9245        return ret;
9246}
9247
9248/**
9249 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9250 * @name: The name of the trace array to be looked up/created.
9251 *
9252 * Returns pointer to trace array with given name.
9253 * NULL, if it cannot be created.
9254 *
9255 * NOTE: This function increments the reference counter associated with the
9256 * trace array returned. This makes sure it cannot be freed while in use.
9257 * Use trace_array_put() once the trace array is no longer needed.
9258 * If the trace_array is to be freed, trace_array_destroy() needs to
9259 * be called after the trace_array_put(), or simply let user space delete
9260 * it from the tracefs instances directory. But until the
9261 * trace_array_put() is called, user space can not delete it.
9262 *
9263 */
9264struct trace_array *trace_array_get_by_name(const char *name)
9265{
9266        struct trace_array *tr;
9267
9268        mutex_lock(&event_mutex);
9269        mutex_lock(&trace_types_lock);
9270
9271        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9272                if (tr->name && strcmp(tr->name, name) == 0)
9273                        goto out_unlock;
9274        }
9275
9276        tr = trace_array_create(name);
9277
9278        if (IS_ERR(tr))
9279                tr = NULL;
9280out_unlock:
9281        if (tr)
9282                tr->ref++;
9283
9284        mutex_unlock(&trace_types_lock);
9285        mutex_unlock(&event_mutex);
9286        return tr;
9287}
9288EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9289
9290static int __remove_instance(struct trace_array *tr)
9291{
9292        int i;
9293
9294        /* Reference counter for a newly created trace array = 1. */
9295        if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9296                return -EBUSY;
9297
9298        list_del(&tr->list);
9299
9300        /* Disable all the flags that were enabled coming in */
9301        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9302                if ((1 << i) & ZEROED_TRACE_FLAGS)
9303                        set_tracer_flag(tr, 1 << i, 0);
9304        }
9305
9306        tracing_set_nop(tr);
9307        clear_ftrace_function_probes(tr);
9308        event_trace_del_tracer(tr);
9309        ftrace_clear_pids(tr);
9310        ftrace_destroy_function_files(tr);
9311        tracefs_remove(tr->dir);
9312        free_percpu(tr->last_func_repeats);
9313        free_trace_buffers(tr);
9314
9315        for (i = 0; i < tr->nr_topts; i++) {
9316                kfree(tr->topts[i].topts);
9317        }
9318        kfree(tr->topts);
9319
9320        free_cpumask_var(tr->tracing_cpumask);
9321        kfree(tr->name);
9322        kfree(tr);
9323
9324        return 0;
9325}
9326
9327int trace_array_destroy(struct trace_array *this_tr)
9328{
9329        struct trace_array *tr;
9330        int ret;
9331
9332        if (!this_tr)
9333                return -EINVAL;
9334
9335        mutex_lock(&event_mutex);
9336        mutex_lock(&trace_types_lock);
9337
9338        ret = -ENODEV;
9339
9340        /* Making sure trace array exists before destroying it. */
9341        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9342                if (tr == this_tr) {
9343                        ret = __remove_instance(tr);
9344                        break;
9345                }
9346        }
9347
9348        mutex_unlock(&trace_types_lock);
9349        mutex_unlock(&event_mutex);
9350
9351        return ret;
9352}
9353EXPORT_SYMBOL_GPL(trace_array_destroy);
9354
9355static int instance_rmdir(const char *name)
9356{
9357        struct trace_array *tr;
9358        int ret;
9359
9360        mutex_lock(&event_mutex);
9361        mutex_lock(&trace_types_lock);
9362
9363        ret = -ENODEV;
9364        tr = trace_array_find(name);
9365        if (tr)
9366                ret = __remove_instance(tr);
9367
9368        mutex_unlock(&trace_types_lock);
9369        mutex_unlock(&event_mutex);
9370
9371        return ret;
9372}
9373
9374static __init void create_trace_instances(struct dentry *d_tracer)
9375{
9376        struct trace_array *tr;
9377
9378        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9379                                                         instance_mkdir,
9380                                                         instance_rmdir);
9381        if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9382                return;
9383
9384        mutex_lock(&event_mutex);
9385        mutex_lock(&trace_types_lock);
9386
9387        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9388                if (!tr->name)
9389                        continue;
9390                if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9391                             "Failed to create instance directory\n"))
9392                        break;
9393        }
9394
9395        mutex_unlock(&trace_types_lock);
9396        mutex_unlock(&event_mutex);
9397}
9398
9399static void
9400init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9401{
9402        struct trace_event_file *file;
9403        int cpu;
9404
9405        trace_create_file("available_tracers", 0444, d_tracer,
9406                        tr, &show_traces_fops);
9407
9408        trace_create_file("current_tracer", 0644, d_tracer,
9409                        tr, &set_tracer_fops);
9410
9411        trace_create_file("tracing_cpumask", 0644, d_tracer,
9412                          tr, &tracing_cpumask_fops);
9413
9414        trace_create_file("trace_options", 0644, d_tracer,
9415                          tr, &tracing_iter_fops);
9416
9417        trace_create_file("trace", 0644, d_tracer,
9418                          tr, &tracing_fops);
9419
9420        trace_create_file("trace_pipe", 0444, d_tracer,
9421                          tr, &tracing_pipe_fops);
9422
9423        trace_create_file("buffer_size_kb", 0644, d_tracer,
9424                          tr, &tracing_entries_fops);
9425
9426        trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9427                          tr, &tracing_total_entries_fops);
9428
9429        trace_create_file("free_buffer", 0200, d_tracer,
9430                          tr, &tracing_free_buffer_fops);
9431
9432        trace_create_file("trace_marker", 0220, d_tracer,
9433                          tr, &tracing_mark_fops);
9434
9435        file = __find_event_file(tr, "ftrace", "print");
9436        if (file && file->dir)
9437                trace_create_file("trigger", 0644, file->dir, file,
9438                                  &event_trigger_fops);
9439        tr->trace_marker_file = file;
9440
9441        trace_create_file("trace_marker_raw", 0220, d_tracer,
9442                          tr, &tracing_mark_raw_fops);
9443
9444        trace_create_file("trace_clock", 0644, d_tracer, tr,
9445                          &trace_clock_fops);
9446
9447        trace_create_file("tracing_on", 0644, d_tracer,
9448                          tr, &rb_simple_fops);
9449
9450        trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9451                          &trace_time_stamp_mode_fops);
9452
9453        tr->buffer_percent = 50;
9454
9455        trace_create_file("buffer_percent", 0444, d_tracer,
9456                        tr, &buffer_percent_fops);
9457
9458        create_trace_options_dir(tr);
9459
9460#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9461        trace_create_maxlat_file(tr, d_tracer);
9462#endif
9463
9464        if (ftrace_create_function_files(tr, d_tracer))
9465                MEM_FAIL(1, "Could not allocate function filter files");
9466
9467#ifdef CONFIG_TRACER_SNAPSHOT
9468        trace_create_file("snapshot", 0644, d_tracer,
9469                          tr, &snapshot_fops);
9470#endif
9471
9472        trace_create_file("error_log", 0644, d_tracer,
9473                          tr, &tracing_err_log_fops);
9474
9475        for_each_tracing_cpu(cpu)
9476                tracing_init_tracefs_percpu(tr, cpu);
9477
9478        ftrace_init_tracefs(tr, d_tracer);
9479}
9480
9481static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9482{
9483        struct vfsmount *mnt;
9484        struct file_system_type *type;
9485
9486        /*
9487         * To maintain backward compatibility for tools that mount
9488         * debugfs to get to the tracing facility, tracefs is automatically
9489         * mounted to the debugfs/tracing directory.
9490         */
9491        type = get_fs_type("tracefs");
9492        if (!type)
9493                return NULL;
9494        mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9495        put_filesystem(type);
9496        if (IS_ERR(mnt))
9497                return NULL;
9498        mntget(mnt);
9499
9500        return mnt;
9501}
9502
9503/**
9504 * tracing_init_dentry - initialize top level trace array
9505 *
9506 * This is called when creating files or directories in the tracing
9507 * directory. It is called via fs_initcall() by any of the boot up code
9508 * and expects to return the dentry of the top level tracing directory.
9509 */
9510int tracing_init_dentry(void)
9511{
9512        struct trace_array *tr = &global_trace;
9513
9514        if (security_locked_down(LOCKDOWN_TRACEFS)) {
9515                pr_warn("Tracing disabled due to lockdown\n");
9516                return -EPERM;
9517        }
9518
9519        /* The top level trace array uses  NULL as parent */
9520        if (tr->dir)
9521                return 0;
9522
9523        if (WARN_ON(!tracefs_initialized()))
9524                return -ENODEV;
9525
9526        /*
9527         * As there may still be users that expect the tracing
9528         * files to exist in debugfs/tracing, we must automount
9529         * the tracefs file system there, so older tools still
9530         * work with the newer kernel.
9531         */
9532        tr->dir = debugfs_create_automount("tracing", NULL,
9533                                           trace_automount, NULL);
9534
9535        return 0;
9536}
9537
9538extern struct trace_eval_map *__start_ftrace_eval_maps[];
9539extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9540
9541static struct workqueue_struct *eval_map_wq __initdata;
9542static struct work_struct eval_map_work __initdata;
9543
9544static void __init eval_map_work_func(struct work_struct *work)
9545{
9546        int len;
9547
9548        len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9549        trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9550}
9551
9552static int __init trace_eval_init(void)
9553{
9554        INIT_WORK(&eval_map_work, eval_map_work_func);
9555
9556        eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9557        if (!eval_map_wq) {
9558                pr_err("Unable to allocate eval_map_wq\n");
9559                /* Do work here */
9560                eval_map_work_func(&eval_map_work);
9561                return -ENOMEM;
9562        }
9563
9564        queue_work(eval_map_wq, &eval_map_work);
9565        return 0;
9566}
9567
9568static int __init trace_eval_sync(void)
9569{
9570        /* Make sure the eval map updates are finished */
9571        if (eval_map_wq)
9572                destroy_workqueue(eval_map_wq);
9573        return 0;
9574}
9575
9576late_initcall_sync(trace_eval_sync);
9577
9578
9579#ifdef CONFIG_MODULES
9580static void trace_module_add_evals(struct module *mod)
9581{
9582        if (!mod->num_trace_evals)
9583                return;
9584
9585        /*
9586         * Modules with bad taint do not have events created, do
9587         * not bother with enums either.
9588         */
9589        if (trace_module_has_bad_taint(mod))
9590                return;
9591
9592        trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9593}
9594
9595#ifdef CONFIG_TRACE_EVAL_MAP_FILE
9596static void trace_module_remove_evals(struct module *mod)
9597{
9598        union trace_eval_map_item *map;
9599        union trace_eval_map_item **last = &trace_eval_maps;
9600
9601        if (!mod->num_trace_evals)
9602                return;
9603
9604        mutex_lock(&trace_eval_mutex);
9605
9606        map = trace_eval_maps;
9607
9608        while (map) {
9609                if (map->head.mod == mod)
9610                        break;
9611                map = trace_eval_jmp_to_tail(map);
9612                last = &map->tail.next;
9613                map = map->tail.next;
9614        }
9615        if (!map)
9616                goto out;
9617
9618        *last = trace_eval_jmp_to_tail(map)->tail.next;
9619        kfree(map);
9620 out:
9621        mutex_unlock(&trace_eval_mutex);
9622}
9623#else
9624static inline void trace_module_remove_evals(struct module *mod) { }
9625#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9626
9627static int trace_module_notify(struct notifier_block *self,
9628                               unsigned long val, void *data)
9629{
9630        struct module *mod = data;
9631
9632        switch (val) {
9633        case MODULE_STATE_COMING:
9634                trace_module_add_evals(mod);
9635                break;
9636        case MODULE_STATE_GOING:
9637                trace_module_remove_evals(mod);
9638                break;
9639        }
9640
9641        return NOTIFY_OK;
9642}
9643
9644static struct notifier_block trace_module_nb = {
9645        .notifier_call = trace_module_notify,
9646        .priority = 0,
9647};
9648#endif /* CONFIG_MODULES */
9649
9650static __init int tracer_init_tracefs(void)
9651{
9652        int ret;
9653
9654        trace_access_lock_init();
9655
9656        ret = tracing_init_dentry();
9657        if (ret)
9658                return 0;
9659
9660        event_trace_init();
9661
9662        init_tracer_tracefs(&global_trace, NULL);
9663        ftrace_init_tracefs_toplevel(&global_trace, NULL);
9664
9665        trace_create_file("tracing_thresh", 0644, NULL,
9666                        &global_trace, &tracing_thresh_fops);
9667
9668        trace_create_file("README", 0444, NULL,
9669                        NULL, &tracing_readme_fops);
9670
9671        trace_create_file("saved_cmdlines", 0444, NULL,
9672                        NULL, &tracing_saved_cmdlines_fops);
9673
9674        trace_create_file("saved_cmdlines_size", 0644, NULL,
9675                          NULL, &tracing_saved_cmdlines_size_fops);
9676
9677        trace_create_file("saved_tgids", 0444, NULL,
9678                        NULL, &tracing_saved_tgids_fops);
9679
9680        trace_eval_init();
9681
9682        trace_create_eval_file(NULL);
9683
9684#ifdef CONFIG_MODULES
9685        register_module_notifier(&trace_module_nb);
9686#endif
9687
9688#ifdef CONFIG_DYNAMIC_FTRACE
9689        trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9690                        NULL, &tracing_dyn_info_fops);
9691#endif
9692
9693        create_trace_instances(NULL);
9694
9695        update_tracer_options(&global_trace);
9696
9697        return 0;
9698}
9699
9700fs_initcall(tracer_init_tracefs);
9701
9702static int trace_panic_handler(struct notifier_block *this,
9703                               unsigned long event, void *unused)
9704{
9705        if (ftrace_dump_on_oops)
9706                ftrace_dump(ftrace_dump_on_oops);
9707        return NOTIFY_OK;
9708}
9709
9710static struct notifier_block trace_panic_notifier = {
9711        .notifier_call  = trace_panic_handler,
9712        .next           = NULL,
9713        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9714};
9715
9716static int trace_die_handler(struct notifier_block *self,
9717                             unsigned long val,
9718                             void *data)
9719{
9720        switch (val) {
9721        case DIE_OOPS:
9722                if (ftrace_dump_on_oops)
9723                        ftrace_dump(ftrace_dump_on_oops);
9724                break;
9725        default:
9726                break;
9727        }
9728        return NOTIFY_OK;
9729}
9730
9731static struct notifier_block trace_die_notifier = {
9732        .notifier_call = trace_die_handler,
9733        .priority = 200
9734};
9735
9736/*
9737 * printk is set to max of 1024, we really don't need it that big.
9738 * Nothing should be printing 1000 characters anyway.
9739 */
9740#define TRACE_MAX_PRINT         1000
9741
9742/*
9743 * Define here KERN_TRACE so that we have one place to modify
9744 * it if we decide to change what log level the ftrace dump
9745 * should be at.
9746 */
9747#define KERN_TRACE              KERN_EMERG
9748
9749void
9750trace_printk_seq(struct trace_seq *s)
9751{
9752        /* Probably should print a warning here. */
9753        if (s->seq.len >= TRACE_MAX_PRINT)
9754                s->seq.len = TRACE_MAX_PRINT;
9755
9756        /*
9757         * More paranoid code. Although the buffer size is set to
9758         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9759         * an extra layer of protection.
9760         */
9761        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9762                s->seq.len = s->seq.size - 1;
9763
9764        /* should be zero ended, but we are paranoid. */
9765        s->buffer[s->seq.len] = 0;
9766
9767        printk(KERN_TRACE "%s", s->buffer);
9768
9769        trace_seq_init(s);
9770}
9771
9772void trace_init_global_iter(struct trace_iterator *iter)
9773{
9774        iter->tr = &global_trace;
9775        iter->trace = iter->tr->current_trace;
9776        iter->cpu_file = RING_BUFFER_ALL_CPUS;
9777        iter->array_buffer = &global_trace.array_buffer;
9778
9779        if (iter->trace && iter->trace->open)
9780                iter->trace->open(iter);
9781
9782        /* Annotate start of buffers if we had overruns */
9783        if (ring_buffer_overruns(iter->array_buffer->buffer))
9784                iter->iter_flags |= TRACE_FILE_ANNOTATE;
9785
9786        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9787        if (trace_clocks[iter->tr->clock_id].in_ns)
9788                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9789}
9790
9791void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9792{
9793        /* use static because iter can be a bit big for the stack */
9794        static struct trace_iterator iter;
9795        static atomic_t dump_running;
9796        struct trace_array *tr = &global_trace;
9797        unsigned int old_userobj;
9798        unsigned long flags;
9799        int cnt = 0, cpu;
9800
9801        /* Only allow one dump user at a time. */
9802        if (atomic_inc_return(&dump_running) != 1) {
9803                atomic_dec(&dump_running);
9804                return;
9805        }
9806
9807        /*
9808         * Always turn off tracing when we dump.
9809         * We don't need to show trace output of what happens
9810         * between multiple crashes.
9811         *
9812         * If the user does a sysrq-z, then they can re-enable
9813         * tracing with echo 1 > tracing_on.
9814         */
9815        tracing_off();
9816
9817        local_irq_save(flags);
9818        printk_nmi_direct_enter();
9819
9820        /* Simulate the iterator */
9821        trace_init_global_iter(&iter);
9822        /* Can not use kmalloc for iter.temp and iter.fmt */
9823        iter.temp = static_temp_buf;
9824        iter.temp_size = STATIC_TEMP_BUF_SIZE;
9825        iter.fmt = static_fmt_buf;
9826        iter.fmt_size = STATIC_FMT_BUF_SIZE;
9827
9828        for_each_tracing_cpu(cpu) {
9829                atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9830        }
9831
9832        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9833
9834        /* don't look at user memory in panic mode */
9835        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9836
9837        switch (oops_dump_mode) {
9838        case DUMP_ALL:
9839                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9840                break;
9841        case DUMP_ORIG:
9842                iter.cpu_file = raw_smp_processor_id();
9843                break;
9844        case DUMP_NONE:
9845                goto out_enable;
9846        default:
9847                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9848                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9849        }
9850
9851        printk(KERN_TRACE "Dumping ftrace buffer:\n");
9852
9853        /* Did function tracer already get disabled? */
9854        if (ftrace_is_dead()) {
9855                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9856                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9857        }
9858
9859        /*
9860         * We need to stop all tracing on all CPUS to read
9861         * the next buffer. This is a bit expensive, but is
9862         * not done often. We fill all what we can read,
9863         * and then release the locks again.
9864         */
9865
9866        while (!trace_empty(&iter)) {
9867
9868                if (!cnt)
9869                        printk(KERN_TRACE "---------------------------------\n");
9870
9871                cnt++;
9872
9873                trace_iterator_reset(&iter);
9874                iter.iter_flags |= TRACE_FILE_LAT_FMT;
9875
9876                if (trace_find_next_entry_inc(&iter) != NULL) {
9877                        int ret;
9878
9879                        ret = print_trace_line(&iter);
9880                        if (ret != TRACE_TYPE_NO_CONSUME)
9881                                trace_consume(&iter);
9882                }
9883                touch_nmi_watchdog();
9884
9885                trace_printk_seq(&iter.seq);
9886        }
9887
9888        if (!cnt)
9889                printk(KERN_TRACE "   (ftrace buffer empty)\n");
9890        else
9891                printk(KERN_TRACE "---------------------------------\n");
9892
9893 out_enable:
9894        tr->trace_flags |= old_userobj;
9895
9896        for_each_tracing_cpu(cpu) {
9897                atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9898        }
9899        atomic_dec(&dump_running);
9900        printk_nmi_direct_exit();
9901        local_irq_restore(flags);
9902}
9903EXPORT_SYMBOL_GPL(ftrace_dump);
9904
9905#define WRITE_BUFSIZE  4096
9906
9907ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9908                                size_t count, loff_t *ppos,
9909                                int (*createfn)(const char *))
9910{
9911        char *kbuf, *buf, *tmp;
9912        int ret = 0;
9913        size_t done = 0;
9914        size_t size;
9915
9916        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9917        if (!kbuf)
9918                return -ENOMEM;
9919
9920        while (done < count) {
9921                size = count - done;
9922
9923                if (size >= WRITE_BUFSIZE)
9924                        size = WRITE_BUFSIZE - 1;
9925
9926                if (copy_from_user(kbuf, buffer + done, size)) {
9927                        ret = -EFAULT;
9928                        goto out;
9929                }
9930                kbuf[size] = '\0';
9931                buf = kbuf;
9932                do {
9933                        tmp = strchr(buf, '\n');
9934                        if (tmp) {
9935                                *tmp = '\0';
9936                                size = tmp - buf + 1;
9937                        } else {
9938                                size = strlen(buf);
9939                                if (done + size < count) {
9940                                        if (buf != kbuf)
9941                                                break;
9942                                        /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9943                                        pr_warn("Line length is too long: Should be less than %d\n",
9944                                                WRITE_BUFSIZE - 2);
9945                                        ret = -EINVAL;
9946                                        goto out;
9947                                }
9948                        }
9949                        done += size;
9950
9951                        /* Remove comments */
9952                        tmp = strchr(buf, '#');
9953
9954                        if (tmp)
9955                                *tmp = '\0';
9956
9957                        ret = createfn(buf);
9958                        if (ret)
9959                                goto out;
9960                        buf += size;
9961
9962                } while (done < count);
9963        }
9964        ret = done;
9965
9966out:
9967        kfree(kbuf);
9968
9969        return ret;
9970}
9971
9972__init static int tracer_alloc_buffers(void)
9973{
9974        int ring_buf_size;
9975        int ret = -ENOMEM;
9976
9977
9978        if (security_locked_down(LOCKDOWN_TRACEFS)) {
9979                pr_warn("Tracing disabled due to lockdown\n");
9980                return -EPERM;
9981        }
9982
9983        /*
9984         * Make sure we don't accidentally add more trace options
9985         * than we have bits for.
9986         */
9987        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9988
9989        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9990                goto out;
9991
9992        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9993                goto out_free_buffer_mask;
9994
9995        /* Only allocate trace_printk buffers if a trace_printk exists */
9996        if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9997                /* Must be called before global_trace.buffer is allocated */
9998                trace_printk_init_buffers();
9999
10000        /* To save memory, keep the ring buffer size to its minimum */
10001        if (ring_buffer_expanded)
10002                ring_buf_size = trace_buf_size;
10003        else
10004                ring_buf_size = 1;
10005
10006        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10007        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10008
10009        raw_spin_lock_init(&global_trace.start_lock);
10010
10011        /*
10012         * The prepare callbacks allocates some memory for the ring buffer. We
10013         * don't free the buffer if the CPU goes down. If we were to free
10014         * the buffer, then the user would lose any trace that was in the
10015         * buffer. The memory will be removed once the "instance" is removed.
10016         */
10017        ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10018                                      "trace/RB:preapre", trace_rb_cpu_prepare,
10019                                      NULL);
10020        if (ret < 0)
10021                goto out_free_cpumask;
10022        /* Used for event triggers */
10023        ret = -ENOMEM;
10024        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10025        if (!temp_buffer)
10026                goto out_rm_hp_state;
10027
10028        if (trace_create_savedcmd() < 0)
10029                goto out_free_temp_buffer;
10030
10031        /* TODO: make the number of buffers hot pluggable with CPUS */
10032        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10033                MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10034                goto out_free_savedcmd;
10035        }
10036
10037        if (global_trace.buffer_disabled)
10038                tracing_off();
10039
10040        if (trace_boot_clock) {
10041                ret = tracing_set_clock(&global_trace, trace_boot_clock);
10042                if (ret < 0)
10043                        pr_warn("Trace clock %s not defined, going back to default\n",
10044                                trace_boot_clock);
10045        }
10046
10047        /*
10048         * register_tracer() might reference current_trace, so it
10049         * needs to be set before we register anything. This is
10050         * just a bootstrap of current_trace anyway.
10051         */
10052        global_trace.current_trace = &nop_trace;
10053
10054        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10055
10056        ftrace_init_global_array_ops(&global_trace);
10057
10058        init_trace_flags_index(&global_trace);
10059
10060        register_tracer(&nop_trace);
10061
10062        /* Function tracing may start here (via kernel command line) */
10063        init_function_trace();
10064
10065        /* All seems OK, enable tracing */
10066        tracing_disabled = 0;
10067
10068        atomic_notifier_chain_register(&panic_notifier_list,
10069                                       &trace_panic_notifier);
10070
10071        register_die_notifier(&trace_die_notifier);
10072
10073        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10074
10075        INIT_LIST_HEAD(&global_trace.systems);
10076        INIT_LIST_HEAD(&global_trace.events);
10077        INIT_LIST_HEAD(&global_trace.hist_vars);
10078        INIT_LIST_HEAD(&global_trace.err_log);
10079        list_add(&global_trace.list, &ftrace_trace_arrays);
10080
10081        apply_trace_boot_options();
10082
10083        register_snapshot_cmd();
10084
10085        test_can_verify();
10086
10087        return 0;
10088
10089out_free_savedcmd:
10090        free_saved_cmdlines_buffer(savedcmd);
10091out_free_temp_buffer:
10092        ring_buffer_free(temp_buffer);
10093out_rm_hp_state:
10094        cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10095out_free_cpumask:
10096        free_cpumask_var(global_trace.tracing_cpumask);
10097out_free_buffer_mask:
10098        free_cpumask_var(tracing_buffer_mask);
10099out:
10100        return ret;
10101}
10102
10103void __init early_trace_init(void)
10104{
10105        if (tracepoint_printk) {
10106                tracepoint_print_iter =
10107                        kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10108                if (MEM_FAIL(!tracepoint_print_iter,
10109                             "Failed to allocate trace iterator\n"))
10110                        tracepoint_printk = 0;
10111                else
10112                        static_key_enable(&tracepoint_printk_key.key);
10113        }
10114        tracer_alloc_buffers();
10115}
10116
10117void __init trace_init(void)
10118{
10119        trace_event_init();
10120}
10121
10122__init static void clear_boot_tracer(void)
10123{
10124        /*
10125         * The default tracer at boot buffer is an init section.
10126         * This function is called in lateinit. If we did not
10127         * find the boot tracer, then clear it out, to prevent
10128         * later registration from accessing the buffer that is
10129         * about to be freed.
10130         */
10131        if (!default_bootup_tracer)
10132                return;
10133
10134        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10135               default_bootup_tracer);
10136        default_bootup_tracer = NULL;
10137}
10138
10139#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10140__init static void tracing_set_default_clock(void)
10141{
10142        /* sched_clock_stable() is determined in late_initcall */
10143        if (!trace_boot_clock && !sched_clock_stable()) {
10144                if (security_locked_down(LOCKDOWN_TRACEFS)) {
10145                        pr_warn("Can not set tracing clock due to lockdown\n");
10146                        return;
10147                }
10148
10149                printk(KERN_WARNING
10150                       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10151                       "If you want to keep using the local clock, then add:\n"
10152                       "  \"trace_clock=local\"\n"
10153                       "on the kernel command line\n");
10154                tracing_set_clock(&global_trace, "global");
10155        }
10156}
10157#else
10158static inline void tracing_set_default_clock(void) { }
10159#endif
10160
10161__init static int late_trace_init(void)
10162{
10163        if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10164                static_key_disable(&tracepoint_printk_key.key);
10165                tracepoint_printk = 0;
10166        }
10167
10168        tracing_set_default_clock();
10169        clear_boot_tracer();
10170        return 0;
10171}
10172
10173late_initcall_sync(late_trace_init);
10174