linux/kernel/trace/trace.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * ring buffer based function tracer
   4 *
   5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   7 *
   8 * Originally taken from the RT patch by:
   9 *    Arnaldo Carvalho de Melo <acme@redhat.com>
  10 *
  11 * Based on code from the latency_tracer, that is:
  12 *  Copyright (C) 2004-2006 Ingo Molnar
  13 *  Copyright (C) 2004 Nadia Yvette Chambers
  14 */
  15#include <linux/ring_buffer.h>
  16#include <generated/utsrelease.h>
  17#include <linux/stacktrace.h>
  18#include <linux/writeback.h>
  19#include <linux/kallsyms.h>
  20#include <linux/security.h>
  21#include <linux/seq_file.h>
  22#include <linux/notifier.h>
  23#include <linux/irqflags.h>
  24#include <linux/debugfs.h>
  25#include <linux/tracefs.h>
  26#include <linux/pagemap.h>
  27#include <linux/hardirq.h>
  28#include <linux/linkage.h>
  29#include <linux/uaccess.h>
  30#include <linux/vmalloc.h>
  31#include <linux/ftrace.h>
  32#include <linux/module.h>
  33#include <linux/percpu.h>
  34#include <linux/splice.h>
  35#include <linux/kdebug.h>
  36#include <linux/string.h>
  37#include <linux/mount.h>
  38#include <linux/rwsem.h>
  39#include <linux/slab.h>
  40#include <linux/ctype.h>
  41#include <linux/init.h>
  42#include <linux/panic_notifier.h>
  43#include <linux/poll.h>
  44#include <linux/nmi.h>
  45#include <linux/fs.h>
  46#include <linux/trace.h>
  47#include <linux/sched/clock.h>
  48#include <linux/sched/rt.h>
  49#include <linux/fsnotify.h>
  50#include <linux/irq_work.h>
  51#include <linux/workqueue.h>
  52
  53#include "trace.h"
  54#include "trace_output.h"
  55
  56/*
  57 * On boot up, the ring buffer is set to the minimum size, so that
  58 * we do not waste memory on systems that are not using tracing.
  59 */
  60bool ring_buffer_expanded;
  61
  62/*
  63 * We need to change this state when a selftest is running.
   64 * A selftest will look into the ring buffer to count the
   65 * entries inserted during the selftest, although concurrent
   66 * insertions into the ring buffer, such as trace_printk(), could
   67 * occur at the same time, giving false positive or negative results.
  68 */
  69static bool __read_mostly tracing_selftest_running;
  70
  71/*
  72 * If boot-time tracing including tracers/events via kernel cmdline
  73 * is running, we do not want to run SELFTEST.
  74 */
  75bool __read_mostly tracing_selftest_disabled;
  76
  77#ifdef CONFIG_FTRACE_STARTUP_TEST
  78void __init disable_tracing_selftest(const char *reason)
  79{
  80        if (!tracing_selftest_disabled) {
  81                tracing_selftest_disabled = true;
  82                pr_info("Ftrace startup test is disabled due to %s\n", reason);
  83        }
  84}
  85#endif
  86
  87/* Pipe tracepoints to printk */
  88struct trace_iterator *tracepoint_print_iter;
  89int tracepoint_printk;
  90static bool tracepoint_printk_stop_on_boot __initdata;
  91static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  92
  93/* For tracers that don't implement custom flags */
  94static struct tracer_opt dummy_tracer_opt[] = {
  95        { }
  96};
  97
  98static int
  99dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 100{
 101        return 0;
 102}
 103
 104/*
 105 * To prevent the comm cache from being overwritten when no
 106 * tracing is active, only save the comm when a trace event
 107 * occurred.
 108 */
 109static DEFINE_PER_CPU(bool, trace_taskinfo_save);
 110
 111/*
 112 * Kill all tracing for good (never come back).
  113 * It is initialized to 1 and is set to zero if the initialization
  114 * of the tracer is successful; that is the only place that sets
  115 * it back to zero.
 116 */
 117static int tracing_disabled = 1;
 118
 119cpumask_var_t __read_mostly     tracing_buffer_mask;
 120
 121/*
 122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 123 *
 124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 125 * is set, then ftrace_dump is called. This will output the contents
 126 * of the ftrace buffers to the console.  This is very useful for
  127 * capturing traces that lead to crashes and outputting them to a
  128 * serial console.
  129 *
  130 * It is off by default, but you can enable it either by specifying
  131 * "ftrace_dump_on_oops" on the kernel command line, or by setting
  132 * /proc/sys/kernel/ftrace_dump_on_oops.
  133 * Set it to 1 to dump the buffers of all CPUs.
  134 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
 135 */
 136
 137enum ftrace_dump_mode ftrace_dump_on_oops;
 138
 139/* When set, tracing will stop when a WARN*() is hit */
 140int __disable_trace_on_warning;
 141
 142#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 143/* Map of enums to their values, for "eval_map" file */
 144struct trace_eval_map_head {
 145        struct module                   *mod;
 146        unsigned long                   length;
 147};
 148
 149union trace_eval_map_item;
 150
 151struct trace_eval_map_tail {
 152        /*
 153         * "end" is first and points to NULL as it must be different
 154         * than "mod" or "eval_string"
 155         */
 156        union trace_eval_map_item       *next;
 157        const char                      *end;   /* points to NULL */
 158};
 159
 160static DEFINE_MUTEX(trace_eval_mutex);
 161
 162/*
 163 * The trace_eval_maps are saved in an array with two extra elements,
 164 * one at the beginning, and one at the end. The beginning item contains
 165 * the count of the saved maps (head.length), and the module they
 166 * belong to if not built in (head.mod). The ending item contains a
 167 * pointer to the next array of saved eval_map items.
 168 */
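/*
 * For illustration, a saved array holding N maps is laid out roughly as:
 *
 *   item[0]      head   (length = N, mod = owning module, or NULL if built in)
 *   item[1..N]   map    (the trace_eval_map entries themselves)
 *   item[N+1]    tail   (next links to the next saved array; end points to NULL)
 */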
 169union trace_eval_map_item {
 170        struct trace_eval_map           map;
 171        struct trace_eval_map_head      head;
 172        struct trace_eval_map_tail      tail;
 173};
 174
 175static union trace_eval_map_item *trace_eval_maps;
 176#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 177
 178int tracing_set_tracer(struct trace_array *tr, const char *buf);
 179static void ftrace_trace_userstack(struct trace_array *tr,
 180                                   struct trace_buffer *buffer,
 181                                   unsigned int trace_ctx);
 182
 183#define MAX_TRACER_SIZE         100
 184static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 185static char *default_bootup_tracer;
 186
 187static bool allocate_snapshot;
 188static bool snapshot_at_boot;
 189
 190static int __init set_cmdline_ftrace(char *str)
 191{
 192        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 193        default_bootup_tracer = bootup_tracer_buf;
 194        /* We are using ftrace early, expand it */
 195        ring_buffer_expanded = true;
 196        return 1;
 197}
 198__setup("ftrace=", set_cmdline_ftrace);
 199
 200static int __init set_ftrace_dump_on_oops(char *str)
 201{
 202        if (*str++ != '=' || !*str || !strcmp("1", str)) {
 203                ftrace_dump_on_oops = DUMP_ALL;
 204                return 1;
 205        }
 206
 207        if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
 208                ftrace_dump_on_oops = DUMP_ORIG;
 209                return 1;
 210        }
 211
 212        return 0;
 213}
 214__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 215
 216static int __init stop_trace_on_warning(char *str)
 217{
 218        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 219                __disable_trace_on_warning = 1;
 220        return 1;
 221}
 222__setup("traceoff_on_warning", stop_trace_on_warning);
 223
 224static int __init boot_alloc_snapshot(char *str)
 225{
 226        allocate_snapshot = true;
 227        /* We also need the main ring buffer expanded */
 228        ring_buffer_expanded = true;
 229        return 1;
 230}
 231__setup("alloc_snapshot", boot_alloc_snapshot);
 232
 233
 234static int __init boot_snapshot(char *str)
 235{
 236        snapshot_at_boot = true;
 237        boot_alloc_snapshot(str);
 238        return 1;
 239}
 240__setup("ftrace_boot_snapshot", boot_snapshot);
 241
 242
 243static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 244
 245static int __init set_trace_boot_options(char *str)
 246{
 247        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 248        return 1;
 249}
 250__setup("trace_options=", set_trace_boot_options);
 251
 252static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 253static char *trace_boot_clock __initdata;
 254
 255static int __init set_trace_boot_clock(char *str)
 256{
 257        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 258        trace_boot_clock = trace_boot_clock_buf;
 259        return 1;
 260}
 261__setup("trace_clock=", set_trace_boot_clock);
 262
 263static int __init set_tracepoint_printk(char *str)
 264{
 265        /* Ignore the "tp_printk_stop_on_boot" param */
 266        if (*str == '_')
 267                return 0;
 268
 269        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 270                tracepoint_printk = 1;
 271        return 1;
 272}
 273__setup("tp_printk", set_tracepoint_printk);
 274
 275static int __init set_tracepoint_printk_stop(char *str)
 276{
 277        tracepoint_printk_stop_on_boot = true;
 278        return 1;
 279}
 280__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
 281
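/* Converts nanoseconds to microseconds, rounding to nearest: e.g. 1500 ns -> 2 us. */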
 282unsigned long long ns2usecs(u64 nsec)
 283{
 284        nsec += 500;
 285        do_div(nsec, 1000);
 286        return nsec;
 287}
 288
 289static void
 290trace_process_export(struct trace_export *export,
 291               struct ring_buffer_event *event, int flag)
 292{
 293        struct trace_entry *entry;
 294        unsigned int size = 0;
 295
 296        if (export->flags & flag) {
 297                entry = ring_buffer_event_data(event);
 298                size = ring_buffer_event_length(event);
 299                export->write(export, entry, size);
 300        }
 301}
 302
 303static DEFINE_MUTEX(ftrace_export_lock);
 304
 305static struct trace_export __rcu *ftrace_exports_list __read_mostly;
 306
 307static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
 308static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
 309static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
 310
 311static inline void ftrace_exports_enable(struct trace_export *export)
 312{
 313        if (export->flags & TRACE_EXPORT_FUNCTION)
 314                static_branch_inc(&trace_function_exports_enabled);
 315
 316        if (export->flags & TRACE_EXPORT_EVENT)
 317                static_branch_inc(&trace_event_exports_enabled);
 318
 319        if (export->flags & TRACE_EXPORT_MARKER)
 320                static_branch_inc(&trace_marker_exports_enabled);
 321}
 322
 323static inline void ftrace_exports_disable(struct trace_export *export)
 324{
 325        if (export->flags & TRACE_EXPORT_FUNCTION)
 326                static_branch_dec(&trace_function_exports_enabled);
 327
 328        if (export->flags & TRACE_EXPORT_EVENT)
 329                static_branch_dec(&trace_event_exports_enabled);
 330
 331        if (export->flags & TRACE_EXPORT_MARKER)
 332                static_branch_dec(&trace_marker_exports_enabled);
 333}
 334
 335static void ftrace_exports(struct ring_buffer_event *event, int flag)
 336{
 337        struct trace_export *export;
 338
 339        preempt_disable_notrace();
 340
 341        export = rcu_dereference_raw_check(ftrace_exports_list);
 342        while (export) {
 343                trace_process_export(export, event, flag);
 344                export = rcu_dereference_raw_check(export->next);
 345        }
 346
 347        preempt_enable_notrace();
 348}
 349
 350static inline void
 351add_trace_export(struct trace_export **list, struct trace_export *export)
 352{
 353        rcu_assign_pointer(export->next, *list);
 354        /*
  355         * We are inserting export into the list, but another
  356         * CPU might be walking that list. We need to make sure
  357         * the export->next pointer is valid before another CPU sees
  358         * the export pointer included in the list.
 359         */
 360        rcu_assign_pointer(*list, export);
 361}
 362
 363static inline int
 364rm_trace_export(struct trace_export **list, struct trace_export *export)
 365{
 366        struct trace_export **p;
 367
 368        for (p = list; *p != NULL; p = &(*p)->next)
 369                if (*p == export)
 370                        break;
 371
 372        if (*p != export)
 373                return -1;
 374
 375        rcu_assign_pointer(*p, (*p)->next);
 376
 377        return 0;
 378}
 379
 380static inline void
 381add_ftrace_export(struct trace_export **list, struct trace_export *export)
 382{
 383        ftrace_exports_enable(export);
 384
 385        add_trace_export(list, export);
 386}
 387
 388static inline int
 389rm_ftrace_export(struct trace_export **list, struct trace_export *export)
 390{
 391        int ret;
 392
 393        ret = rm_trace_export(list, export);
 394        ftrace_exports_disable(export);
 395
 396        return ret;
 397}
 398
 399int register_ftrace_export(struct trace_export *export)
 400{
 401        if (WARN_ON_ONCE(!export->write))
 402                return -1;
 403
 404        mutex_lock(&ftrace_export_lock);
 405
 406        add_ftrace_export(&ftrace_exports_list, export);
 407
 408        mutex_unlock(&ftrace_export_lock);
 409
 410        return 0;
 411}
 412EXPORT_SYMBOL_GPL(register_ftrace_export);
 413
 414int unregister_ftrace_export(struct trace_export *export)
 415{
 416        int ret;
 417
 418        mutex_lock(&ftrace_export_lock);
 419
 420        ret = rm_ftrace_export(&ftrace_exports_list, export);
 421
 422        mutex_unlock(&ftrace_export_lock);
 423
 424        return ret;
 425}
 426EXPORT_SYMBOL_GPL(unregister_ftrace_export);
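
/*
 * Illustrative sketch (not part of the original file; all "example_*" names
 * are hypothetical): a module could mirror trace data to an external sink by
 * registering a trace_export.  The write() callback is invoked with
 * preemption disabled (see ftrace_exports() above), so it must not sleep.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Push @size bytes starting at @entry to the external sink here. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION,
};

/* Module init: register_ftrace_export(&example_export); */
/* Module exit: unregister_ftrace_export(&example_export); */
#endif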
 427
 428/* trace_flags holds trace_options default values */
 429#define TRACE_DEFAULT_FLAGS                                             \
 430        (FUNCTION_DEFAULT_FLAGS |                                       \
 431         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 432         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 433         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 434         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
 435         TRACE_ITER_HASH_PTR)
 436
 437/* trace_options that are only supported by global_trace */
 438#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 439               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 440
 441/* trace_flags that are default zero for instances */
 442#define ZEROED_TRACE_FLAGS \
 443        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 444
 445/*
 446 * The global_trace is the descriptor that holds the top-level tracing
 447 * buffers for the live tracing.
 448 */
 449static struct trace_array global_trace = {
 450        .trace_flags = TRACE_DEFAULT_FLAGS,
 451};
 452
 453LIST_HEAD(ftrace_trace_arrays);
 454
 455int trace_array_get(struct trace_array *this_tr)
 456{
 457        struct trace_array *tr;
 458        int ret = -ENODEV;
 459
 460        mutex_lock(&trace_types_lock);
 461        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 462                if (tr == this_tr) {
 463                        tr->ref++;
 464                        ret = 0;
 465                        break;
 466                }
 467        }
 468        mutex_unlock(&trace_types_lock);
 469
 470        return ret;
 471}
 472
 473static void __trace_array_put(struct trace_array *this_tr)
 474{
 475        WARN_ON(!this_tr->ref);
 476        this_tr->ref--;
 477}
 478
 479/**
 480 * trace_array_put - Decrement the reference counter for this trace array.
 481 * @this_tr : pointer to the trace array
 482 *
 483 * NOTE: Use this when we no longer need the trace array returned by
  484 * trace_array_get_by_name(). This ensures the trace array can later be
 485 * destroyed.
 486 *
 487 */
 488void trace_array_put(struct trace_array *this_tr)
 489{
 490        if (!this_tr)
 491                return;
 492
 493        mutex_lock(&trace_types_lock);
 494        __trace_array_put(this_tr);
 495        mutex_unlock(&trace_types_lock);
 496}
 497EXPORT_SYMBOL_GPL(trace_array_put);
 498
 499int tracing_check_open_get_tr(struct trace_array *tr)
 500{
 501        int ret;
 502
 503        ret = security_locked_down(LOCKDOWN_TRACEFS);
 504        if (ret)
 505                return ret;
 506
 507        if (tracing_disabled)
 508                return -ENODEV;
 509
 510        if (tr && trace_array_get(tr) < 0)
 511                return -ENODEV;
 512
 513        return 0;
 514}
 515
 516int call_filter_check_discard(struct trace_event_call *call, void *rec,
 517                              struct trace_buffer *buffer,
 518                              struct ring_buffer_event *event)
 519{
 520        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 521            !filter_match_preds(call->filter, rec)) {
 522                __trace_event_discard_commit(buffer, event);
 523                return 1;
 524        }
 525
 526        return 0;
 527}
 528
 529/**
 530 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 531 * @filtered_pids: The list of pids to check
 532 * @search_pid: The PID to find in @filtered_pids
 533 *
 534 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 535 */
 536bool
 537trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 538{
 539        return trace_pid_list_is_set(filtered_pids, search_pid);
 540}
 541
 542/**
 543 * trace_ignore_this_task - should a task be ignored for tracing
 544 * @filtered_pids: The list of pids to check
 545 * @filtered_no_pids: The list of pids not to be traced
 546 * @task: The task that should be ignored if not filtered
 547 *
 548 * Checks if @task should be traced or not from @filtered_pids.
 549 * Returns true if @task should *NOT* be traced.
 550 * Returns false if @task should be traced.
 551 */
 552bool
 553trace_ignore_this_task(struct trace_pid_list *filtered_pids,
 554                       struct trace_pid_list *filtered_no_pids,
 555                       struct task_struct *task)
 556{
 557        /*
 558         * If filtered_no_pids is not empty, and the task's pid is listed
 559         * in filtered_no_pids, then return true.
 560         * Otherwise, if filtered_pids is empty, that means we can
 561         * trace all tasks. If it has content, then only trace pids
 562         * within filtered_pids.
 563         */
 564
 565        return (filtered_pids &&
 566                !trace_find_filtered_pid(filtered_pids, task->pid)) ||
 567                (filtered_no_pids &&
 568                 trace_find_filtered_pid(filtered_no_pids, task->pid));
 569}
 570
 571/**
 572 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 573 * @pid_list: The list to modify
 574 * @self: The current task for fork or NULL for exit
 575 * @task: The task to add or remove
 576 *
  577 * When adding a task, if @self is defined, the task is only added if @self
  578 * is also included in @pid_list. This happens on fork, and tasks should
  579 * only be added when the parent is listed. If @self is NULL, then the
  580 * @task pid will be removed from the list, which happens on exit
  581 * of a task.
 582 */
 583void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 584                                  struct task_struct *self,
 585                                  struct task_struct *task)
 586{
 587        if (!pid_list)
 588                return;
 589
 590        /* For forks, we only add if the forking task is listed */
 591        if (self) {
 592                if (!trace_find_filtered_pid(pid_list, self->pid))
 593                        return;
 594        }
 595
 596        /* "self" is set for forks, and NULL for exits */
 597        if (self)
 598                trace_pid_list_set(pid_list, task->pid);
 599        else
 600                trace_pid_list_clear(pid_list, task->pid);
 601}
 602
 603/**
 604 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 605 * @pid_list: The pid list to show
  606 * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
 607 * @pos: The position of the file
 608 *
 609 * This is used by the seq_file "next" operation to iterate the pids
 610 * listed in a trace_pid_list structure.
 611 *
 612 * Returns the pid+1 as we want to display pid of zero, but NULL would
 613 * stop the iteration.
 614 */
 615void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 616{
 617        long pid = (unsigned long)v;
 618        unsigned int next;
 619
 620        (*pos)++;
 621
 622        /* pid already is +1 of the actual previous bit */
 623        if (trace_pid_list_next(pid_list, pid, &next) < 0)
 624                return NULL;
 625
 626        pid = next;
 627
 628        /* Return pid + 1 to allow zero to be represented */
 629        return (void *)(pid + 1);
 630}
 631
 632/**
 633 * trace_pid_start - Used for seq_file to start reading pid lists
 634 * @pid_list: The pid list to show
 635 * @pos: The position of the file
 636 *
 637 * This is used by seq_file "start" operation to start the iteration
 638 * of listing pids.
 639 *
 640 * Returns the pid+1 as we want to display pid of zero, but NULL would
 641 * stop the iteration.
 642 */
 643void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 644{
 645        unsigned long pid;
 646        unsigned int first;
 647        loff_t l = 0;
 648
 649        if (trace_pid_list_first(pid_list, &first) < 0)
 650                return NULL;
 651
 652        pid = first;
 653
 654        /* Return pid + 1 so that zero can be the exit value */
 655        for (pid++; pid && l < *pos;
 656             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 657                ;
 658        return (void *)pid;
 659}
 660
 661/**
 662 * trace_pid_show - show the current pid in seq_file processing
 663 * @m: The seq_file structure to write into
 664 * @v: A void pointer of the pid (+1) value to display
 665 *
 666 * Can be directly used by seq_file operations to display the current
 667 * pid value.
 668 */
 669int trace_pid_show(struct seq_file *m, void *v)
 670{
 671        unsigned long pid = (unsigned long)v - 1;
 672
 673        seq_printf(m, "%lu\n", pid);
 674        return 0;
 675}
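
/*
 * Illustrative sketch (not part of the original file; the "example_*" names
 * are hypothetical): a tracefs file exposing a pid list typically wires these
 * helpers into its seq_file operations, roughly:
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,	// wraps trace_pid_start()
 *		.next	= example_pid_next,	// wraps trace_pid_next()
 *		.stop	= example_pid_stop,	// drops locks taken in start
 *		.show	= trace_pid_show,
 *	};
 *
 * where the start/next wrappers pass the file's RCU-protected
 * trace_pid_list to the helpers above.
 */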
 676
 677/* 128 should be much more than enough */
 678#define PID_BUF_SIZE            127
 679
 680int trace_pid_write(struct trace_pid_list *filtered_pids,
 681                    struct trace_pid_list **new_pid_list,
 682                    const char __user *ubuf, size_t cnt)
 683{
 684        struct trace_pid_list *pid_list;
 685        struct trace_parser parser;
 686        unsigned long val;
 687        int nr_pids = 0;
 688        ssize_t read = 0;
 689        ssize_t ret;
 690        loff_t pos;
 691        pid_t pid;
 692
 693        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 694                return -ENOMEM;
 695
 696        /*
  697         * Always create a new list. The write is an all-or-nothing
  698         * operation: new pids from the user are added to a fresh list,
  699         * and if the operation fails, the current list is
  700         * not modified.
 701         */
 702        pid_list = trace_pid_list_alloc();
 703        if (!pid_list) {
 704                trace_parser_put(&parser);
 705                return -ENOMEM;
 706        }
 707
 708        if (filtered_pids) {
 709                /* copy the current bits to the new max */
 710                ret = trace_pid_list_first(filtered_pids, &pid);
 711                while (!ret) {
 712                        trace_pid_list_set(pid_list, pid);
 713                        ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
 714                        nr_pids++;
 715                }
 716        }
 717
 718        ret = 0;
 719        while (cnt > 0) {
 720
 721                pos = 0;
 722
 723                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 724                if (ret < 0)
 725                        break;
 726
 727                read += ret;
 728                ubuf += ret;
 729                cnt -= ret;
 730
 731                if (!trace_parser_loaded(&parser))
 732                        break;
 733
 734                ret = -EINVAL;
 735                if (kstrtoul(parser.buffer, 0, &val))
 736                        break;
 737
 738                pid = (pid_t)val;
 739
 740                if (trace_pid_list_set(pid_list, pid) < 0) {
 741                        ret = -1;
 742                        break;
 743                }
 744                nr_pids++;
 745
 746                trace_parser_clear(&parser);
 747                ret = 0;
 748        }
 749        trace_parser_put(&parser);
 750
 751        if (ret < 0) {
 752                trace_pid_list_free(pid_list);
 753                return ret;
 754        }
 755
 756        if (!nr_pids) {
 757                /* Cleared the list of pids */
 758                trace_pid_list_free(pid_list);
 759                pid_list = NULL;
 760        }
 761
 762        *new_pid_list = pid_list;
 763
 764        return read;
 765}
 766
 767static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
 768{
 769        u64 ts;
 770
 771        /* Early boot up does not have a buffer yet */
 772        if (!buf->buffer)
 773                return trace_clock_local();
 774
 775        ts = ring_buffer_time_stamp(buf->buffer);
 776        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 777
 778        return ts;
 779}
 780
 781u64 ftrace_now(int cpu)
 782{
 783        return buffer_ftrace_now(&global_trace.array_buffer, cpu);
 784}
 785
 786/**
 787 * tracing_is_enabled - Show if global_trace has been enabled
 788 *
 789 * Shows if the global trace has been enabled or not. It uses the
  790 * mirror flag "buffer_disabled" so it can be used in fast paths such as
  791 * by the irqsoff tracer. But it may be inaccurate due to races. If you
 792 * need to know the accurate state, use tracing_is_on() which is a little
 793 * slower, but accurate.
 794 */
 795int tracing_is_enabled(void)
 796{
 797        /*
 798         * For quick access (irqsoff uses this in fast path), just
 799         * return the mirror variable of the state of the ring buffer.
 800         * It's a little racy, but we don't really care.
 801         */
 802        smp_rmb();
 803        return !global_trace.buffer_disabled;
 804}
 805
 806/*
 807 * trace_buf_size is the size in bytes that is allocated
 808 * for a buffer. Note, the number of bytes is always rounded
 809 * to page size.
 810 *
  811 * This number is purposely set to a low value of 16384 entries.
  812 * If a dump on oops happens, it will be much appreciated
  813 * not to have to wait for all that output. In any case, this is
  814 * configurable at both boot time and run time.
 815 */
 816#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 817
 818static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 819
 820/* trace_types holds a link list of available tracers. */
 821static struct tracer            *trace_types __read_mostly;
 822
 823/*
 824 * trace_types_lock is used to protect the trace_types list.
 825 */
 826DEFINE_MUTEX(trace_types_lock);
 827
 828/*
  829 * Serialize access to the ring buffer.
  830 *
  831 * The ring buffer serializes readers, but that is only low-level protection.
  832 * The validity of the events (returned by ring_buffer_peek() etc.)
  833 * is not protected by the ring buffer.
  834 *
  835 * The content of events may become garbage if we allow other processes to
  836 * consume these events concurrently:
  837 *   A) the page of the consumed events may become a normal page
  838 *      (not a reader page) in the ring buffer, and this page will be
  839 *      rewritten by the events producer.
  840 *   B) the page of the consumed events may become a page for splice_read,
  841 *      and this page will be returned to the system.
  842 *
  843 * These primitives allow multiple processes to access different cpu ring
  844 * buffers concurrently.
  845 *
  846 * These primitives don't distinguish read-only and read-consume access.
  847 * Multiple read-only accesses are also serialized.
 848 */
 849
 850#ifdef CONFIG_SMP
 851static DECLARE_RWSEM(all_cpu_access_lock);
 852static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 853
 854static inline void trace_access_lock(int cpu)
 855{
 856        if (cpu == RING_BUFFER_ALL_CPUS) {
 857                /* gain it for accessing the whole ring buffer. */
 858                down_write(&all_cpu_access_lock);
 859        } else {
 860                /* gain it for accessing a cpu ring buffer. */
 861
 862                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 863                down_read(&all_cpu_access_lock);
 864
 865                /* Secondly block other access to this @cpu ring buffer. */
 866                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 867        }
 868}
 869
 870static inline void trace_access_unlock(int cpu)
 871{
 872        if (cpu == RING_BUFFER_ALL_CPUS) {
 873                up_write(&all_cpu_access_lock);
 874        } else {
 875                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 876                up_read(&all_cpu_access_lock);
 877        }
 878}
 879
 880static inline void trace_access_lock_init(void)
 881{
 882        int cpu;
 883
 884        for_each_possible_cpu(cpu)
 885                mutex_init(&per_cpu(cpu_access_lock, cpu));
 886}
 887
 888#else
 889
 890static DEFINE_MUTEX(access_lock);
 891
 892static inline void trace_access_lock(int cpu)
 893{
 894        (void)cpu;
 895        mutex_lock(&access_lock);
 896}
 897
 898static inline void trace_access_unlock(int cpu)
 899{
 900        (void)cpu;
 901        mutex_unlock(&access_lock);
 902}
 903
 904static inline void trace_access_lock_init(void)
 905{
 906}
 907
 908#endif
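
/*
 * Typical read-side pattern (illustrative; actual callers live elsewhere in
 * this file):
 *
 *	trace_access_lock(cpu_file);
 *	// consume events from the selected cpu buffer(s)
 *	trace_access_unlock(cpu_file);
 *
 * On SMP, passing RING_BUFFER_ALL_CPUS takes all_cpu_access_lock exclusively,
 * so a reader of all cpu buffers is never interleaved with per-cpu readers.
 */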
 909
 910#ifdef CONFIG_STACKTRACE
 911static void __ftrace_trace_stack(struct trace_buffer *buffer,
 912                                 unsigned int trace_ctx,
 913                                 int skip, struct pt_regs *regs);
 914static inline void ftrace_trace_stack(struct trace_array *tr,
 915                                      struct trace_buffer *buffer,
 916                                      unsigned int trace_ctx,
 917                                      int skip, struct pt_regs *regs);
 918
 919#else
 920static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
 921                                        unsigned int trace_ctx,
 922                                        int skip, struct pt_regs *regs)
 923{
 924}
 925static inline void ftrace_trace_stack(struct trace_array *tr,
 926                                      struct trace_buffer *buffer,
 927                                      unsigned long trace_ctx,
 928                                      int skip, struct pt_regs *regs)
 929{
 930}
 931
 932#endif
 933
 934static __always_inline void
 935trace_event_setup(struct ring_buffer_event *event,
 936                  int type, unsigned int trace_ctx)
 937{
 938        struct trace_entry *ent = ring_buffer_event_data(event);
 939
 940        tracing_generic_entry_update(ent, type, trace_ctx);
 941}
 942
 943static __always_inline struct ring_buffer_event *
 944__trace_buffer_lock_reserve(struct trace_buffer *buffer,
 945                          int type,
 946                          unsigned long len,
 947                          unsigned int trace_ctx)
 948{
 949        struct ring_buffer_event *event;
 950
 951        event = ring_buffer_lock_reserve(buffer, len);
 952        if (event != NULL)
 953                trace_event_setup(event, type, trace_ctx);
 954
 955        return event;
 956}
 957
 958void tracer_tracing_on(struct trace_array *tr)
 959{
 960        if (tr->array_buffer.buffer)
 961                ring_buffer_record_on(tr->array_buffer.buffer);
 962        /*
 963         * This flag is looked at when buffers haven't been allocated
 964         * yet, or by some tracers (like irqsoff), that just want to
 965         * know if the ring buffer has been disabled, but it can handle
  966         * races where it gets disabled while we still do a record.
 967         * As the check is in the fast path of the tracers, it is more
 968         * important to be fast than accurate.
 969         */
 970        tr->buffer_disabled = 0;
 971        /* Make the flag seen by readers */
 972        smp_wmb();
 973}
 974
 975/**
 976 * tracing_on - enable tracing buffers
 977 *
 978 * This function enables tracing buffers that may have been
 979 * disabled with tracing_off.
 980 */
 981void tracing_on(void)
 982{
 983        tracer_tracing_on(&global_trace);
 984}
 985EXPORT_SYMBOL_GPL(tracing_on);
 986
 987
 988static __always_inline void
 989__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
 990{
 991        __this_cpu_write(trace_taskinfo_save, true);
 992
 993        /* If this is the temp buffer, we need to commit fully */
 994        if (this_cpu_read(trace_buffered_event) == event) {
 995                /* Length is in event->array[0] */
 996                ring_buffer_write(buffer, event->array[0], &event->array[1]);
 997                /* Release the temp buffer */
 998                this_cpu_dec(trace_buffered_event_cnt);
 999                /* ring_buffer_unlock_commit() enables preemption */
1000                preempt_enable_notrace();
1001        } else
1002                ring_buffer_unlock_commit(buffer, event);
1003}
1004
1005/**
1006 * __trace_puts - write a constant string into the trace buffer.
1007 * @ip:    The address of the caller
1008 * @str:   The constant string to write
1009 * @size:  The size of the string.
1010 */
1011int __trace_puts(unsigned long ip, const char *str, int size)
1012{
1013        struct ring_buffer_event *event;
1014        struct trace_buffer *buffer;
1015        struct print_entry *entry;
1016        unsigned int trace_ctx;
1017        int alloc;
1018
1019        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                return 0;
1021
1022        if (unlikely(tracing_selftest_running || tracing_disabled))
1023                return 0;
1024
1025        alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026
1027        trace_ctx = tracing_gen_ctx();
1028        buffer = global_trace.array_buffer.buffer;
1029        ring_buffer_nest_start(buffer);
1030        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                                            trace_ctx);
1032        if (!event) {
1033                size = 0;
1034                goto out;
1035        }
1036
1037        entry = ring_buffer_event_data(event);
1038        entry->ip = ip;
1039
1040        memcpy(&entry->buf, str, size);
1041
1042        /* Add a newline if necessary */
1043        if (entry->buf[size - 1] != '\n') {
1044                entry->buf[size] = '\n';
1045                entry->buf[size + 1] = '\0';
1046        } else
1047                entry->buf[size] = '\0';
1048
1049        __buffer_unlock_commit(buffer, event);
1050        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051 out:
1052        ring_buffer_nest_end(buffer);
1053        return size;
1054}
1055EXPORT_SYMBOL_GPL(__trace_puts);
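
/*
 * Callers normally reach this through the trace_puts() macro, e.g.
 *
 *	trace_puts("hit the slow path\n");
 *
 * which uses __trace_bputs() for compile-time constant strings and falls
 * back to __trace_puts() otherwise.
 */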
1056
1057/**
1058 * __trace_bputs - write the pointer to a constant string into trace buffer
1059 * @ip:    The address of the caller
1060 * @str:   The constant string to write to the buffer to
1061 */
1062int __trace_bputs(unsigned long ip, const char *str)
1063{
1064        struct ring_buffer_event *event;
1065        struct trace_buffer *buffer;
1066        struct bputs_entry *entry;
1067        unsigned int trace_ctx;
1068        int size = sizeof(struct bputs_entry);
1069        int ret = 0;
1070
1071        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072                return 0;
1073
1074        if (unlikely(tracing_selftest_running || tracing_disabled))
1075                return 0;
1076
1077        trace_ctx = tracing_gen_ctx();
1078        buffer = global_trace.array_buffer.buffer;
1079
1080        ring_buffer_nest_start(buffer);
1081        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                            trace_ctx);
1083        if (!event)
1084                goto out;
1085
1086        entry = ring_buffer_event_data(event);
1087        entry->ip                       = ip;
1088        entry->str                      = str;
1089
1090        __buffer_unlock_commit(buffer, event);
1091        ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092
1093        ret = 1;
1094 out:
1095        ring_buffer_nest_end(buffer);
1096        return ret;
1097}
1098EXPORT_SYMBOL_GPL(__trace_bputs);
1099
1100#ifdef CONFIG_TRACER_SNAPSHOT
1101static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                           void *cond_data)
1103{
1104        struct tracer *tracer = tr->current_trace;
1105        unsigned long flags;
1106
1107        if (in_nmi()) {
1108                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                return;
1111        }
1112
1113        if (!tr->allocated_snapshot) {
1114                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                internal_trace_puts("*** stopping trace here!   ***\n");
1116                tracing_off();
1117                return;
1118        }
1119
1120        /* Note, snapshot can not be used when the tracer uses it */
1121        if (tracer->use_max_tr) {
1122                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                return;
1125        }
1126
1127        local_irq_save(flags);
1128        update_max_tr(tr, current, smp_processor_id(), cond_data);
1129        local_irq_restore(flags);
1130}
1131
1132void tracing_snapshot_instance(struct trace_array *tr)
1133{
1134        tracing_snapshot_instance_cond(tr, NULL);
1135}
1136
1137/**
1138 * tracing_snapshot - take a snapshot of the current buffer.
1139 *
1140 * This causes a swap between the snapshot buffer and the current live
1141 * tracing buffer. You can use this to take snapshots of the live
1142 * trace when some condition is triggered, but continue to trace.
1143 *
 1144 * Note, make sure to allocate the snapshot either with
 1145 * tracing_snapshot_alloc(), or manually with:
 1146 * echo 1 > /sys/kernel/debug/tracing/snapshot
 1147 *
 1148 * If the snapshot buffer is not allocated, this will stop tracing,
 1149 * basically making a permanent snapshot.
1150 */
1151void tracing_snapshot(void)
1152{
1153        struct trace_array *tr = &global_trace;
1154
1155        tracing_snapshot_instance(tr);
1156}
1157EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159/**
1160 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161 * @tr:         The tracing instance to snapshot
1162 * @cond_data:  The data to be tested conditionally, and possibly saved
1163 *
1164 * This is the same as tracing_snapshot() except that the snapshot is
1165 * conditional - the snapshot will only happen if the
1166 * cond_snapshot.update() implementation receiving the cond_data
1167 * returns true, which means that the trace array's cond_snapshot
1168 * update() operation used the cond_data to determine whether the
1169 * snapshot should be taken, and if it was, presumably saved it along
1170 * with the snapshot.
1171 */
1172void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173{
1174        tracing_snapshot_instance_cond(tr, cond_data);
1175}
1176EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178/**
1179 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180 * @tr:         The tracing instance
1181 *
1182 * When the user enables a conditional snapshot using
1183 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184 * with the snapshot.  This accessor is used to retrieve it.
1185 *
 1186 * Should not be called from cond_snapshot.update(), since this function
 1187 * takes the tr->max_lock lock, which the code calling
 1188 * cond_snapshot.update() has already taken.
1189 *
1190 * Returns the cond_data associated with the trace array's snapshot.
1191 */
1192void *tracing_cond_snapshot_data(struct trace_array *tr)
1193{
1194        void *cond_data = NULL;
1195
1196        arch_spin_lock(&tr->max_lock);
1197
1198        if (tr->cond_snapshot)
1199                cond_data = tr->cond_snapshot->cond_data;
1200
1201        arch_spin_unlock(&tr->max_lock);
1202
1203        return cond_data;
1204}
1205EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206
1207static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1208                                        struct array_buffer *size_buf, int cpu_id);
1209static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1210
1211int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212{
1213        int ret;
1214
1215        if (!tr->allocated_snapshot) {
1216
1217                /* allocate spare buffer */
1218                ret = resize_buffer_duplicate_size(&tr->max_buffer,
1219                                   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220                if (ret < 0)
1221                        return ret;
1222
1223                tr->allocated_snapshot = true;
1224        }
1225
1226        return 0;
1227}
1228
1229static void free_snapshot(struct trace_array *tr)
1230{
1231        /*
 1232         * We don't free the ring buffer. Instead, we resize it because
 1233         * the max_tr ring buffer has some state (e.g. ring->clock) and
 1234         * we want to preserve it.
1235         */
1236        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1237        set_buffer_entries(&tr->max_buffer, 1);
1238        tracing_reset_online_cpus(&tr->max_buffer);
1239        tr->allocated_snapshot = false;
1240}
1241
1242/**
1243 * tracing_alloc_snapshot - allocate snapshot buffer.
1244 *
1245 * This only allocates the snapshot buffer if it isn't already
1246 * allocated - it doesn't also take a snapshot.
1247 *
1248 * This is meant to be used in cases where the snapshot buffer needs
1249 * to be set up for events that can't sleep but need to be able to
1250 * trigger a snapshot.
1251 */
1252int tracing_alloc_snapshot(void)
1253{
1254        struct trace_array *tr = &global_trace;
1255        int ret;
1256
1257        ret = tracing_alloc_snapshot_instance(tr);
1258        WARN_ON(ret < 0);
1259
1260        return ret;
1261}
1262EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1263
1264/**
1265 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1266 *
1267 * This is similar to tracing_snapshot(), but it will allocate the
1268 * snapshot buffer if it isn't already allocated. Use this only
1269 * where it is safe to sleep, as the allocation may sleep.
1270 *
1271 * This causes a swap between the snapshot buffer and the current live
1272 * tracing buffer. You can use this to take snapshots of the live
1273 * trace when some condition is triggered, but continue to trace.
1274 */
1275void tracing_snapshot_alloc(void)
1276{
1277        int ret;
1278
1279        ret = tracing_alloc_snapshot();
1280        if (ret < 0)
1281                return;
1282
1283        tracing_snapshot();
1284}
1285EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1286
1287/**
1288 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1289 * @tr:         The tracing instance
1290 * @cond_data:  User data to associate with the snapshot
1291 * @update:     Implementation of the cond_snapshot update function
1292 *
1293 * Check whether the conditional snapshot for the given instance has
1294 * already been enabled, or if the current tracer is already using a
1295 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1296 * save the cond_data and update function inside.
1297 *
1298 * Returns 0 if successful, error otherwise.
1299 */
1300int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1301                                 cond_update_fn_t update)
1302{
1303        struct cond_snapshot *cond_snapshot;
1304        int ret = 0;
1305
1306        cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307        if (!cond_snapshot)
1308                return -ENOMEM;
1309
1310        cond_snapshot->cond_data = cond_data;
1311        cond_snapshot->update = update;
1312
1313        mutex_lock(&trace_types_lock);
1314
1315        ret = tracing_alloc_snapshot_instance(tr);
1316        if (ret)
1317                goto fail_unlock;
1318
1319        if (tr->current_trace->use_max_tr) {
1320                ret = -EBUSY;
1321                goto fail_unlock;
1322        }
1323
1324        /*
1325         * The cond_snapshot can only change to NULL without the
1326         * trace_types_lock. We don't care if we race with it going
1327         * to NULL, but we want to make sure that it's not set to
1328         * something other than NULL when we get here, which we can
1329         * do safely with only holding the trace_types_lock and not
1330         * having to take the max_lock.
1331         */
1332        if (tr->cond_snapshot) {
1333                ret = -EBUSY;
1334                goto fail_unlock;
1335        }
1336
1337        arch_spin_lock(&tr->max_lock);
1338        tr->cond_snapshot = cond_snapshot;
1339        arch_spin_unlock(&tr->max_lock);
1340
1341        mutex_unlock(&trace_types_lock);
1342
1343        return ret;
1344
1345 fail_unlock:
1346        mutex_unlock(&trace_types_lock);
1347        kfree(cond_snapshot);
1348        return ret;
1349}
1350EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
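
/*
 * Illustrative sketch (not part of the original file; the "example_*" names
 * are hypothetical): a user of the conditional snapshot API supplies an
 * update callback that decides whether a given tracing_snapshot_cond() call
 * actually takes the snapshot.
 */
#if 0
/* Called with the cond_data passed to tracing_snapshot_cond(). */
static bool example_snapshot_update(struct trace_array *tr, void *cond_data)
{
	int *value = cond_data;

	/* Take the snapshot only when the monitored value is non-zero. */
	return value && *value;
}

static int example_enable(struct trace_array *tr, void *saved_data)
{
	/* @saved_data can later be read back via tracing_cond_snapshot_data(). */
	return tracing_snapshot_cond_enable(tr, saved_data,
					    example_snapshot_update);
}
#endif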
1351
1352/**
1353 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1354 * @tr:         The tracing instance
1355 *
1356 * Check whether the conditional snapshot for the given instance is
1357 * enabled; if so, free the cond_snapshot associated with it,
1358 * otherwise return -EINVAL.
1359 *
1360 * Returns 0 if successful, error otherwise.
1361 */
1362int tracing_snapshot_cond_disable(struct trace_array *tr)
1363{
1364        int ret = 0;
1365
1366        arch_spin_lock(&tr->max_lock);
1367
1368        if (!tr->cond_snapshot)
1369                ret = -EINVAL;
1370        else {
1371                kfree(tr->cond_snapshot);
1372                tr->cond_snapshot = NULL;
1373        }
1374
1375        arch_spin_unlock(&tr->max_lock);
1376
1377        return ret;
1378}
1379EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1380#else
1381void tracing_snapshot(void)
1382{
1383        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1384}
1385EXPORT_SYMBOL_GPL(tracing_snapshot);
1386void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1387{
1388        WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1389}
1390EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1391int tracing_alloc_snapshot(void)
1392{
1393        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1394        return -ENODEV;
1395}
1396EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1397void tracing_snapshot_alloc(void)
1398{
1399        /* Give warning */
1400        tracing_snapshot();
1401}
1402EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1403void *tracing_cond_snapshot_data(struct trace_array *tr)
1404{
1405        return NULL;
1406}
1407EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1408int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409{
1410        return -ENODEV;
1411}
1412EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1413int tracing_snapshot_cond_disable(struct trace_array *tr)
1414{
1415        return false;
1416}
1417EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1418#endif /* CONFIG_TRACER_SNAPSHOT */
1419
1420void tracer_tracing_off(struct trace_array *tr)
1421{
1422        if (tr->array_buffer.buffer)
1423                ring_buffer_record_off(tr->array_buffer.buffer);
1424        /*
1425         * This flag is looked at when buffers haven't been allocated
1426         * yet, or by some tracers (like irqsoff), that just want to
1427         * know if the ring buffer has been disabled, but it can handle
 1428         * races where it gets disabled while we still do a record.
1429         * As the check is in the fast path of the tracers, it is more
1430         * important to be fast than accurate.
1431         */
1432        tr->buffer_disabled = 1;
1433        /* Make the flag seen by readers */
1434        smp_wmb();
1435}
1436
1437/**
1438 * tracing_off - turn off tracing buffers
1439 *
1440 * This function stops the tracing buffers from recording data.
1441 * It does not disable any overhead the tracers themselves may
1442 * be causing. This function simply causes all recording to
1443 * the ring buffers to fail.
1444 */
1445void tracing_off(void)
1446{
1447        tracer_tracing_off(&global_trace);
1448}
1449EXPORT_SYMBOL_GPL(tracing_off);
1450
1451void disable_trace_on_warning(void)
1452{
1453        if (__disable_trace_on_warning) {
1454                trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1455                        "Disabling tracing due to warning\n");
1456                tracing_off();
1457        }
1458}
1459
1460/**
 1461 * tracer_tracing_is_on - show the real state of the ring buffer
 1462 * @tr : the trace array whose ring buffer state is queried
 1463 *
 1464 * Shows the real state of the ring buffer: whether it is enabled or not.
1465 */
1466bool tracer_tracing_is_on(struct trace_array *tr)
1467{
1468        if (tr->array_buffer.buffer)
1469                return ring_buffer_record_is_on(tr->array_buffer.buffer);
1470        return !tr->buffer_disabled;
1471}
1472
1473/**
1474 * tracing_is_on - show state of ring buffers enabled
1475 */
1476int tracing_is_on(void)
1477{
1478        return tracer_tracing_is_on(&global_trace);
1479}
1480EXPORT_SYMBOL_GPL(tracing_is_on);
1481
1482static int __init set_buf_size(char *str)
1483{
1484        unsigned long buf_size;
1485
1486        if (!str)
1487                return 0;
1488        buf_size = memparse(str, &str);
1489        /*
1490         * nr_entries can not be zero and the startup
1491         * tests require some buffer space. Therefore
1492         * ensure we have at least 4096 bytes of buffer.
1493         */
1494        trace_buf_size = max(4096UL, buf_size);
1495        return 1;
1496}
1497__setup("trace_buf_size=", set_buf_size);
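
/*
 * Example: trace_buf_size=16M on the kernel command line; memparse() accepts
 * plain byte counts as well as K, M and G suffixes.
 */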
1498
1499static int __init set_tracing_thresh(char *str)
1500{
1501        unsigned long threshold;
1502        int ret;
1503
1504        if (!str)
1505                return 0;
1506        ret = kstrtoul(str, 0, &threshold);
1507        if (ret < 0)
1508                return 0;
1509        tracing_thresh = threshold * 1000;
1510        return 1;
1511}
1512__setup("tracing_thresh=", set_tracing_thresh);
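
/*
 * Example: tracing_thresh=100 sets a 100-microsecond threshold; the value is
 * stored internally in nanoseconds (hence the multiplication by 1000 above).
 */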
1513
1514unsigned long nsecs_to_usecs(unsigned long nsecs)
1515{
1516        return nsecs / 1000;
1517}
1518
1519/*
1520 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1521 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1522 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1523 * of strings in the order that the evals (enum) were defined.
1524 */
1525#undef C
1526#define C(a, b) b
1527
1528/* These must match the bit positions in trace_iterator_flags */
1529static const char *trace_options[] = {
1530        TRACE_FLAGS
1531        NULL
1532};
1533
1534static struct {
1535        u64 (*func)(void);
1536        const char *name;
1537        int in_ns;              /* is this clock in nanoseconds? */
1538} trace_clocks[] = {
1539        { trace_clock_local,            "local",        1 },
1540        { trace_clock_global,           "global",       1 },
1541        { trace_clock_counter,          "counter",      0 },
1542        { trace_clock_jiffies,          "uptime",       0 },
1543        { trace_clock,                  "perf",         1 },
1544        { ktime_get_mono_fast_ns,       "mono",         1 },
1545        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1546        { ktime_get_boot_fast_ns,       "boot",         1 },
1547        { ktime_get_tai_fast_ns,        "tai",          1 },
1548        ARCH_TRACE_CLOCKS
1549};
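
/*
 * These names are what the trace_clock= boot parameter (see
 * set_trace_boot_clock() above) and the tracefs "trace_clock" file accept,
 * e.g. trace_clock=global on the kernel command line.
 */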
1550
1551bool trace_clock_in_ns(struct trace_array *tr)
1552{
1553        if (trace_clocks[tr->clock_id].in_ns)
1554                return true;
1555
1556        return false;
1557}
1558
1559/*
1560 * trace_parser_get_init - gets the buffer for trace parser
1561 */
1562int trace_parser_get_init(struct trace_parser *parser, int size)
1563{
1564        memset(parser, 0, sizeof(*parser));
1565
1566        parser->buffer = kmalloc(size, GFP_KERNEL);
1567        if (!parser->buffer)
1568                return 1;
1569
1570        parser->size = size;
1571        return 0;
1572}
1573
1574/*
1575 * trace_parser_put - frees the buffer for trace parser
1576 */
1577void trace_parser_put(struct trace_parser *parser)
1578{
1579        kfree(parser->buffer);
1580        parser->buffer = NULL;
1581}
1582
1583/*
 1584 * trace_get_user - reads the user input string separated by space
 1585 * (matched by isspace(ch))
 1586 *
 1587 * For each string found, the 'struct trace_parser' is updated,
 1588 * and the function returns.
1589 *
1590 * Returns number of bytes read.
1591 *
1592 * See kernel/trace/trace.h for 'struct trace_parser' details.
1593 */
1594int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1595        size_t cnt, loff_t *ppos)
1596{
1597        char ch;
1598        size_t read = 0;
1599        ssize_t ret;
1600
1601        if (!*ppos)
1602                trace_parser_clear(parser);
1603
1604        ret = get_user(ch, ubuf++);
1605        if (ret)
1606                goto out;
1607
1608        read++;
1609        cnt--;
1610
1611        /*
1612         * The parser is not finished with the last write,
1613         * continue reading the user input without skipping spaces.
1614         */
1615        if (!parser->cont) {
1616                /* skip white space */
1617                while (cnt && isspace(ch)) {
1618                        ret = get_user(ch, ubuf++);
1619                        if (ret)
1620                                goto out;
1621                        read++;
1622                        cnt--;
1623                }
1624
1625                parser->idx = 0;
1626
1627                /* only spaces were written */
1628                if (isspace(ch) || !ch) {
1629                        *ppos += read;
1630                        ret = read;
1631                        goto out;
1632                }
1633        }
1634
1635        /* read the non-space input */
1636        while (cnt && !isspace(ch) && ch) {
1637                if (parser->idx < parser->size - 1)
1638                        parser->buffer[parser->idx++] = ch;
1639                else {
1640                        ret = -EINVAL;
1641                        goto out;
1642                }
1643                ret = get_user(ch, ubuf++);
1644                if (ret)
1645                        goto out;
1646                read++;
1647                cnt--;
1648        }
1649
1650        /* We either got finished input or we have to wait for another call. */
1651        if (isspace(ch) || !ch) {
1652                parser->buffer[parser->idx] = 0;
1653                parser->cont = false;
1654        } else if (parser->idx < parser->size - 1) {
1655                parser->cont = true;
1656                parser->buffer[parser->idx++] = ch;
1657                /* Make sure the parsed string always terminates with '\0'. */
1658                parser->buffer[parser->idx] = 0;
1659        } else {
1660                ret = -EINVAL;
1661                goto out;
1662        }
1663
1664        *ppos += read;
1665        ret = read;
1666
1667out:
1668        return ret;
1669}
1670
1671/* TODO add a seq_buf_to_buffer() */
1672static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1673{
1674        int len;
1675
1676        if (trace_seq_used(s) <= s->seq.readpos)
1677                return -EBUSY;
1678
1679        len = trace_seq_used(s) - s->seq.readpos;
1680        if (cnt > len)
1681                cnt = len;
1682        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1683
1684        s->seq.readpos += cnt;
1685        return cnt;
1686}
1687
1688unsigned long __read_mostly     tracing_thresh;
1689static const struct file_operations tracing_max_lat_fops;
1690
1691#ifdef LATENCY_FS_NOTIFY
1692
1693static struct workqueue_struct *fsnotify_wq;
1694
1695static void latency_fsnotify_workfn(struct work_struct *work)
1696{
1697        struct trace_array *tr = container_of(work, struct trace_array,
1698                                              fsnotify_work);
1699        fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1700}
1701
1702static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1703{
1704        struct trace_array *tr = container_of(iwork, struct trace_array,
1705                                              fsnotify_irqwork);
1706        queue_work(fsnotify_wq, &tr->fsnotify_work);
1707}
1708
1709static void trace_create_maxlat_file(struct trace_array *tr,
1710                                     struct dentry *d_tracer)
1711{
1712        INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1713        init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1714        tr->d_max_latency = trace_create_file("tracing_max_latency",
1715                                              TRACE_MODE_WRITE,
1716                                              d_tracer, &tr->max_latency,
1717                                              &tracing_max_lat_fops);
1718}
1719
1720__init static int latency_fsnotify_init(void)
1721{
1722        fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1723                                      WQ_UNBOUND | WQ_HIGHPRI, 0);
1724        if (!fsnotify_wq) {
1725                pr_err("Unable to allocate tr_max_lat_wq\n");
1726                return -ENOMEM;
1727        }
1728        return 0;
1729}
1730
1731late_initcall_sync(latency_fsnotify_init);
1732
1733void latency_fsnotify(struct trace_array *tr)
1734{
1735        if (!fsnotify_wq)
1736                return;
1737        /*
1738         * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1739         * possible that we are called from __schedule() or do_idle(), which
1740         * could cause a deadlock.
1741         */
1742        irq_work_queue(&tr->fsnotify_irqwork);
1743}
1744
1745#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1746        || defined(CONFIG_OSNOISE_TRACER)
1747
1748#define trace_create_maxlat_file(tr, d_tracer)                          \
1749        trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1750                          d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1751
1752#else
1753#define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1754#endif
1755
1756#ifdef CONFIG_TRACER_MAX_TRACE
1757/*
1758 * Copy the new maximum trace into the separate maximum-trace
1759 * structure. (This way the maximum trace is permanently saved,
1760 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1761 */
1762static void
1763__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1764{
1765        struct array_buffer *trace_buf = &tr->array_buffer;
1766        struct array_buffer *max_buf = &tr->max_buffer;
1767        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1768        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1769
1770        max_buf->cpu = cpu;
1771        max_buf->time_start = data->preempt_timestamp;
1772
1773        max_data->saved_latency = tr->max_latency;
1774        max_data->critical_start = data->critical_start;
1775        max_data->critical_end = data->critical_end;
1776
1777        strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1778        max_data->pid = tsk->pid;
1779        /*
1780         * If tsk == current, then use current_uid(), as that does not use
1781         * RCU. The irq tracer can be called out of RCU scope.
1782         */
1783        if (tsk == current)
1784                max_data->uid = current_uid();
1785        else
1786                max_data->uid = task_uid(tsk);
1787
1788        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1789        max_data->policy = tsk->policy;
1790        max_data->rt_priority = tsk->rt_priority;
1791
1792        /* Record this task's comm */
1793        tracing_record_cmdline(tsk);
1794        latency_fsnotify(tr);
1795}
1796
1797/**
1798 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1799 * @tr: tracer
1800 * @tsk: the task with the latency
1801 * @cpu: The cpu that initiated the trace.
1802 * @cond_data: User data associated with a conditional snapshot
1803 *
1804 * Flip the buffers between the @tr and the max_tr and record information
1805 * about which task was the cause of this latency.
1806 */
1807void
1808update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809              void *cond_data)
1810{
1811        if (tr->stop_count)
1812                return;
1813
1814        WARN_ON_ONCE(!irqs_disabled());
1815
1816        if (!tr->allocated_snapshot) {
1817                /* Only the nop tracer should hit this when disabling */
1818                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1819                return;
1820        }
1821
1822        arch_spin_lock(&tr->max_lock);
1823
1824        /* Inherit the recordable setting from array_buffer */
1825        if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1826                ring_buffer_record_on(tr->max_buffer.buffer);
1827        else
1828                ring_buffer_record_off(tr->max_buffer.buffer);
1829
1830#ifdef CONFIG_TRACER_SNAPSHOT
1831        if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1832                goto out_unlock;
1833#endif
1834        swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1835
1836        __update_max_tr(tr, tsk, cpu);
1837
1838 out_unlock:
1839        arch_spin_unlock(&tr->max_lock);
1840}
1841
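/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): latency tracers such as irqsoff and wakeup call update_max_tr()
 * along these lines once they measure a new worst-case latency, with
 * interrupts already disabled. example_report_latency() is a hypothetical
 * stand-in for the tracer's own threshold check.
 */
#if 0
static void example_record_latency(struct trace_array *tr,
                                   unsigned long delta, int cpu)
{
        if (!example_report_latency(tr, delta))
                return;

        /* Remember the new maximum and snapshot the buffers into max_tr */
        tr->max_latency = delta;
        update_max_tr(tr, current, cpu, NULL);
}
#endif
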
1842/**
1843 * update_max_tr_single - only copy one trace over, and reset the rest
1844 * @tr: tracer
1845 * @tsk: task with the latency
1846 * @cpu: the cpu of the buffer to copy.
1847 *
1848 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1849 */
1850void
1851update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1852{
1853        int ret;
1854
1855        if (tr->stop_count)
1856                return;
1857
1858        WARN_ON_ONCE(!irqs_disabled());
1859        if (!tr->allocated_snapshot) {
1860                /* Only the nop tracer should hit this when disabling */
1861                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1862                return;
1863        }
1864
1865        arch_spin_lock(&tr->max_lock);
1866
1867        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1868
1869        if (ret == -EBUSY) {
1870                /*
1871                 * We failed to swap the buffer due to a commit taking
1872                 * place on this CPU. We fail to record, but we reset
1873                 * the max trace buffer (no one writes directly to it)
1874                 * and flag that it failed.
1875                 */
1876                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1877                        "Failed to swap buffers due to commit in progress\n");
1878        }
1879
1880        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1881
1882        __update_max_tr(tr, tsk, cpu);
1883        arch_spin_unlock(&tr->max_lock);
1884}
1885#endif /* CONFIG_TRACER_MAX_TRACE */
1886
1887static int wait_on_pipe(struct trace_iterator *iter, int full)
1888{
1889        /* Iterators are static; they should be filled or empty */
1890        if (trace_buffer_iter(iter, iter->cpu_file))
1891                return 0;
1892
1893        return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1894                                full);
1895}
1896
1897#ifdef CONFIG_FTRACE_STARTUP_TEST
1898static bool selftests_can_run;
1899
1900struct trace_selftests {
1901        struct list_head                list;
1902        struct tracer                   *type;
1903};
1904
1905static LIST_HEAD(postponed_selftests);
1906
1907static int save_selftest(struct tracer *type)
1908{
1909        struct trace_selftests *selftest;
1910
1911        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1912        if (!selftest)
1913                return -ENOMEM;
1914
1915        selftest->type = type;
1916        list_add(&selftest->list, &postponed_selftests);
1917        return 0;
1918}
1919
1920static int run_tracer_selftest(struct tracer *type)
1921{
1922        struct trace_array *tr = &global_trace;
1923        struct tracer *saved_tracer = tr->current_trace;
1924        int ret;
1925
1926        if (!type->selftest || tracing_selftest_disabled)
1927                return 0;
1928
1929        /*
1930         * If a tracer registers early in boot up (before scheduling is
1931         * initialized and such), then do not run its selftests yet.
1932         * Instead, run them a little later in the boot process.
1933         */
1934        if (!selftests_can_run)
1935                return save_selftest(type);
1936
1937        if (!tracing_is_on()) {
1938                pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1939                        type->name);
1940                return 0;
1941        }
1942
1943        /*
1944         * Run a selftest on this tracer.
1945         * Here we reset the trace buffer, and set the current
1946         * tracer to be this tracer. The tracer can then run some
1947         * internal tracing to verify that everything is in order.
1948         * If we fail, we do not register this tracer.
1949         */
1950        tracing_reset_online_cpus(&tr->array_buffer);
1951
1952        tr->current_trace = type;
1953
1954#ifdef CONFIG_TRACER_MAX_TRACE
1955        if (type->use_max_tr) {
1956                /* If we expanded the buffers, make sure the max is expanded too */
1957                if (ring_buffer_expanded)
1958                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1959                                           RING_BUFFER_ALL_CPUS);
1960                tr->allocated_snapshot = true;
1961        }
1962#endif
1963
1964        /* the test is responsible for initializing and enabling */
1965        pr_info("Testing tracer %s: ", type->name);
1966        ret = type->selftest(type, tr);
1967        /* the test is responsible for resetting too */
1968        tr->current_trace = saved_tracer;
1969        if (ret) {
1970                printk(KERN_CONT "FAILED!\n");
1971                /* Add the warning after printing 'FAILED' */
1972                WARN_ON(1);
1973                return -1;
1974        }
1975        /* Only reset on passing, to avoid touching corrupted buffers */
1976        tracing_reset_online_cpus(&tr->array_buffer);
1977
1978#ifdef CONFIG_TRACER_MAX_TRACE
1979        if (type->use_max_tr) {
1980                tr->allocated_snapshot = false;
1981
1982                /* Shrink the max buffer again */
1983                if (ring_buffer_expanded)
1984                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1985                                           RING_BUFFER_ALL_CPUS);
1986        }
1987#endif
1988
1989        printk(KERN_CONT "PASSED\n");
1990        return 0;
1991}
1992
1993static __init int init_trace_selftests(void)
1994{
1995        struct trace_selftests *p, *n;
1996        struct tracer *t, **last;
1997        int ret;
1998
1999        selftests_can_run = true;
2000
2001        mutex_lock(&trace_types_lock);
2002
2003        if (list_empty(&postponed_selftests))
2004                goto out;
2005
2006        pr_info("Running postponed tracer tests:\n");
2007
2008        tracing_selftest_running = true;
2009        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2010                /* This loop can take minutes when sanitizers are enabled, so
2011                 * let's make sure we allow RCU processing.
2012                 */
2013                cond_resched();
2014                ret = run_tracer_selftest(p->type);
2015                /* If the test fails, then warn and remove from available_tracers */
2016                if (ret < 0) {
2017                        WARN(1, "tracer: %s failed selftest, disabling\n",
2018                             p->type->name);
2019                        last = &trace_types;
2020                        for (t = trace_types; t; t = t->next) {
2021                                if (t == p->type) {
2022                                        *last = t->next;
2023                                        break;
2024                                }
2025                                last = &t->next;
2026                        }
2027                }
2028                list_del(&p->list);
2029                kfree(p);
2030        }
2031        tracing_selftest_running = false;
2032
2033 out:
2034        mutex_unlock(&trace_types_lock);
2035
2036        return 0;
2037}
2038core_initcall(init_trace_selftests);
2039#else
2040static inline int run_tracer_selftest(struct tracer *type)
2041{
2042        return 0;
2043}
2044#endif /* CONFIG_FTRACE_STARTUP_TEST */
2045
2046static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2047
2048static void __init apply_trace_boot_options(void);
2049
2050/**
2051 * register_tracer - register a tracer with the ftrace system.
2052 * @type: the plugin for the tracer
2053 *
2054 * Register a new plugin tracer.
2055 */
2056int __init register_tracer(struct tracer *type)
2057{
2058        struct tracer *t;
2059        int ret = 0;
2060
2061        if (!type->name) {
2062                pr_info("Tracer must have a name\n");
2063                return -1;
2064        }
2065
2066        if (strlen(type->name) >= MAX_TRACER_SIZE) {
2067                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2068                return -1;
2069        }
2070
2071        if (security_locked_down(LOCKDOWN_TRACEFS)) {
2072                pr_warn("Can not register tracer %s due to lockdown\n",
2073                           type->name);
2074                return -EPERM;
2075        }
2076
2077        mutex_lock(&trace_types_lock);
2078
2079        tracing_selftest_running = true;
2080
2081        for (t = trace_types; t; t = t->next) {
2082                if (strcmp(type->name, t->name) == 0) {
2083                        /* already found */
2084                        pr_info("Tracer %s already registered\n",
2085                                type->name);
2086                        ret = -1;
2087                        goto out;
2088                }
2089        }
2090
2091        if (!type->set_flag)
2092                type->set_flag = &dummy_set_flag;
2093        if (!type->flags) {
2094                /* Allocate a dummy tracer_flags */
2095                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2096                if (!type->flags) {
2097                        ret = -ENOMEM;
2098                        goto out;
2099                }
2100                type->flags->val = 0;
2101                type->flags->opts = dummy_tracer_opt;
2102        } else
2103                if (!type->flags->opts)
2104                        type->flags->opts = dummy_tracer_opt;
2105
2106        /* store the tracer for __set_tracer_option */
2107        type->flags->trace = type;
2108
2109        ret = run_tracer_selftest(type);
2110        if (ret < 0)
2111                goto out;
2112
2113        type->next = trace_types;
2114        trace_types = type;
2115        add_tracer_options(&global_trace, type);
2116
2117 out:
2118        tracing_selftest_running = false;
2119        mutex_unlock(&trace_types_lock);
2120
2121        if (ret || !default_bootup_tracer)
2122                goto out_unlock;
2123
2124        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2125                goto out_unlock;
2126
2127        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2128        /* Do we want this tracer to start on bootup? */
2129        tracing_set_tracer(&global_trace, type->name);
2130        default_bootup_tracer = NULL;
2131
2132        apply_trace_boot_options();
2133
2134        /* Disable other selftests, since running this tracer will break them. */
2135        disable_tracing_selftest("running a tracer");
2136
2137 out_unlock:
2138        return ret;
2139}
2140
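/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): the minimal shape of a tracer plugin registered through
 * register_tracer(). All example_* names are hypothetical.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
        /* Arm whatever probes this tracer needs */
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
        /* Undo what example_tracer_init() set up */
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int init_example_tracer(void)
{
        /* register_tracer() is __init, so this must run at boot time */
        return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif
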
2141static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2142{
2143        struct trace_buffer *buffer = buf->buffer;
2144
2145        if (!buffer)
2146                return;
2147
2148        ring_buffer_record_disable(buffer);
2149
2150        /* Make sure all commits have finished */
2151        synchronize_rcu();
2152        ring_buffer_reset_cpu(buffer, cpu);
2153
2154        ring_buffer_record_enable(buffer);
2155}
2156
2157void tracing_reset_online_cpus(struct array_buffer *buf)
2158{
2159        struct trace_buffer *buffer = buf->buffer;
2160
2161        if (!buffer)
2162                return;
2163
2164        ring_buffer_record_disable(buffer);
2165
2166        /* Make sure all commits have finished */
2167        synchronize_rcu();
2168
2169        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2170
2171        ring_buffer_reset_online_cpus(buffer);
2172
2173        ring_buffer_record_enable(buffer);
2174}
2175
2176/* Must have trace_types_lock held */
2177void tracing_reset_all_online_cpus(void)
2178{
2179        struct trace_array *tr;
2180
2181        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2182                if (!tr->clear_trace)
2183                        continue;
2184                tr->clear_trace = false;
2185                tracing_reset_online_cpus(&tr->array_buffer);
2186#ifdef CONFIG_TRACER_MAX_TRACE
2187                tracing_reset_online_cpus(&tr->max_buffer);
2188#endif
2189        }
2190}
2191
2192/*
2193 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2194 * is the tgid last observed corresponding to pid=i.
2195 */
2196static int *tgid_map;
2197
2198/* The maximum valid index into tgid_map. */
2199static size_t tgid_map_max;
2200
2201#define SAVED_CMDLINES_DEFAULT 128
2202#define NO_CMDLINE_MAP UINT_MAX
2203static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2204struct saved_cmdlines_buffer {
2205        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2206        unsigned *map_cmdline_to_pid;
2207        unsigned cmdline_num;
2208        int cmdline_idx;
2209        char *saved_cmdlines;
2210};
2211static struct saved_cmdlines_buffer *savedcmd;
2212
2213static inline char *get_saved_cmdlines(int idx)
2214{
2215        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2216}
2217
2218static inline void set_cmdline(int idx, const char *cmdline)
2219{
2220        strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2221}
2222
2223static int allocate_cmdlines_buffer(unsigned int val,
2224                                    struct saved_cmdlines_buffer *s)
2225{
2226        s->map_cmdline_to_pid = kmalloc_array(val,
2227                                              sizeof(*s->map_cmdline_to_pid),
2228                                              GFP_KERNEL);
2229        if (!s->map_cmdline_to_pid)
2230                return -ENOMEM;
2231
2232        s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2233        if (!s->saved_cmdlines) {
2234                kfree(s->map_cmdline_to_pid);
2235                return -ENOMEM;
2236        }
2237
2238        s->cmdline_idx = 0;
2239        s->cmdline_num = val;
2240        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2241               sizeof(s->map_pid_to_cmdline));
2242        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2243               val * sizeof(*s->map_cmdline_to_pid));
2244
2245        return 0;
2246}
2247
2248static int trace_create_savedcmd(void)
2249{
2250        int ret;
2251
2252        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2253        if (!savedcmd)
2254                return -ENOMEM;
2255
2256        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2257        if (ret < 0) {
2258                kfree(savedcmd);
2259                savedcmd = NULL;
2260                return -ENOMEM;
2261        }
2262
2263        return 0;
2264}
2265
2266int is_tracing_stopped(void)
2267{
2268        return global_trace.stop_count;
2269}
2270
2271/**
2272 * tracing_start - quick start of the tracer
2273 *
2274 * If tracing is enabled but was stopped by tracing_stop,
2275 * this will start the tracer back up.
2276 */
2277void tracing_start(void)
2278{
2279        struct trace_buffer *buffer;
2280        unsigned long flags;
2281
2282        if (tracing_disabled)
2283                return;
2284
2285        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2286        if (--global_trace.stop_count) {
2287                if (global_trace.stop_count < 0) {
2288                        /* Someone screwed up their debugging */
2289                        WARN_ON_ONCE(1);
2290                        global_trace.stop_count = 0;
2291                }
2292                goto out;
2293        }
2294
2295        /* Prevent the buffers from switching */
2296        arch_spin_lock(&global_trace.max_lock);
2297
2298        buffer = global_trace.array_buffer.buffer;
2299        if (buffer)
2300                ring_buffer_record_enable(buffer);
2301
2302#ifdef CONFIG_TRACER_MAX_TRACE
2303        buffer = global_trace.max_buffer.buffer;
2304        if (buffer)
2305                ring_buffer_record_enable(buffer);
2306#endif
2307
2308        arch_spin_unlock(&global_trace.max_lock);
2309
2310 out:
2311        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2312}
2313
2314static void tracing_start_tr(struct trace_array *tr)
2315{
2316        struct trace_buffer *buffer;
2317        unsigned long flags;
2318
2319        if (tracing_disabled)
2320                return;
2321
2322        /* If global, we need to also start the max tracer */
2323        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2324                return tracing_start();
2325
2326        raw_spin_lock_irqsave(&tr->start_lock, flags);
2327
2328        if (--tr->stop_count) {
2329                if (tr->stop_count < 0) {
2330                        /* Someone screwed up their debugging */
2331                        WARN_ON_ONCE(1);
2332                        tr->stop_count = 0;
2333                }
2334                goto out;
2335        }
2336
2337        buffer = tr->array_buffer.buffer;
2338        if (buffer)
2339                ring_buffer_record_enable(buffer);
2340
2341 out:
2342        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2343}
2344
2345/**
2346 * tracing_stop - quick stop of the tracer
2347 *
2348 * Lightweight way to stop tracing. Use in conjunction with
2349 * tracing_start.
2350 */
2351void tracing_stop(void)
2352{
2353        struct trace_buffer *buffer;
2354        unsigned long flags;
2355
2356        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2357        if (global_trace.stop_count++)
2358                goto out;
2359
2360        /* Prevent the buffers from switching */
2361        arch_spin_lock(&global_trace.max_lock);
2362
2363        buffer = global_trace.array_buffer.buffer;
2364        if (buffer)
2365                ring_buffer_record_disable(buffer);
2366
2367#ifdef CONFIG_TRACER_MAX_TRACE
2368        buffer = global_trace.max_buffer.buffer;
2369        if (buffer)
2370                ring_buffer_record_disable(buffer);
2371#endif
2372
2373        arch_spin_unlock(&global_trace.max_lock);
2374
2375 out:
2376        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2377}
2378
2379static void tracing_stop_tr(struct trace_array *tr)
2380{
2381        struct trace_buffer *buffer;
2382        unsigned long flags;
2383
2384        /* If global, we need to also stop the max tracer */
2385        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2386                return tracing_stop();
2387
2388        raw_spin_lock_irqsave(&tr->start_lock, flags);
2389        if (tr->stop_count++)
2390                goto out;
2391
2392        buffer = tr->array_buffer.buffer;
2393        if (buffer)
2394                ring_buffer_record_disable(buffer);
2395
2396 out:
2397        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2398}
2399
2400static int trace_save_cmdline(struct task_struct *tsk)
2401{
2402        unsigned tpid, idx;
2403
2404        /* treat recording of idle task as a success */
2405        if (!tsk->pid)
2406                return 1;
2407
2408        tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2409
2410        /*
2411         * It's not the end of the world if we don't get
2412         * the lock, but we also don't want to spin
2413         * nor do we want to disable interrupts,
2414         * so if we miss here, then better luck next time.
2415         */
2416        if (!arch_spin_trylock(&trace_cmdline_lock))
2417                return 0;
2418
2419        idx = savedcmd->map_pid_to_cmdline[tpid];
2420        if (idx == NO_CMDLINE_MAP) {
2421                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2422
2423                savedcmd->map_pid_to_cmdline[tpid] = idx;
2424                savedcmd->cmdline_idx = idx;
2425        }
2426
2427        savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2428        set_cmdline(idx, tsk->comm);
2429
2430        arch_spin_unlock(&trace_cmdline_lock);
2431
2432        return 1;
2433}
2434
2435static void __trace_find_cmdline(int pid, char comm[])
2436{
2437        unsigned map;
2438        int tpid;
2439
2440        if (!pid) {
2441                strcpy(comm, "<idle>");
2442                return;
2443        }
2444
2445        if (WARN_ON_ONCE(pid < 0)) {
2446                strcpy(comm, "<XXX>");
2447                return;
2448        }
2449
2450        tpid = pid & (PID_MAX_DEFAULT - 1);
2451        map = savedcmd->map_pid_to_cmdline[tpid];
2452        if (map != NO_CMDLINE_MAP) {
2453                tpid = savedcmd->map_cmdline_to_pid[map];
2454                if (tpid == pid) {
2455                        strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2456                        return;
2457                }
2458        }
2459        strcpy(comm, "<...>");
2460}
2461
2462void trace_find_cmdline(int pid, char comm[])
2463{
2464        preempt_disable();
2465        arch_spin_lock(&trace_cmdline_lock);
2466
2467        __trace_find_cmdline(pid, comm);
2468
2469        arch_spin_unlock(&trace_cmdline_lock);
2470        preempt_enable();
2471}
2472
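/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): callers hand trace_find_cmdline() a TASK_COMM_LEN buffer and
 * always get something printable back - the saved comm, "<idle>" for pid 0,
 * or "<...>" when the cmdline entry has been recycled. example_print_comm()
 * is a hypothetical name.
 */
#if 0
static void example_print_comm(struct trace_seq *s, int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        trace_seq_printf(s, "%s-%d", comm, pid);
}
#endif
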
2473static int *trace_find_tgid_ptr(int pid)
2474{
2475        /*
2476         * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2477         * if we observe a non-NULL tgid_map then we also observe the correct
2478         * tgid_map_max.
2479         */
2480        int *map = smp_load_acquire(&tgid_map);
2481
2482        if (unlikely(!map || pid > tgid_map_max))
2483                return NULL;
2484
2485        return &map[pid];
2486}
2487
2488int trace_find_tgid(int pid)
2489{
2490        int *ptr = trace_find_tgid_ptr(pid);
2491
2492        return ptr ? *ptr : 0;
2493}
2494
2495static int trace_save_tgid(struct task_struct *tsk)
2496{
2497        int *ptr;
2498
2499        /* treat recording of idle task as a success */
2500        if (!tsk->pid)
2501                return 1;
2502
2503        ptr = trace_find_tgid_ptr(tsk->pid);
2504        if (!ptr)
2505                return 0;
2506
2507        *ptr = tsk->tgid;
2508        return 1;
2509}
2510
2511static bool tracing_record_taskinfo_skip(int flags)
2512{
2513        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2514                return true;
2515        if (!__this_cpu_read(trace_taskinfo_save))
2516                return true;
2517        return false;
2518}
2519
2520/**
2521 * tracing_record_taskinfo - record the task info of a task
2522 *
2523 * @task:  task to record
2524 * @flags: TRACE_RECORD_CMDLINE for recording comm
2525 *         TRACE_RECORD_TGID for recording tgid
2526 */
2527void tracing_record_taskinfo(struct task_struct *task, int flags)
2528{
2529        bool done;
2530
2531        if (tracing_record_taskinfo_skip(flags))
2532                return;
2533
2534        /*
2535         * Record as much task information as possible. If some fail, continue
2536         * to try to record the others.
2537         */
2538        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2539        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2540
2541        /* If recording any information failed, retry again soon. */
2542        if (!done)
2543                return;
2544
2545        __this_cpu_write(trace_taskinfo_save, false);
2546}
2547
2548/**
2549 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2550 *
2551 * @prev: previous task during sched_switch
2552 * @next: next task during sched_switch
2553 * @flags: TRACE_RECORD_CMDLINE for recording comm
2554 *         TRACE_RECORD_TGID for recording tgid
2555 */
2556void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2557                                          struct task_struct *next, int flags)
2558{
2559        bool done;
2560
2561        if (tracing_record_taskinfo_skip(flags))
2562                return;
2563
2564        /*
2565         * Record as much task information as possible. If some fail, continue
2566         * to try to record the others.
2567         */
2568        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2569        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2570        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2571        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2572
2573        /* If recording any information failed, retry again soon. */
2574        if (!done)
2575                return;
2576
2577        __this_cpu_write(trace_taskinfo_save, false);
2578}
2579
2580/* Helpers to record a specific task information */
2581void tracing_record_cmdline(struct task_struct *task)
2582{
2583        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2584}
2585
2586void tracing_record_tgid(struct task_struct *task)
2587{
2588        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2589}
2590
2591/*
2592 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2593 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2594 * simplifies those functions and keeps them in sync.
2595 */
2596enum print_line_t trace_handle_return(struct trace_seq *s)
2597{
2598        return trace_seq_has_overflowed(s) ?
2599                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2600}
2601EXPORT_SYMBOL_GPL(trace_handle_return);
2602
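/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): a typical trace_event output callback builds its line in
 * iter->seq and ends with trace_handle_return(), so an overflowed trace_seq
 * is reported as TRACE_TYPE_PARTIAL_LINE. struct example_entry and
 * trace_example_output() are hypothetical.
 */
#if 0
static enum print_line_t trace_example_output(struct trace_iterator *iter,
                                              int flags,
                                              struct trace_event *event)
{
        struct example_entry *field = (struct example_entry *)iter->ent;
        struct trace_seq *s = &iter->seq;

        trace_seq_printf(s, "example: %lu\n", field->value);

        return trace_handle_return(s);
}
#endif
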
2603static unsigned short migration_disable_value(void)
2604{
2605#if defined(CONFIG_SMP)
2606        return current->migration_disabled;
2607#else
2608        return 0;
2609#endif
2610}
2611
2612unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2613{
2614        unsigned int trace_flags = irqs_status;
2615        unsigned int pc;
2616
2617        pc = preempt_count();
2618
2619        if (pc & NMI_MASK)
2620                trace_flags |= TRACE_FLAG_NMI;
2621        if (pc & HARDIRQ_MASK)
2622                trace_flags |= TRACE_FLAG_HARDIRQ;
2623        if (in_serving_softirq())
2624                trace_flags |= TRACE_FLAG_SOFTIRQ;
2625        if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2626                trace_flags |= TRACE_FLAG_BH_OFF;
2627
2628        if (tif_need_resched())
2629                trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630        if (test_preempt_need_resched())
2631                trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632        return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633                (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634}
2635
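/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): unpacking the word built above. The layout follows directly from
 * the return statement of tracing_gen_ctx_irq_test(): bits 0-3 carry the
 * (capped) preemption depth, bits 4-7 the (capped) migration-disable depth,
 * and bits 16 and up the TRACE_FLAG_* bits. example_decode_ctx() is a
 * hypothetical helper.
 */
#if 0
static void example_decode_ctx(unsigned int trace_ctx)
{
        unsigned int preempt_depth = trace_ctx & 0xf;
        unsigned int migrate_depth = (trace_ctx >> 4) & 0xf;
        unsigned int irq_flags     = trace_ctx >> 16;

        pr_debug("preempt=%u migrate_disable=%u hardirq=%d softirq=%d nmi=%d\n",
                 preempt_depth, migrate_depth,
                 !!(irq_flags & TRACE_FLAG_HARDIRQ),
                 !!(irq_flags & TRACE_FLAG_SOFTIRQ),
                 !!(irq_flags & TRACE_FLAG_NMI));
}
#endif
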
2636struct ring_buffer_event *
2637trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638                          int type,
2639                          unsigned long len,
2640                          unsigned int trace_ctx)
2641{
2642        return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643}
2644
2645DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647static int trace_buffered_event_ref;
2648
2649/**
2650 * trace_buffered_event_enable - enable buffering events
2651 *
2652 * When events are being filtered, it is quicker to use a temporary
2653 * buffer to write the event data into if there's a likely chance
2654 * that it will not be committed. Discarding an event from the ring
2655 * buffer is not as fast as committing one, and is much slower than
2656 * copying the data and committing only on a match.
2657 *
2658 * When an event is to be filtered, allocate per-CPU buffers to
2659 * write the event data into. If the event is filtered and discarded,
2660 * it is simply dropped; otherwise, the entire data is committed
2661 * in one shot.
2662 */
2663void trace_buffered_event_enable(void)
2664{
2665        struct ring_buffer_event *event;
2666        struct page *page;
2667        int cpu;
2668
2669        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670
2671        if (trace_buffered_event_ref++)
2672                return;
2673
2674        for_each_tracing_cpu(cpu) {
2675                page = alloc_pages_node(cpu_to_node(cpu),
2676                                        GFP_KERNEL | __GFP_NORETRY, 0);
2677                if (!page)
2678                        goto failed;
2679
2680                event = page_address(page);
2681                memset(event, 0, sizeof(*event));
2682
2683                per_cpu(trace_buffered_event, cpu) = event;
2684
2685                preempt_disable();
2686                if (cpu == smp_processor_id() &&
2687                    __this_cpu_read(trace_buffered_event) !=
2688                    per_cpu(trace_buffered_event, cpu))
2689                        WARN_ON_ONCE(1);
2690                preempt_enable();
2691        }
2692
2693        return;
2694 failed:
2695        trace_buffered_event_disable();
2696}
2697
2698static void enable_trace_buffered_event(void *data)
2699{
2700        /* Probably not needed, but do it anyway */
2701        smp_rmb();
2702        this_cpu_dec(trace_buffered_event_cnt);
2703}
2704
2705static void disable_trace_buffered_event(void *data)
2706{
2707        this_cpu_inc(trace_buffered_event_cnt);
2708}
2709
2710/**
2711 * trace_buffered_event_disable - disable buffering events
2712 *
2713 * When a filter is removed, it is faster to not use the buffered
2714 * events, and to commit directly into the ring buffer. Free up
2715 * the temp buffers when there are no more users. This requires
2716 * special synchronization with current events.
2717 */
2718void trace_buffered_event_disable(void)
2719{
2720        int cpu;
2721
2722        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723
2724        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725                return;
2726
2727        if (--trace_buffered_event_ref)
2728                return;
2729
2730        preempt_disable();
2731        /* For each CPU, set the buffer as used. */
2732        smp_call_function_many(tracing_buffer_mask,
2733                               disable_trace_buffered_event, NULL, 1);
2734        preempt_enable();
2735
2736        /* Wait for all current users to finish */
2737        synchronize_rcu();
2738
2739        for_each_tracing_cpu(cpu) {
2740                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741                per_cpu(trace_buffered_event, cpu) = NULL;
2742        }
2743        /*
2744         * Make sure trace_buffered_event is NULL before clearing
2745         * trace_buffered_event_cnt.
2746         */
2747        smp_wmb();
2748
2749        preempt_disable();
2750        /* Do the work on each cpu */
2751        smp_call_function_many(tracing_buffer_mask,
2752                               enable_trace_buffered_event, NULL, 1);
2753        preempt_enable();
2754}
2755
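/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): the buffered-event path is reference counted and both helpers
 * above must be called with event_mutex held, typically while a filter is
 * being attached to or removed from an event.
 * example_toggle_filter_buffering() is a hypothetical wrapper.
 */
#if 0
static void example_toggle_filter_buffering(bool enable)
{
        mutex_lock(&event_mutex);
        if (enable)
                trace_buffered_event_enable();
        else
                trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}
#endif
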
2756static struct trace_buffer *temp_buffer;
2757
2758struct ring_buffer_event *
2759trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760                          struct trace_event_file *trace_file,
2761                          int type, unsigned long len,
2762                          unsigned int trace_ctx)
2763{
2764        struct ring_buffer_event *entry;
2765        struct trace_array *tr = trace_file->tr;
2766        int val;
2767
2768        *current_rb = tr->array_buffer.buffer;
2769
2770        if (!tr->no_filter_buffering_ref &&
2771            (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2772                preempt_disable_notrace();
2773                /*
2774                 * Filtering is on, so try to use the per cpu buffer first.
2775                 * This buffer will simulate a ring_buffer_event,
2776                 * where the type_len is zero and the array[0] will
2777                 * hold the full length.
2778                 * (see include/linux/ring_buffer.h for details on
2779                 *  how the ring_buffer_event is structured).
2780                 *
2781                 * Using a temp buffer during filtering and copying it
2782                 * on a matched filter is quicker than writing directly
2783                 * into the ring buffer and then discarding it when
2784                 * it doesn't match. That is because the discard
2785                 * requires several atomic operations to get right.
2786                 * Copying on match and doing nothing on a failed match
2787                 * is still quicker than no copy on match, but having
2788                 * to discard out of the ring buffer on a failed match.
2789                 */
2790                if ((entry = __this_cpu_read(trace_buffered_event))) {
2791                        int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2792
2793                        val = this_cpu_inc_return(trace_buffered_event_cnt);
2794
2795                        /*
2796                         * Preemption is disabled, but interrupts and NMIs
2797                         * can still come in now. If that happens after
2798                         * the above increment, then it will have to go
2799                         * back to the old method of allocating the event
2800                         * on the ring buffer, and if the filter fails, it
2801                         * will have to call ring_buffer_discard_commit()
2802                         * to remove it.
2803                         *
2804                         * Need to also check the unlikely case that the
2805                         * length is bigger than the temp buffer size.
2806                         * If that happens, then the reserve is pretty much
2807                         * guaranteed to fail, as the ring buffer currently
2808                         * only allows events less than a page. But that may
2809                         * change in the future, so let the ring buffer reserve
2810                         * handle the failure in that case.
2811                         */
2812                        if (val == 1 && likely(len <= max_len)) {
2813                                trace_event_setup(entry, type, trace_ctx);
2814                                entry->array[0] = len;
2815                                /* Return with preemption disabled */
2816                                return entry;
2817                        }
2818                        this_cpu_dec(trace_buffered_event_cnt);
2819                }
2820                /* __trace_buffer_lock_reserve() disables preemption */
2821                preempt_enable_notrace();
2822        }
2823
2824        entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2825                                            trace_ctx);
2826        /*
2827         * If tracing is off, but we have triggers enabled,
2828         * we still need to look at the event data. Use the temp_buffer
2829         * to store the trace event for the trigger to use. It's recursion
2830         * safe and will not be recorded anywhere.
2831         */
2832        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2833                *current_rb = temp_buffer;
2834                entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2835                                                    trace_ctx);
2836        }
2837        return entry;
2838}
2839EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2840
2841static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2842static DEFINE_MUTEX(tracepoint_printk_mutex);
2843
2844static void output_printk(struct trace_event_buffer *fbuffer)
2845{
2846        struct trace_event_call *event_call;
2847        struct trace_event_file *file;
2848        struct trace_event *event;
2849        unsigned long flags;
2850        struct trace_iterator *iter = tracepoint_print_iter;
2851
2852        /* We should never get here if iter is NULL */
2853        if (WARN_ON_ONCE(!iter))
2854                return;
2855
2856        event_call = fbuffer->trace_file->event_call;
2857        if (!event_call || !event_call->event.funcs ||
2858            !event_call->event.funcs->trace)
2859                return;
2860
2861        file = fbuffer->trace_file;
2862        if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2863            (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2864             !filter_match_preds(file->filter, fbuffer->entry)))
2865                return;
2866
2867        event = &fbuffer->trace_file->event_call->event;
2868
2869        raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2870        trace_seq_init(&iter->seq);
2871        iter->ent = fbuffer->entry;
2872        event_call->event.funcs->trace(iter, 0, event);
2873        trace_seq_putc(&iter->seq, 0);
2874        printk("%s", iter->seq.buffer);
2875
2876        raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2877}
2878
2879int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2880                             void *buffer, size_t *lenp,
2881                             loff_t *ppos)
2882{
2883        int save_tracepoint_printk;
2884        int ret;
2885
2886        mutex_lock(&tracepoint_printk_mutex);
2887        save_tracepoint_printk = tracepoint_printk;
2888
2889        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2890
2891        /*
2892         * This will force exiting early, as tracepoint_printk
2893         * is always zero when tracepoint_print_iter is not allocated.
2894         */
2895        if (!tracepoint_print_iter)
2896                tracepoint_printk = 0;
2897
2898        if (save_tracepoint_printk == tracepoint_printk)
2899                goto out;
2900
2901        if (tracepoint_printk)
2902                static_key_enable(&tracepoint_printk_key.key);
2903        else
2904                static_key_disable(&tracepoint_printk_key.key);
2905
2906 out:
2907        mutex_unlock(&tracepoint_printk_mutex);
2908
2909        return ret;
2910}
2911
2912void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2913{
2914        enum event_trigger_type tt = ETT_NONE;
2915        struct trace_event_file *file = fbuffer->trace_file;
2916
2917        if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2918                        fbuffer->entry, &tt))
2919                goto discard;
2920
2921        if (static_key_false(&tracepoint_printk_key.key))
2922                output_printk(fbuffer);
2923
2924        if (static_branch_unlikely(&trace_event_exports_enabled))
2925                ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2926
2927        trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2928                        fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2929
2930discard:
2931        if (tt)
2932                event_triggers_post_call(file, tt);
2933
2934}
2935EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2936
2937/*
2938 * Skip 3:
2939 *
2940 *   trace_buffer_unlock_commit_regs()
2941 *   trace_event_buffer_commit()
2942 *   trace_event_raw_event_xxx()
2943 */
2944# define STACK_SKIP 3
2945
2946void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2947                                     struct trace_buffer *buffer,
2948                                     struct ring_buffer_event *event,
2949                                     unsigned int trace_ctx,
2950                                     struct pt_regs *regs)
2951{
2952        __buffer_unlock_commit(buffer, event);
2953
2954        /*
2955         * If regs is not set, then skip the necessary functions.
2956         * Note, we can still get here via blktrace, wakeup tracer
2957         * and mmiotrace, but that's ok if they lose a function or
2958         * two. They are not that meaningful.
2959         */
2960        ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2961        ftrace_trace_userstack(tr, buffer, trace_ctx);
2962}
2963
2964/*
2965 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2966 */
2967void
2968trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2969                                   struct ring_buffer_event *event)
2970{
2971        __buffer_unlock_commit(buffer, event);
2972}
2973
2974void
2975trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2976               parent_ip, unsigned int trace_ctx)
2977{
2978        struct trace_event_call *call = &event_function;
2979        struct trace_buffer *buffer = tr->array_buffer.buffer;
2980        struct ring_buffer_event *event;
2981        struct ftrace_entry *entry;
2982
2983        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2984                                            trace_ctx);
2985        if (!event)
2986                return;
2987        entry   = ring_buffer_event_data(event);
2988        entry->ip                       = ip;
2989        entry->parent_ip                = parent_ip;
2990
2991        if (!call_filter_check_discard(call, entry, buffer, event)) {
2992                if (static_branch_unlikely(&trace_function_exports_enabled))
2993                        ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2994                __buffer_unlock_commit(buffer, event);
2995        }
2996}
2997
2998#ifdef CONFIG_STACKTRACE
2999
3000/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3001#define FTRACE_KSTACK_NESTING   4
3002
3003#define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3004
3005struct ftrace_stack {
3006        unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3007};
3008
3009
3010struct ftrace_stacks {
3011        struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3012};
3013
3014static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3015static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3016
3017static void __ftrace_trace_stack(struct trace_buffer *buffer,
3018                                 unsigned int trace_ctx,
3019                                 int skip, struct pt_regs *regs)
3020{
3021        struct trace_event_call *call = &event_kernel_stack;
3022        struct ring_buffer_event *event;
3023        unsigned int size, nr_entries;
3024        struct ftrace_stack *fstack;
3025        struct stack_entry *entry;
3026        int stackidx;
3027
3028        /*
3029         * Add one, for this function and the call to stack_trace_save().
3030         * If regs is set, then these functions will not be in the way.
3031         */
3032#ifndef CONFIG_UNWINDER_ORC
3033        if (!regs)
3034                skip++;
3035#endif
3036
3037        preempt_disable_notrace();
3038
3039        stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3040
3041        /* This should never happen. If it does, yell once and skip */
3042        if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3043                goto out;
3044
3045        /*
3046         * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3047         * interrupt will either see the value pre increment or post
3048         * increment. If the interrupt happens pre increment, it will have
3049         * restored the counter when it returns. We just need a barrier to
3050         * keep gcc from moving things around.
3051         */
3052        barrier();
3053
3054        fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3055        size = ARRAY_SIZE(fstack->calls);
3056
3057        if (regs) {
3058                nr_entries = stack_trace_save_regs(regs, fstack->calls,
3059                                                   size, skip);
3060        } else {
3061                nr_entries = stack_trace_save(fstack->calls, size, skip);
3062        }
3063
3064        size = nr_entries * sizeof(unsigned long);
3065        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3066                                    (sizeof(*entry) - sizeof(entry->caller)) + size,
3067                                    trace_ctx);
3068        if (!event)
3069                goto out;
3070        entry = ring_buffer_event_data(event);
3071
3072        memcpy(&entry->caller, fstack->calls, size);
3073        entry->size = nr_entries;
3074
3075        if (!call_filter_check_discard(call, entry, buffer, event))
3076                __buffer_unlock_commit(buffer, event);
3077
3078 out:
3079        /* Again, don't let gcc optimize things here */
3080        barrier();
3081        __this_cpu_dec(ftrace_stack_reserve);
3082        preempt_enable_notrace();
3083
3084}
3085
3086static inline void ftrace_trace_stack(struct trace_array *tr,
3087                                      struct trace_buffer *buffer,
3088                                      unsigned int trace_ctx,
3089                                      int skip, struct pt_regs *regs)
3090{
3091        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3092                return;
3093
3094        __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3095}
3096
3097void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3098                   int skip)
3099{
3100        struct trace_buffer *buffer = tr->array_buffer.buffer;
3101
3102        if (rcu_is_watching()) {
3103                __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104                return;
3105        }
3106
3107        /*
3108         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3109         * but if the above rcu_is_watching() failed, then the NMI
3110         * triggered someplace critical, and rcu_irq_enter() should
3111         * not be called from NMI.
3112         */
3113        if (unlikely(in_nmi()))
3114                return;
3115
3116        rcu_irq_enter_irqson();
3117        __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3118        rcu_irq_exit_irqson();
3119}
3120
3121/**
3122 * trace_dump_stack - record a stack back trace in the trace buffer
3123 * @skip: Number of functions to skip (helper handlers)
3124 */
3125void trace_dump_stack(int skip)
3126{
3127        if (tracing_disabled || tracing_selftest_running)
3128                return;
3129
3130#ifndef CONFIG_UNWINDER_ORC
3131        /* Skip 1 to skip this function. */
3132        skip++;
3133#endif
3134        __ftrace_trace_stack(global_trace.array_buffer.buffer,
3135                             tracing_gen_ctx(), skip, NULL);
3136}
3137EXPORT_SYMBOL_GPL(trace_dump_stack);
3138
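/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): a debugging hook can drop a kernel stack trace into the ring
 * buffer rather than the console; skip is 0 when the caller itself should
 * appear in the trace. example_debug_hook() and example_rare_condition()
 * are hypothetical.
 */
#if 0
static void example_debug_hook(void)
{
        if (unlikely(example_rare_condition()))
                trace_dump_stack(0);
}
#endif
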
3139#ifdef CONFIG_USER_STACKTRACE_SUPPORT
3140static DEFINE_PER_CPU(int, user_stack_count);
3141
3142static void
3143ftrace_trace_userstack(struct trace_array *tr,
3144                       struct trace_buffer *buffer, unsigned int trace_ctx)
3145{
3146        struct trace_event_call *call = &event_user_stack;
3147        struct ring_buffer_event *event;
3148        struct userstack_entry *entry;
3149
3150        if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3151                return;
3152
3153        /*
3154         * NMIs cannot handle page faults, even with fixups.
3155         * Saving the user stack can (and often does) fault.
3156         */
3157        if (unlikely(in_nmi()))
3158                return;
3159
3160        /*
3161         * prevent recursion, since the user stack tracing may
3162         * trigger other kernel events.
3163         */
3164        preempt_disable();
3165        if (__this_cpu_read(user_stack_count))
3166                goto out;
3167
3168        __this_cpu_inc(user_stack_count);
3169
3170        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3171                                            sizeof(*entry), trace_ctx);
3172        if (!event)
3173                goto out_drop_count;
3174        entry   = ring_buffer_event_data(event);
3175
3176        entry->tgid             = current->tgid;
3177        memset(&entry->caller, 0, sizeof(entry->caller));
3178
3179        stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3180        if (!call_filter_check_discard(call, entry, buffer, event))
3181                __buffer_unlock_commit(buffer, event);
3182
3183 out_drop_count:
3184        __this_cpu_dec(user_stack_count);
3185 out:
3186        preempt_enable();
3187}
3188#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3189static void ftrace_trace_userstack(struct trace_array *tr,
3190                                   struct trace_buffer *buffer,
3191                                   unsigned int trace_ctx)
3192{
3193}
3194#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3195
3196#endif /* CONFIG_STACKTRACE */
3197
3198static inline void
3199func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3200                          unsigned long long delta)
3201{
3202        entry->bottom_delta_ts = delta & U32_MAX;
3203        entry->top_delta_ts = (delta >> 32);
3204}
3205
3206void trace_last_func_repeats(struct trace_array *tr,
3207                             struct trace_func_repeats *last_info,
3208                             unsigned int trace_ctx)
3209{
3210        struct trace_buffer *buffer = tr->array_buffer.buffer;
3211        struct func_repeats_entry *entry;
3212        struct ring_buffer_event *event;
3213        u64 delta;
3214
3215        event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3216                                            sizeof(*entry), trace_ctx);
3217        if (!event)
3218                return;
3219
3220        delta = ring_buffer_event_time_stamp(buffer, event) -
3221                last_info->ts_last_call;
3222
3223        entry = ring_buffer_event_data(event);
3224        entry->ip = last_info->ip;
3225        entry->parent_ip = last_info->parent_ip;
3226        entry->count = last_info->count;
3227        func_repeats_set_delta_ts(entry, delta);
3228
3229        __buffer_unlock_commit(buffer, event);
3230}
3231
3232/* created for use with alloc_percpu */
3233struct trace_buffer_struct {
3234        int nesting;
3235        char buffer[4][TRACE_BUF_SIZE];
3236};
3237
3238static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3239
3240/*
3241 * This allows for lockless recording. If we're nested too deeply, then
3242 * this returns NULL.
3243 */
3244static char *get_trace_buf(void)
3245{
3246        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3247
3248        if (!trace_percpu_buffer || buffer->nesting >= 4)
3249                return NULL;
3250
3251        buffer->nesting++;
3252
3253        /* Interrupts must see nesting incremented before we use the buffer */
3254        barrier();
3255        return &buffer->buffer[buffer->nesting - 1][0];
3256}
3257
3258static void put_trace_buf(void)
3259{
3260        /* Don't let the decrement of nesting leak before this */
3261        barrier();
3262        this_cpu_dec(trace_percpu_buffer->nesting);
3263}
3264
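/*
 * Editor's illustrative sketch (not part of the kernel source, kept under
 * "#if 0"): the trace_printk() family below pairs these helpers around every
 * use of the per-CPU scratch buffer and bails out when the nesting limit is
 * hit. example_use_scratch_buffer() is a hypothetical name.
 */
#if 0
static int example_use_scratch_buffer(const char *msg)
{
        char *tbuffer = get_trace_buf();
        int len;

        if (!tbuffer)
                return 0;       /* nested too deeply, drop the message */

        len = strscpy(tbuffer, msg, TRACE_BUF_SIZE);
        /* ... hand tbuffer off to the ring buffer here ... */

        put_trace_buf();
        return len;
}
#endif
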
3265static int alloc_percpu_trace_buffer(void)
3266{
3267        struct trace_buffer_struct __percpu *buffers;
3268
3269        if (trace_percpu_buffer)
3270                return 0;
3271
3272        buffers = alloc_percpu(struct trace_buffer_struct);
3273        if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3274                return -ENOMEM;
3275
3276        trace_percpu_buffer = buffers;
3277        return 0;
3278}
3279
3280static int buffers_allocated;
3281
3282void trace_printk_init_buffers(void)
3283{
3284        if (buffers_allocated)
3285                return;
3286
3287        if (alloc_percpu_trace_buffer())
3288                return;
3289
3290        /* trace_printk() is for debug use only. Don't use it in production. */
3291
3292        pr_warn("\n");
3293        pr_warn("**********************************************************\n");
3294        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3295        pr_warn("**                                                      **\n");
3296        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3297        pr_warn("**                                                      **\n");
3298        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3299        pr_warn("** unsafe for production use.                           **\n");
3300        pr_warn("**                                                      **\n");
3301        pr_warn("** If you see this message and you are not debugging    **\n");
3302        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3303        pr_warn("**                                                      **\n");
3304        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3305        pr_warn("**********************************************************\n");
3306
3307        /* Expand the buffers to set size */
3308        tracing_update_buffers();
3309
3310        buffers_allocated = 1;
3311
3312        /*
3313         * trace_printk_init_buffers() can be called by modules.
3314         * If that happens, then we need to start cmdline recording
3315         * directly here. If global_trace.array_buffer.buffer is already
3316         * allocated at this point, then this was called by module code.
3317         */
3318        if (global_trace.array_buffer.buffer)
3319                tracing_start_cmdline_record();
3320}
3321EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
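
/*
 * For reference, a minimal trace_printk() use as seen from a driver or a
 * module (illustrative only); building trace_printk() calls into the
 * kernel or into a module is what ends up running this init and printing
 * the banner above:
 *
 *	trace_printk("widget %d state=%#x\n", id, state);
 *
 * The output is read from the tracefs "trace" file (e.g.
 * /sys/kernel/tracing/trace), not from dmesg.
 */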
3322
3323void trace_printk_start_comm(void)
3324{
3325        /* Start tracing comms if trace printk is set */
3326        if (!buffers_allocated)
3327                return;
3328        tracing_start_cmdline_record();
3329}
3330
3331static void trace_printk_start_stop_comm(int enabled)
3332{
3333        if (!buffers_allocated)
3334                return;
3335
3336        if (enabled)
3337                tracing_start_cmdline_record();
3338        else
3339                tracing_stop_cmdline_record();
3340}
3341
3342/**
3343 * trace_vbprintk - write binary msg to tracing buffer
3344 * @ip:    The address of the caller
3345 * @fmt:   The string format to write to the buffer
3346 * @args:  Arguments for @fmt
3347 */
3348int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3349{
3350        struct trace_event_call *call = &event_bprint;
3351        struct ring_buffer_event *event;
3352        struct trace_buffer *buffer;
3353        struct trace_array *tr = &global_trace;
3354        struct bprint_entry *entry;
3355        unsigned int trace_ctx;
3356        char *tbuffer;
3357        int len = 0, size;
3358
3359        if (unlikely(tracing_selftest_running || tracing_disabled))
3360                return 0;
3361
3362        /* Don't pollute graph traces with trace_vprintk internals */
3363        pause_graph_tracing();
3364
3365        trace_ctx = tracing_gen_ctx();
3366        preempt_disable_notrace();
3367
3368        tbuffer = get_trace_buf();
3369        if (!tbuffer) {
3370                len = 0;
3371                goto out_nobuffer;
3372        }
3373
3374        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3375
3376        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3377                goto out_put;
3378
3379        size = sizeof(*entry) + sizeof(u32) * len;
3380        buffer = tr->array_buffer.buffer;
3381        ring_buffer_nest_start(buffer);
3382        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3383                                            trace_ctx);
3384        if (!event)
3385                goto out;
3386        entry = ring_buffer_event_data(event);
3387        entry->ip                       = ip;
3388        entry->fmt                      = fmt;
3389
3390        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3391        if (!call_filter_check_discard(call, entry, buffer, event)) {
3392                __buffer_unlock_commit(buffer, event);
3393                ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3394        }
3395
3396out:
3397        ring_buffer_nest_end(buffer);
3398out_put:
3399        put_trace_buf();
3400
3401out_nobuffer:
3402        preempt_enable_notrace();
3403        unpause_graph_tracing();
3404
3405        return len;
3406}
3407EXPORT_SYMBOL_GPL(trace_vbprintk);
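
/*
 * Note on the bprint path above (descriptive): only the pointer to @fmt
 * plus the binary arguments produced by vbin_printf() are recorded in the
 * ring buffer; the string is rendered lazily at read time (via
 * bstr_printf() in the output code). That is why @fmt must remain valid,
 * typically as a constant in rodata, for as long as the trace data can be
 * read.
 */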
3408
3409__printf(3, 0)
3410static int
3411__trace_array_vprintk(struct trace_buffer *buffer,
3412                      unsigned long ip, const char *fmt, va_list args)
3413{
3414        struct trace_event_call *call = &event_print;
3415        struct ring_buffer_event *event;
3416        int len = 0, size;
3417        struct print_entry *entry;
3418        unsigned int trace_ctx;
3419        char *tbuffer;
3420
3421        if (tracing_disabled || tracing_selftest_running)
3422                return 0;
3423
3424        /* Don't pollute graph traces with trace_vprintk internals */
3425        pause_graph_tracing();
3426
3427        trace_ctx = tracing_gen_ctx();
3428        preempt_disable_notrace();
3429
3431        tbuffer = get_trace_buf();
3432        if (!tbuffer) {
3433                len = 0;
3434                goto out_nobuffer;
3435        }
3436
3437        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3438
3439        size = sizeof(*entry) + len + 1;
3440        ring_buffer_nest_start(buffer);
3441        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3442                                            trace_ctx);
3443        if (!event)
3444                goto out;
3445        entry = ring_buffer_event_data(event);
3446        entry->ip = ip;
3447
3448        memcpy(&entry->buf, tbuffer, len + 1);
3449        if (!call_filter_check_discard(call, entry, buffer, event)) {
3450                __buffer_unlock_commit(buffer, event);
3451                ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3452        }
3453
3454out:
3455        ring_buffer_nest_end(buffer);
3456        put_trace_buf();
3457
3458out_nobuffer:
3459        preempt_enable_notrace();
3460        unpause_graph_tracing();
3461
3462        return len;
3463}
3464
3465__printf(3, 0)
3466int trace_array_vprintk(struct trace_array *tr,
3467                        unsigned long ip, const char *fmt, va_list args)
3468{
3469        return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3470}
3471
3472/**
3473 * trace_array_printk - Print a message to a specific instance
3474 * @tr: The instance trace_array descriptor
3475 * @ip: The instruction pointer that this is called from.
3476 * @fmt: The format to print (printf format)
3477 *
3478 * If a subsystem sets up its own instance, it may write printk-style
3479 * strings into its tracing instance buffer using this function. Note,
3480 * this function will not write into the top level buffer (use
3481 * trace_printk() for that), as the top level buffer should only
3482 * contain events that can be individually disabled.
3483 * trace_printk() is only used for debugging a kernel, and should never
3484 * be incorporated into normal use.
3485 *
3486 * trace_array_printk() can be used, as it will not add noise to the
3487 * top level tracing buffer.
3488 *
3489 * Note, trace_array_init_printk() must be called on @tr before this
3490 * can be used.
3491 */
3492__printf(3, 0)
3493int trace_array_printk(struct trace_array *tr,
3494                       unsigned long ip, const char *fmt, ...)
3495{
3496        int ret;
3497        va_list ap;
3498
3499        if (!tr)
3500                return -ENOENT;
3501
3502        /* This is only allowed for created instances */
3503        if (tr == &global_trace)
3504                return 0;
3505
3506        if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3507                return 0;
3508
3509        va_start(ap, fmt);
3510        ret = trace_array_vprintk(tr, ip, fmt, ap);
3511        va_end(ap);
3512        return ret;
3513}
3514EXPORT_SYMBOL_GPL(trace_array_printk);
3515
3516/**
3517 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3518 * @tr: The trace array to initialize the buffers for
3519 *
3520 * As trace_array_printk() only writes into instances, calls to it are
3521 * OK to have in the kernel (unlike trace_printk()). This needs to be called
3522 * before trace_array_printk() can be used on a trace_array.
3523 */
3524int trace_array_init_printk(struct trace_array *tr)
3525{
3526        if (!tr)
3527                return -ENOENT;
3528
3529        /* This is only allowed for created instances */
3530        if (tr == &global_trace)
3531                return -EINVAL;
3532
3533        return alloc_percpu_trace_buffer();
3534}
3535EXPORT_SYMBOL_GPL(trace_array_init_printk);
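
/*
 * Usage sketch for the two helpers above (illustrative; the instance name
 * and the use of trace_array_get_by_name() are examples, not requirements):
 *
 *	struct trace_array *tr;
 *	int ret;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	ret = trace_array_init_printk(tr);
 *	if (!ret)
 *		trace_array_printk(tr, _THIS_IP_, "queued %d requests\n", nr);
 *
 *	trace_array_put(tr);	// drop the reference when done
 *	return ret;
 */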
3536
3537__printf(3, 4)
3538int trace_array_printk_buf(struct trace_buffer *buffer,
3539                           unsigned long ip, const char *fmt, ...)
3540{
3541        int ret;
3542        va_list ap;
3543
3544        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3545                return 0;
3546
3547        va_start(ap, fmt);
3548        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3549        va_end(ap);
3550        return ret;
3551}
3552
3553__printf(2, 0)
3554int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3555{
3556        return trace_array_vprintk(&global_trace, ip, fmt, args);
3557}
3558EXPORT_SYMBOL_GPL(trace_vprintk);
3559
3560static void trace_iterator_increment(struct trace_iterator *iter)
3561{
3562        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3563
3564        iter->idx++;
3565        if (buf_iter)
3566                ring_buffer_iter_advance(buf_iter);
3567}
3568
3569static struct trace_entry *
3570peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3571                unsigned long *lost_events)
3572{
3573        struct ring_buffer_event *event;
3574        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3575
3576        if (buf_iter) {
3577                event = ring_buffer_iter_peek(buf_iter, ts);
3578                if (lost_events)
3579                        *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3580                                (unsigned long)-1 : 0;
3581        } else {
3582                event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3583                                         lost_events);
3584        }
3585
3586        if (event) {
3587                iter->ent_size = ring_buffer_event_length(event);
3588                return ring_buffer_event_data(event);
3589        }
3590        iter->ent_size = 0;
3591        return NULL;
3592}
3593
3594static struct trace_entry *
3595__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3596                  unsigned long *missing_events, u64 *ent_ts)
3597{
3598        struct trace_buffer *buffer = iter->array_buffer->buffer;
3599        struct trace_entry *ent, *next = NULL;
3600        unsigned long lost_events = 0, next_lost = 0;
3601        int cpu_file = iter->cpu_file;
3602        u64 next_ts = 0, ts;
3603        int next_cpu = -1;
3604        int next_size = 0;
3605        int cpu;
3606
3607        /*
3608         * If we are in a per_cpu trace file, don't bother iterating over
3609         * all CPUs; peek at that CPU directly.
3610         */
3611        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3612                if (ring_buffer_empty_cpu(buffer, cpu_file))
3613                        return NULL;
3614                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3615                if (ent_cpu)
3616                        *ent_cpu = cpu_file;
3617
3618                return ent;
3619        }
3620
3621        for_each_tracing_cpu(cpu) {
3622
3623                if (ring_buffer_empty_cpu(buffer, cpu))
3624                        continue;
3625
3626                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3627
3628                /*
3629                 * Pick the entry with the smallest timestamp:
3630                 */
3631                if (ent && (!next || ts < next_ts)) {
3632                        next = ent;
3633                        next_cpu = cpu;
3634                        next_ts = ts;
3635                        next_lost = lost_events;
3636                        next_size = iter->ent_size;
3637                }
3638        }
3639
3640        iter->ent_size = next_size;
3641
3642        if (ent_cpu)
3643                *ent_cpu = next_cpu;
3644
3645        if (ent_ts)
3646                *ent_ts = next_ts;
3647
3648        if (missing_events)
3649                *missing_events = next_lost;
3650
3651        return next;
3652}
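
/*
 * Note (descriptive): when iterating all CPUs, __find_next_entry() does a
 * linear scan over the per-CPU buffers and returns the event with the
 * smallest timestamp, i.e. the iterator performs an N-way merge of the
 * per-CPU streams, one event per call.
 */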
3653
3654#define STATIC_FMT_BUF_SIZE     128
3655static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3656
3657static char *trace_iter_expand_format(struct trace_iterator *iter)
3658{
3659        char *tmp;
3660
3661        /*
3662         * iter->tr is NULL when used with tp_printk, which makes
3663         * this get called where it is not safe to call krealloc().
3664         */
3665        if (!iter->tr || iter->fmt == static_fmt_buf)
3666                return NULL;
3667
3668        tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3669                       GFP_KERNEL);
3670        if (tmp) {
3671                iter->fmt_size += STATIC_FMT_BUF_SIZE;
3672                iter->fmt = tmp;
3673        }
3674
3675        return tmp;
3676}
3677
3678/* Returns true if the string is safe to dereference from an event */
3679static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3680                           bool star, int len)
3681{
3682        unsigned long addr = (unsigned long)str;
3683        struct trace_event *trace_event;
3684        struct trace_event_call *event;
3685
3686        /* Ignore strings with no length */
3687        if (star && !len)
3688                return true;
3689
3690        /* OK if part of the event data */
3691        if ((addr >= (unsigned long)iter->ent) &&
3692            (addr < (unsigned long)iter->ent + iter->ent_size))
3693                return true;
3694
3695        /* OK if part of the temp seq buffer */
3696        if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3697            (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3698                return true;
3699
3700        /* Core rodata can not be freed */
3701        if (is_kernel_rodata(addr))
3702                return true;
3703
3704        if (trace_is_tracepoint_string(str))
3705                return true;
3706
3707        /*
3708         * Now this could be a module event, referencing core module
3709         * data, which is OK.
3710         */
3711        if (!iter->ent)
3712                return false;
3713
3714        trace_event = ftrace_find_event(iter->ent->type);
3715        if (!trace_event)
3716                return false;
3717
3718        event = container_of(trace_event, struct trace_event_call, event);
3719        if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3720                return false;
3721
3722        /* Would rather have rodata, but this will suffice */
3723        if (within_module_core(addr, event->module))
3724                return true;
3725
3726        return false;
3727}
3728
3729static const char *show_buffer(struct trace_seq *s)
3730{
3731        struct seq_buf *seq = &s->seq;
3732
3733        seq_buf_terminate(seq);
3734
3735        return seq->buffer;
3736}
3737
3738static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3739
3740static int test_can_verify_check(const char *fmt, ...)
3741{
3742        char buf[16];
3743        va_list ap;
3744        int ret;
3745
3746        /*
3747         * The verifier depends on vsnprintf() modifying the va_list passed
3748         * to it, i.e. on the va_list being passed by reference. Some
3749         * architectures (like x86_32) pass it by value, in which case
3750         * vsnprintf() does not advance the caller's va_list and the verifier
3751         * would then need to understand every conversion that vsnprintf()
3752         * performs in order to stay in sync. If it is passed by value, the
3753         * verifier is disabled.
3754         */
3755        va_start(ap, fmt);
3756        vsnprintf(buf, 16, "%d", ap);
3757        ret = va_arg(ap, int);
3758        va_end(ap);
3759
3760        return ret;
3761}
3762
3763static void test_can_verify(void)
3764{
3765        if (!test_can_verify_check("%d %d", 0, 1)) {
3766                pr_info("trace event string verifier disabled\n");
3767                static_branch_inc(&trace_no_verify);
3768        }
3769}
3770
3771/**
3772 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3773 * @iter: The iterator that holds the seq buffer and the event being printed
3774 * @fmt: The format used to print the event
3775 * @ap: The va_list holding the data to print from @fmt.
3776 *
3777 * This writes the data into the @iter->seq buffer using the data from
3778 * @fmt and @ap. If the format has a %s, then the source of the string
3779 * is examined to make sure it is safe to print, otherwise it will
3780 * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3781 * pointer.
3782 */
3783void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3784                         va_list ap)
3785{
3786        const char *p = fmt;
3787        const char *str;
3788        int i, j;
3789
3790        if (WARN_ON_ONCE(!fmt))
3791                return;
3792
3793        if (static_branch_unlikely(&trace_no_verify))
3794                goto print;
3795
3796        /* Don't bother checking when doing a ftrace_dump() */
3797        if (iter->fmt == static_fmt_buf)
3798                goto print;
3799
3800        while (*p) {
3801                bool star = false;
3802                int len = 0;
3803
3804                j = 0;
3805
3806                /* We only care about %s and variants */
3807                for (i = 0; p[i]; i++) {
3808                        if (i + 1 >= iter->fmt_size) {
3809                                /*
3810                                 * If we can't expand the copy buffer,
3811                                 * just print it.
3812                                 */
3813                                if (!trace_iter_expand_format(iter))
3814                                        goto print;
3815                        }
3816
3817                        if (p[i] == '\\' && p[i+1]) {
3818                                i++;
3819                                continue;
3820                        }
3821                        if (p[i] == '%') {
3822                                /* Need to test cases like %08.*s */
3823                                for (j = 1; p[i+j]; j++) {
3824                                        if (isdigit(p[i+j]) ||
3825                                            p[i+j] == '.')
3826                                                continue;
3827                                        if (p[i+j] == '*') {
3828                                                star = true;
3829                                                continue;
3830                                        }
3831                                        break;
3832                                }
3833                                if (p[i+j] == 's')
3834                                        break;
3835                                star = false;
3836                        }
3837                        j = 0;
3838                }
3839                /* If no %s found then just print normally */
3840                if (!p[i])
3841                        break;
3842
3843                /* Copy up to the %s, and print that */
3844                strncpy(iter->fmt, p, i);
3845                iter->fmt[i] = '\0';
3846                trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3847
3848                /*
3849                 * If iter->seq is full, the above call no longer guarantees
3850                 * that ap is in sync with fmt processing, and further calls
3851                 * to va_arg() can return wrong positional arguments.
3852                 *
3853                 * Ensure that ap is no longer used in this case.
3854                 */
3855                if (iter->seq.full) {
3856                        p = "";
3857                        break;
3858                }
3859
3860                if (star)
3861                        len = va_arg(ap, int);
3862
3863                /* The ap now points to the string data of the %s */
3864                str = va_arg(ap, const char *);
3865
3866                /*
3867                 * If you hit this warning, it is likely that the
3868                 * trace event in question used %s on a string that
3869                 * was saved at the time of the event, but may not be
3870                 * around when the trace is read. Use __string(),
3871                 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3872                 * instead. See samples/trace_events/trace-events-sample.h
3873                 * for reference.
3874                 */
3875                if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3876                              "fmt: '%s' current_buffer: '%s'",
3877                              fmt, show_buffer(&iter->seq))) {
3878                        int ret;
3879
3880                        /* Try to safely read the string */
3881                        if (star) {
3882                                if (len + 1 > iter->fmt_size)
3883                                        len = iter->fmt_size - 1;
3884                                if (len < 0)
3885                                        len = 0;
3886                                ret = copy_from_kernel_nofault(iter->fmt, str, len);
3887                                iter->fmt[len] = 0;
3888                                star = false;
3889                        } else {
3890                                ret = strncpy_from_kernel_nofault(iter->fmt, str,
3891                                                                  iter->fmt_size);
3892                        }
3893                        if (ret < 0)
3894                                trace_seq_printf(&iter->seq, "(0x%px)", str);
3895                        else
3896                                trace_seq_printf(&iter->seq, "(0x%px:%s)",
3897                                                 str, iter->fmt);
3898                        str = "[UNSAFE-MEMORY]";
3899                        strcpy(iter->fmt, "%s");
3900                } else {
3901                        strncpy(iter->fmt, p + i, j + 1);
3902                        iter->fmt[j+1] = '\0';
3903                }
3904                if (star)
3905                        trace_seq_printf(&iter->seq, iter->fmt, len, str);
3906                else
3907                        trace_seq_printf(&iter->seq, iter->fmt, str);
3908
3909                p += i + j + 1;
3910        }
3911 print:
3912        if (*p)
3913                trace_seq_vprintf(&iter->seq, p, ap);
3914}
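
/*
 * Example of the unsafe-string handling above (illustrative output): for
 * a format such as "comm=%s", if the pointer fails trace_safe_str() the
 * resulting line looks something like
 *
 *	comm=(0xffff888123456789:stale)[UNSAFE-MEMORY]
 *
 * (raw pointer, best-effort copy of the string, then the marker),
 * together with a one-time WARN_ONCE() naming the offending format.
 */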
3915
3916const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3917{
3918        const char *p, *new_fmt;
3919        char *q;
3920
3921        if (WARN_ON_ONCE(!fmt))
3922                return fmt;
3923
3924        if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3925                return fmt;
3926
3927        p = fmt;
3928        new_fmt = q = iter->fmt;
3929        while (*p) {
3930                if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3931                        if (!trace_iter_expand_format(iter))
3932                                return fmt;
3933
3934                        q += iter->fmt - new_fmt;
3935                        new_fmt = iter->fmt;
3936                }
3937
3938                *q++ = *p++;
3939
3940                /* Replace %p with %px */
3941                if (p[-1] == '%') {
3942                        if (p[0] == '%') {
3943                                *q++ = *p++;
3944                        } else if (p[0] == 'p' && !isalnum(p[1])) {
3945                                *q++ = *p++;
3946                                *q++ = 'x';
3947                        }
3948                }
3949        }
3950        *q = '\0';
3951
3952        return new_fmt;
3953}
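
/*
 * Rewrite example for the helper above (illustrative): with
 * TRACE_ITER_HASH_PTR clear, a format such as
 *
 *	"ip=%p sym=%pS done=%d"
 *
 * is emitted as
 *
 *	"ip=%px sym=%pS done=%d"
 *
 * i.e. only a bare %p is turned into %px; %% and %p followed by an
 * alphanumeric extension (%pS, %pK, ...) are left untouched.
 */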
3954
3955#define STATIC_TEMP_BUF_SIZE    128
3956static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3957
3958/* Find the next real entry, without updating the iterator itself */
3959struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3960                                          int *ent_cpu, u64 *ent_ts)
3961{
3962        /* __find_next_entry will reset ent_size */
3963        int ent_size = iter->ent_size;
3964        struct trace_entry *entry;
3965
3966        /*
3967         * If called from ftrace_dump(), then the iter->temp buffer
3968         * will be the static_temp_buf and not created from kmalloc.
3969         * If the entry size is greater than the buffer, we cannot
3970         * save it. Just return NULL in that case. This is only
3971         * used to add markers when two consecutive events' time
3972         * stamps have a large delta. See trace_print_lat_context().
3973         */
3974        if (iter->temp == static_temp_buf &&
3975            STATIC_TEMP_BUF_SIZE < ent_size)
3976                return NULL;
3977
3978        /*
3979         * __find_next_entry() may call peek_next_entry(), which may call
3980         * ring_buffer_peek(), which can make the contents of iter->ent
3981         * undefined. Need to copy iter->ent now.
3982         */
3983        if (iter->ent && iter->ent != iter->temp) {
3984                if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3985                    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3986                        void *temp;
3987                        temp = kmalloc(iter->ent_size, GFP_KERNEL);
3988                        if (!temp)
3989                                return NULL;
3990                        kfree(iter->temp);
3991                        iter->temp = temp;
3992                        iter->temp_size = iter->ent_size;
3993                }
3994                memcpy(iter->temp, iter->ent, iter->ent_size);
3995                iter->ent = iter->temp;
3996        }
3997        entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3998        /* Put back the original ent_size */
3999        iter->ent_size = ent_size;
4000
4001        return entry;
4002}
4003
4004/* Find the next real entry, and increment the iterator to the next entry */
4005void *trace_find_next_entry_inc(struct trace_iterator *iter)
4006{
4007        iter->ent = __find_next_entry(iter, &iter->cpu,
4008                                      &iter->lost_events, &iter->ts);
4009
4010        if (iter->ent)
4011                trace_iterator_increment(iter);
4012
4013        return iter->ent ? iter : NULL;
4014}
4015
4016static void trace_consume(struct trace_iterator *iter)
4017{
4018        ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4019                            &iter->lost_events);
4020}
4021
4022static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4023{
4024        struct trace_iterator *iter = m->private;
4025        int i = (int)*pos;
4026        void *ent;
4027
4028        WARN_ON_ONCE(iter->leftover);
4029
4030        (*pos)++;
4031
4032        /* can't go backwards */
4033        if (iter->idx > i)
4034                return NULL;
4035
4036        if (iter->idx < 0)
4037                ent = trace_find_next_entry_inc(iter);
4038        else
4039                ent = iter;
4040
4041        while (ent && iter->idx < i)
4042                ent = trace_find_next_entry_inc(iter);
4043
4044        iter->pos = *pos;
4045
4046        return ent;
4047}
4048
4049void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4050{
4051        struct ring_buffer_iter *buf_iter;
4052        unsigned long entries = 0;
4053        u64 ts;
4054
4055        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4056
4057        buf_iter = trace_buffer_iter(iter, cpu);
4058        if (!buf_iter)
4059                return;
4060
4061        ring_buffer_iter_reset(buf_iter);
4062
4063        /*
4064         * With the max latency tracers, it is possible that a reset
4065         * never took place on a CPU. This is evident from the timestamp
4066         * being before the start of the buffer.
4067         */
4068        while (ring_buffer_iter_peek(buf_iter, &ts)) {
4069                if (ts >= iter->array_buffer->time_start)
4070                        break;
4071                entries++;
4072                ring_buffer_iter_advance(buf_iter);
4073        }
4074
4075        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4076}
4077
4078/*
4079 * The current tracer is copied to avoid taking a global lock
4080 * all around.
4081 */
4082static void *s_start(struct seq_file *m, loff_t *pos)
4083{
4084        struct trace_iterator *iter = m->private;
4085        struct trace_array *tr = iter->tr;
4086        int cpu_file = iter->cpu_file;
4087        void *p = NULL;
4088        loff_t l = 0;
4089        int cpu;
4090
4091        /*
4092         * Copy the tracer to avoid using a global lock all around.
4093         * iter->trace is a copy of current_trace; the name pointers can
4094         * be compared instead of using strcmp(), as iter->trace->name
4095         * will point to the same string as current_trace->name.
4096         */
4097        mutex_lock(&trace_types_lock);
4098        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4099                *iter->trace = *tr->current_trace;
4100        mutex_unlock(&trace_types_lock);
4101
4102#ifdef CONFIG_TRACER_MAX_TRACE
4103        if (iter->snapshot && iter->trace->use_max_tr)
4104                return ERR_PTR(-EBUSY);
4105#endif
4106
4107        if (*pos != iter->pos) {
4108                iter->ent = NULL;
4109                iter->cpu = 0;
4110                iter->idx = -1;
4111
4112                if (cpu_file == RING_BUFFER_ALL_CPUS) {
4113                        for_each_tracing_cpu(cpu)
4114                                tracing_iter_reset(iter, cpu);
4115                } else
4116                        tracing_iter_reset(iter, cpu_file);
4117
4118                iter->leftover = 0;
4119                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4120                        ;
4121
4122        } else {
4123                /*
4124                 * If we overflowed the seq_file before, then we want
4125                 * to just reuse the trace_seq buffer again.
4126                 */
4127                if (iter->leftover)
4128                        p = iter;
4129                else {
4130                        l = *pos - 1;
4131                        p = s_next(m, p, &l);
4132                }
4133        }
4134
4135        trace_event_read_lock();
4136        trace_access_lock(cpu_file);
4137        return p;
4138}
4139
4140static void s_stop(struct seq_file *m, void *p)
4141{
4142        struct trace_iterator *iter = m->private;
4143
4144#ifdef CONFIG_TRACER_MAX_TRACE
4145        if (iter->snapshot && iter->trace->use_max_tr)
4146                return;
4147#endif
4148
4149        trace_access_unlock(iter->cpu_file);
4150        trace_event_read_unlock();
4151}
4152
4153static void
4154get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4155                      unsigned long *entries, int cpu)
4156{
4157        unsigned long count;
4158
4159        count = ring_buffer_entries_cpu(buf->buffer, cpu);
4160        /*
4161         * If this buffer has skipped entries, then we hold all
4162         * entries for the trace and we need to ignore the
4163         * ones before the time stamp.
4164         */
4165        if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4166                count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4167                /* total is the same as the entries */
4168                *total = count;
4169        } else
4170                *total = count +
4171                        ring_buffer_overrun_cpu(buf->buffer, cpu);
4172        *entries = count;
4173}
4174
4175static void
4176get_total_entries(struct array_buffer *buf,
4177                  unsigned long *total, unsigned long *entries)
4178{
4179        unsigned long t, e;
4180        int cpu;
4181
4182        *total = 0;
4183        *entries = 0;
4184
4185        for_each_tracing_cpu(cpu) {
4186                get_total_entries_cpu(buf, &t, &e, cpu);
4187                *total += t;
4188                *entries += e;
4189        }
4190}
4191
4192unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4193{
4194        unsigned long total, entries;
4195
4196        if (!tr)
4197                tr = &global_trace;
4198
4199        get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4200
4201        return entries;
4202}
4203
4204unsigned long trace_total_entries(struct trace_array *tr)
4205{
4206        unsigned long total, entries;
4207
4208        if (!tr)
4209                tr = &global_trace;
4210
4211        get_total_entries(&tr->array_buffer, &total, &entries);
4212
4213        return entries;
4214}
4215
4216static void print_lat_help_header(struct seq_file *m)
4217{
4218        seq_puts(m, "#                    _------=> CPU#            \n"
4219                    "#                   / _-----=> irqs-off/BH-disabled\n"
4220                    "#                  | / _----=> need-resched    \n"
4221                    "#                  || / _---=> hardirq/softirq \n"
4222                    "#                  ||| / _--=> preempt-depth   \n"
4223                    "#                  |||| / _-=> migrate-disable \n"
4224                    "#                  ||||| /     delay           \n"
4225                    "#  cmd     pid     |||||| time  |   caller     \n"
4226                    "#     \\   /        ||||||  \\    |    /       \n");
4227}
4228
4229static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4230{
4231        unsigned long total;
4232        unsigned long entries;
4233
4234        get_total_entries(buf, &total, &entries);
4235        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4236                   entries, total, num_online_cpus());
4237        seq_puts(m, "#\n");
4238}
4239
4240static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4241                                   unsigned int flags)
4242{
4243        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4244
4245        print_event_info(buf, m);
4246
4247        seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4248        seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4249}
4250
4251static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4252                                       unsigned int flags)
4253{
4254        bool tgid = flags & TRACE_ITER_RECORD_TGID;
4255        static const char space[] = "            ";
4256        int prec = tgid ? 12 : 2;
4257
4258        print_event_info(buf, m);
4259
4260        seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4261        seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4262        seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4263        seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4264        seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4265        seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4266        seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4267        seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4268}
4269
4270void
4271print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4272{
4273        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4274        struct array_buffer *buf = iter->array_buffer;
4275        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4276        struct tracer *type = iter->trace;
4277        unsigned long entries;
4278        unsigned long total;
4279        const char *name = type->name;
4280
4281        get_total_entries(buf, &total, &entries);
4282
4283        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4284                   name, UTS_RELEASE);
4285        seq_puts(m, "# -----------------------------------"
4286                 "---------------------------------\n");
4287        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4288                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4289                   nsecs_to_usecs(data->saved_latency),
4290                   entries,
4291                   total,
4292                   buf->cpu,
4293                   preempt_model_none()      ? "server" :
4294                   preempt_model_voluntary() ? "desktop" :
4295                   preempt_model_full()      ? "preempt" :
4296                   preempt_model_rt()        ? "preempt_rt" :
4297                   "unknown",
4298                   /* These are reserved for later use */
4299                   0, 0, 0, 0);
4300#ifdef CONFIG_SMP
4301        seq_printf(m, " #P:%d)\n", num_online_cpus());
4302#else
4303        seq_puts(m, ")\n");
4304#endif
4305        seq_puts(m, "#    -----------------\n");
4306        seq_printf(m, "#    | task: %.16s-%d "
4307                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4308                   data->comm, data->pid,
4309                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4310                   data->policy, data->rt_priority);
4311        seq_puts(m, "#    -----------------\n");
4312
4313        if (data->critical_start) {
4314                seq_puts(m, "#  => started at: ");
4315                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4316                trace_print_seq(m, &iter->seq);
4317                seq_puts(m, "\n#  => ended at:   ");
4318                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4319                trace_print_seq(m, &iter->seq);
4320                seq_puts(m, "\n#\n");
4321        }
4322
4323        seq_puts(m, "#\n");
4324}
4325
4326static void test_cpu_buff_start(struct trace_iterator *iter)
4327{
4328        struct trace_seq *s = &iter->seq;
4329        struct trace_array *tr = iter->tr;
4330
4331        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4332                return;
4333
4334        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4335                return;
4336
4337        if (cpumask_available(iter->started) &&
4338            cpumask_test_cpu(iter->cpu, iter->started))
4339                return;
4340
4341        if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4342                return;
4343
4344        if (cpumask_available(iter->started))
4345                cpumask_set_cpu(iter->cpu, iter->started);
4346
4347        /* Don't print started cpu buffer for the first entry of the trace */
4348        if (iter->idx > 1)
4349                trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4350                                iter->cpu);
4351}
4352
4353static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4354{
4355        struct trace_array *tr = iter->tr;
4356        struct trace_seq *s = &iter->seq;
4357        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4358        struct trace_entry *entry;
4359        struct trace_event *event;
4360
4361        entry = iter->ent;
4362
4363        test_cpu_buff_start(iter);
4364
4365        event = ftrace_find_event(entry->type);
4366
4367        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4368                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4369                        trace_print_lat_context(iter);
4370                else
4371                        trace_print_context(iter);
4372        }
4373
4374        if (trace_seq_has_overflowed(s))
4375                return TRACE_TYPE_PARTIAL_LINE;
4376
4377        if (event)
4378                return event->funcs->trace(iter, sym_flags, event);
4379
4380        trace_seq_printf(s, "Unknown type %d\n", entry->type);
4381
4382        return trace_handle_return(s);
4383}
4384
4385static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4386{
4387        struct trace_array *tr = iter->tr;
4388        struct trace_seq *s = &iter->seq;
4389        struct trace_entry *entry;
4390        struct trace_event *event;
4391
4392        entry = iter->ent;
4393
4394        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4395                trace_seq_printf(s, "%d %d %llu ",
4396                                 entry->pid, iter->cpu, iter->ts);
4397
4398        if (trace_seq_has_overflowed(s))
4399                return TRACE_TYPE_PARTIAL_LINE;
4400
4401        event = ftrace_find_event(entry->type);
4402        if (event)
4403                return event->funcs->raw(iter, 0, event);
4404
4405        trace_seq_printf(s, "%d ?\n", entry->type);
4406
4407        return trace_handle_return(s);
4408}
4409
4410static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4411{
4412        struct trace_array *tr = iter->tr;
4413        struct trace_seq *s = &iter->seq;
4414        unsigned char newline = '\n';
4415        struct trace_entry *entry;
4416        struct trace_event *event;
4417
4418        entry = iter->ent;
4419
4420        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4421                SEQ_PUT_HEX_FIELD(s, entry->pid);
4422                SEQ_PUT_HEX_FIELD(s, iter->cpu);
4423                SEQ_PUT_HEX_FIELD(s, iter->ts);
4424                if (trace_seq_has_overflowed(s))
4425                        return TRACE_TYPE_PARTIAL_LINE;
4426        }
4427
4428        event = ftrace_find_event(entry->type);
4429        if (event) {
4430                enum print_line_t ret = event->funcs->hex(iter, 0, event);
4431                if (ret != TRACE_TYPE_HANDLED)
4432                        return ret;
4433        }
4434
4435        SEQ_PUT_FIELD(s, newline);
4436
4437        return trace_handle_return(s);
4438}
4439
4440static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4441{
4442        struct trace_array *tr = iter->tr;
4443        struct trace_seq *s = &iter->seq;
4444        struct trace_entry *entry;
4445        struct trace_event *event;
4446
4447        entry = iter->ent;
4448
4449        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4450                SEQ_PUT_FIELD(s, entry->pid);
4451                SEQ_PUT_FIELD(s, iter->cpu);
4452                SEQ_PUT_FIELD(s, iter->ts);
4453                if (trace_seq_has_overflowed(s))
4454                        return TRACE_TYPE_PARTIAL_LINE;
4455        }
4456
4457        event = ftrace_find_event(entry->type);
4458        return event ? event->funcs->binary(iter, 0, event) :
4459                TRACE_TYPE_HANDLED;
4460}
4461
4462int trace_empty(struct trace_iterator *iter)
4463{
4464        struct ring_buffer_iter *buf_iter;
4465        int cpu;
4466
4467        /* If we are looking at one CPU buffer, only check that one */
4468        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4469                cpu = iter->cpu_file;
4470                buf_iter = trace_buffer_iter(iter, cpu);
4471                if (buf_iter) {
4472                        if (!ring_buffer_iter_empty(buf_iter))
4473                                return 0;
4474                } else {
4475                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4476                                return 0;
4477                }
4478                return 1;
4479        }
4480
4481        for_each_tracing_cpu(cpu) {
4482                buf_iter = trace_buffer_iter(iter, cpu);
4483                if (buf_iter) {
4484                        if (!ring_buffer_iter_empty(buf_iter))
4485                                return 0;
4486                } else {
4487                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4488                                return 0;
4489                }
4490        }
4491
4492        return 1;
4493}
4494
4495/*  Called with trace_event_read_lock() held. */
4496enum print_line_t print_trace_line(struct trace_iterator *iter)
4497{
4498        struct trace_array *tr = iter->tr;
4499        unsigned long trace_flags = tr->trace_flags;
4500        enum print_line_t ret;
4501
4502        if (iter->lost_events) {
4503                if (iter->lost_events == (unsigned long)-1)
4504                        trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4505                                         iter->cpu);
4506                else
4507                        trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4508                                         iter->cpu, iter->lost_events);
4509                if (trace_seq_has_overflowed(&iter->seq))
4510                        return TRACE_TYPE_PARTIAL_LINE;
4511        }
4512
4513        if (iter->trace && iter->trace->print_line) {
4514                ret = iter->trace->print_line(iter);
4515                if (ret != TRACE_TYPE_UNHANDLED)
4516                        return ret;
4517        }
4518
4519        if (iter->ent->type == TRACE_BPUTS &&
4520                        trace_flags & TRACE_ITER_PRINTK &&
4521                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4522                return trace_print_bputs_msg_only(iter);
4523
4524        if (iter->ent->type == TRACE_BPRINT &&
4525                        trace_flags & TRACE_ITER_PRINTK &&
4526                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4527                return trace_print_bprintk_msg_only(iter);
4528
4529        if (iter->ent->type == TRACE_PRINT &&
4530                        trace_flags & TRACE_ITER_PRINTK &&
4531                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532                return trace_print_printk_msg_only(iter);
4533
4534        if (trace_flags & TRACE_ITER_BIN)
4535                return print_bin_fmt(iter);
4536
4537        if (trace_flags & TRACE_ITER_HEX)
4538                return print_hex_fmt(iter);
4539
4540        if (trace_flags & TRACE_ITER_RAW)
4541                return print_raw_fmt(iter);
4542
4543        return print_trace_fmt(iter);
4544}
4545
4546void trace_latency_header(struct seq_file *m)
4547{
4548        struct trace_iterator *iter = m->private;
4549        struct trace_array *tr = iter->tr;
4550
4551        /* print nothing if the buffers are empty */
4552        if (trace_empty(iter))
4553                return;
4554
4555        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4556                print_trace_header(m, iter);
4557
4558        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4559                print_lat_help_header(m);
4560}
4561
4562void trace_default_header(struct seq_file *m)
4563{
4564        struct trace_iterator *iter = m->private;
4565        struct trace_array *tr = iter->tr;
4566        unsigned long trace_flags = tr->trace_flags;
4567
4568        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4569                return;
4570
4571        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4572                /* print nothing if the buffers are empty */
4573                if (trace_empty(iter))
4574                        return;
4575                print_trace_header(m, iter);
4576                if (!(trace_flags & TRACE_ITER_VERBOSE))
4577                        print_lat_help_header(m);
4578        } else {
4579                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4580                        if (trace_flags & TRACE_ITER_IRQ_INFO)
4581                                print_func_help_header_irq(iter->array_buffer,
4582                                                           m, trace_flags);
4583                        else
4584                                print_func_help_header(iter->array_buffer, m,
4585                                                       trace_flags);
4586                }
4587        }
4588}
4589
4590static void test_ftrace_alive(struct seq_file *m)
4591{
4592        if (!ftrace_is_dead())
4593                return;
4594        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4595                    "#          MAY BE MISSING FUNCTION EVENTS\n");
4596}
4597
4598#ifdef CONFIG_TRACER_MAX_TRACE
4599static void show_snapshot_main_help(struct seq_file *m)
4600{
4601        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4602                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4603                    "#                      Takes a snapshot of the main buffer.\n"
4604                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4605                    "#                      (Doesn't have to be '2'; works with any number that\n"
4606                    "#                       is not a '0' or '1')\n");
4607}
4608
4609static void show_snapshot_percpu_help(struct seq_file *m)
4610{
4611        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4612#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4613        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4614                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4615#else
4616        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4617                    "#                     Must use main snapshot file to allocate.\n");
4618#endif
4619        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4620                    "#                      (Doesn't have to be '2'; works with any number that\n"
4621                    "#                       is not a '0' or '1')\n");
4622}
4623
4624static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4625{
4626        if (iter->tr->allocated_snapshot)
4627                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4628        else
4629                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4630
4631        seq_puts(m, "# Snapshot commands:\n");
4632        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4633                show_snapshot_main_help(m);
4634        else
4635                show_snapshot_percpu_help(m);
4636}
4637#else
4638/* Should never be called */
4639static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4640#endif
4641
4642static int s_show(struct seq_file *m, void *v)
4643{
4644        struct trace_iterator *iter = v;
4645        int ret;
4646
4647        if (iter->ent == NULL) {
4648                if (iter->tr) {
4649                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
4650                        seq_puts(m, "#\n");
4651                        test_ftrace_alive(m);
4652                }
4653                if (iter->snapshot && trace_empty(iter))
4654                        print_snapshot_help(m, iter);
4655                else if (iter->trace && iter->trace->print_header)
4656                        iter->trace->print_header(m);
4657                else
4658                        trace_default_header(m);
4659
4660        } else if (iter->leftover) {
4661                /*
4662                 * If we filled the seq_file buffer earlier, we
4663                 * want to just show it now.
4664                 */
4665                ret = trace_print_seq(m, &iter->seq);
4666
4667                /* ret should this time be zero, but you never know */
4668                iter->leftover = ret;
4669
4670        } else {
4671                print_trace_line(iter);
4672                ret = trace_print_seq(m, &iter->seq);
4673                /*
4674                 * If we overflow the seq_file buffer, then it will
4675                 * ask us for this data again at start up.
4676                 * Use that instead.
4677                 *  ret is 0 if seq_file write succeeded.
4678                 *        -1 otherwise.
4679                 */
4680                iter->leftover = ret;
4681        }
4682
4683        return 0;
4684}
4685
4686/*
4687 * Should be used after trace_array_get(); trace_types_lock
4688 * ensures that i_cdev was already initialized.
4689 */
4690static inline int tracing_get_cpu(struct inode *inode)
4691{
4692        if (inode->i_cdev) /* See trace_create_cpu_file() */
4693                return (long)inode->i_cdev - 1;
4694        return RING_BUFFER_ALL_CPUS;
4695}
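
/*
 * Example of the encoding above (descriptive): trace_create_cpu_file()
 * stashes "cpu + 1" in i_cdev, so per_cpu/cpu2/trace has i_cdev == 3 and
 * this returns 2, while a NULL i_cdev (the non-per-cpu files) means
 * RING_BUFFER_ALL_CPUS.
 */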
4696
4697static const struct seq_operations tracer_seq_ops = {
4698        .start          = s_start,
4699        .next           = s_next,
4700        .stop           = s_stop,
4701        .show           = s_show,
4702};
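
/*
 * How the callbacks above are driven (descriptive): a read of the "trace"
 * file ends up in seq_read(), which calls s_start() at the beginning of
 * each read(), then alternates s_show()/s_next() until the caller's
 * buffer is filled, and finally calls s_stop(). The iter->leftover
 * handling in s_show() replays a line that did not fit on the previous
 * read.
 */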
4703
4704static struct trace_iterator *
4705__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4706{
4707        struct trace_array *tr = inode->i_private;
4708        struct trace_iterator *iter;
4709        int cpu;
4710
4711        if (tracing_disabled)
4712                return ERR_PTR(-ENODEV);
4713
4714        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4715        if (!iter)
4716                return ERR_PTR(-ENOMEM);
4717
4718        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4719                                    GFP_KERNEL);
4720        if (!iter->buffer_iter)
4721                goto release;
4722
4723        /*
4724         * trace_find_next_entry() may need to save off iter->ent.
4725         * It will place it into the iter->temp buffer. As most
4726         * events are smaller than 128 bytes, allocate a buffer of that size.
4727         * If one is greater, then trace_find_next_entry() will
4728         * allocate a new buffer to adjust for the bigger iter->ent.
4729         * It's not critical if it fails to get allocated here.
4730         */
4731        iter->temp = kmalloc(128, GFP_KERNEL);
4732        if (iter->temp)
4733                iter->temp_size = 128;
4734
4735        /*
4736         * trace_event_printf() may need to modify the given format
4737         * string to replace %p with %px so that it shows the real address
4738         * instead of a hash value. However, only event tracing needs that;
4739         * other tracers may not. Defer the allocation
4740         * until it is needed.
4741         */
4742        iter->fmt = NULL;
4743        iter->fmt_size = 0;
4744
4745        /*
4746         * We make a copy of the current tracer to avoid concurrent
4747         * changes on it while we are reading.
4748         */
4749        mutex_lock(&trace_types_lock);
4750        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4751        if (!iter->trace)
4752                goto fail;
4753
4754        *iter->trace = *tr->current_trace;
4755
4756        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4757                goto fail;
4758
4759        iter->tr = tr;
4760
4761#ifdef CONFIG_TRACER_MAX_TRACE
4762        /* Currently only the top directory has a snapshot */
4763        if (tr->current_trace->print_max || snapshot)
4764                iter->array_buffer = &tr->max_buffer;
4765        else
4766#endif
4767                iter->array_buffer = &tr->array_buffer;
4768        iter->snapshot = snapshot;
4769        iter->pos = -1;
4770        iter->cpu_file = tracing_get_cpu(inode);
4771        mutex_init(&iter->mutex);
4772
4773        /* Notify the tracer early; before we stop tracing. */
4774        if (iter->trace->open)
4775                iter->trace->open(iter);
4776
4777        /* Annotate start of buffers if we had overruns */
4778        if (ring_buffer_overruns(iter->array_buffer->buffer))
4779                iter->iter_flags |= TRACE_FILE_ANNOTATE;
4780
4781        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4782        if (trace_clocks[tr->clock_id].in_ns)
4783                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4784
4785        /*
4786         * If pause-on-trace is enabled, then stop the trace while
4787         * dumping, unless this is the "snapshot" file
4788         */
4789        if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4790                tracing_stop_tr(tr);
4791
4792        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4793                for_each_tracing_cpu(cpu) {
4794                        iter->buffer_iter[cpu] =
4795                                ring_buffer_read_prepare(iter->array_buffer->buffer,
4796                                                         cpu, GFP_KERNEL);
4797                }
4798                ring_buffer_read_prepare_sync();
4799                for_each_tracing_cpu(cpu) {
4800                        ring_buffer_read_start(iter->buffer_iter[cpu]);
4801                        tracing_iter_reset(iter, cpu);
4802                }
4803        } else {
4804                cpu = iter->cpu_file;
4805                iter->buffer_iter[cpu] =
4806                        ring_buffer_read_prepare(iter->array_buffer->buffer,
4807                                                 cpu, GFP_KERNEL);
4808                ring_buffer_read_prepare_sync();
4809                ring_buffer_read_start(iter->buffer_iter[cpu]);
4810                tracing_iter_reset(iter, cpu);
4811        }
4812
4813        mutex_unlock(&trace_types_lock);
4814
4815        return iter;
4816
4817 fail:
4818        mutex_unlock(&trace_types_lock);
4819        kfree(iter->trace);
4820        kfree(iter->temp);
4821        kfree(iter->buffer_iter);
4822release:
4823        seq_release_private(inode, file);
4824        return ERR_PTR(-ENOMEM);
4825}
4826
4827int tracing_open_generic(struct inode *inode, struct file *filp)
4828{
4829        int ret;
4830
4831        ret = tracing_check_open_get_tr(NULL);
4832        if (ret)
4833                return ret;
4834
4835        filp->private_data = inode->i_private;
4836        return 0;
4837}
4838
4839bool tracing_is_disabled(void)
4840{
4841        return (tracing_disabled) ? true : false;
4842}
4843
4844/*
4845 * Open and update trace_array ref count.
4846 * Must have the current trace_array passed to it.
4847 */
4848int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4849{
4850        struct trace_array *tr = inode->i_private;
4851        int ret;
4852
4853        ret = tracing_check_open_get_tr(tr);
4854        if (ret)
4855                return ret;
4856
4857        filp->private_data = inode->i_private;
4858
4859        return 0;
4860}
4861
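    /*
     * Open callback used for the trace_marker style files. Data written by
     * user space is injected directly into the ring buffer, e.g. (assuming
     * tracefs is mounted at /sys/kernel/tracing):
     *
     *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
     */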
4862static int tracing_mark_open(struct inode *inode, struct file *filp)
4863{
4864        stream_open(inode, filp);
4865        return tracing_open_generic_tr(inode, filp);
4866}
4867
4868static int tracing_release(struct inode *inode, struct file *file)
4869{
4870        struct trace_array *tr = inode->i_private;
4871        struct seq_file *m = file->private_data;
4872        struct trace_iterator *iter;
4873        int cpu;
4874
4875        if (!(file->f_mode & FMODE_READ)) {
4876                trace_array_put(tr);
4877                return 0;
4878        }
4879
4880        /* Writes do not use seq_file */
4881        iter = m->private;
4882        mutex_lock(&trace_types_lock);
4883
4884        for_each_tracing_cpu(cpu) {
4885                if (iter->buffer_iter[cpu])
4886                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
4887        }
4888
4889        if (iter->trace && iter->trace->close)
4890                iter->trace->close(iter);
4891
4892        if (!iter->snapshot && tr->stop_count)
4893                /* reenable tracing if it was previously enabled */
4894                tracing_start_tr(tr);
4895
4896        __trace_array_put(tr);
4897
4898        mutex_unlock(&trace_types_lock);
4899
4900        mutex_destroy(&iter->mutex);
4901        free_cpumask_var(iter->started);
4902        kfree(iter->fmt);
4903        kfree(iter->temp);
4904        kfree(iter->trace);
4905        kfree(iter->buffer_iter);
4906        seq_release_private(inode, file);
4907
4908        return 0;
4909}
4910
4911static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4912{
4913        struct trace_array *tr = inode->i_private;
4914
4915        trace_array_put(tr);
4916        return 0;
4917}
4918
4919static int tracing_single_release_tr(struct inode *inode, struct file *file)
4920{
4921        struct trace_array *tr = inode->i_private;
4922
4923        trace_array_put(tr);
4924
4925        return single_release(inode, file);
4926}
4927
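    /*
     * Open callback for the "trace" file. Opening it for write with O_TRUNC
     * (e.g. "echo > trace", as noted in the tracing mini-HOWTO below) erases
     * the selected CPU buffer(s) before any reading takes place.
     */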
4928static int tracing_open(struct inode *inode, struct file *file)
4929{
4930        struct trace_array *tr = inode->i_private;
4931        struct trace_iterator *iter;
4932        int ret;
4933
4934        ret = tracing_check_open_get_tr(tr);
4935        if (ret)
4936                return ret;
4937
4938        /* If this file was open for write, then erase contents */
4939        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4940                int cpu = tracing_get_cpu(inode);
4941                struct array_buffer *trace_buf = &tr->array_buffer;
4942
4943#ifdef CONFIG_TRACER_MAX_TRACE
4944                if (tr->current_trace->print_max)
4945                        trace_buf = &tr->max_buffer;
4946#endif
4947
4948                if (cpu == RING_BUFFER_ALL_CPUS)
4949                        tracing_reset_online_cpus(trace_buf);
4950                else
4951                        tracing_reset_cpu(trace_buf, cpu);
4952        }
4953
4954        if (file->f_mode & FMODE_READ) {
4955                iter = __tracing_open(inode, file, false);
4956                if (IS_ERR(iter))
4957                        ret = PTR_ERR(iter);
4958                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4959                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
4960        }
4961
4962        if (ret < 0)
4963                trace_array_put(tr);
4964
4965        return ret;
4966}
4967
4968/*
4969 * Some tracers are not suitable for instance buffers.
4970 * A tracer is always available for the global array (toplevel)
4971 * or if it explicitly states that it is.
4972 */
4973static bool
4974trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4975{
4976        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4977}
4978
4979/* Find the next tracer that this trace array may use */
4980static struct tracer *
4981get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4982{
4983        while (t && !trace_ok_for_array(t, tr))
4984                t = t->next;
4985
4986        return t;
4987}
4988
4989static void *
4990t_next(struct seq_file *m, void *v, loff_t *pos)
4991{
4992        struct trace_array *tr = m->private;
4993        struct tracer *t = v;
4994
4995        (*pos)++;
4996
4997        if (t)
4998                t = get_tracer_for_array(tr, t->next);
4999
5000        return t;
5001}
5002
5003static void *t_start(struct seq_file *m, loff_t *pos)
5004{
5005        struct trace_array *tr = m->private;
5006        struct tracer *t;
5007        loff_t l = 0;
5008
5009        mutex_lock(&trace_types_lock);
5010
5011        t = get_tracer_for_array(tr, trace_types);
5012        for (; t && l < *pos; t = t_next(m, t, &l))
5013                        ;
5014
5015        return t;
5016}
5017
5018static void t_stop(struct seq_file *m, void *p)
5019{
5020        mutex_unlock(&trace_types_lock);
5021}
5022
5023static int t_show(struct seq_file *m, void *v)
5024{
5025        struct tracer *t = v;
5026
5027        if (!t)
5028                return 0;
5029
5030        seq_puts(m, t->name);
5031        if (t->next)
5032                seq_putc(m, ' ');
5033        else
5034                seq_putc(m, '\n');
5035
5036        return 0;
5037}
5038
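    /*
     * seq_file operations that list the registered tracers usable by this
     * trace array, one name per entry separated by spaces (this is what the
     * available_tracers file shows).
     */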
5039static const struct seq_operations show_traces_seq_ops = {
5040        .start          = t_start,
5041        .next           = t_next,
5042        .stop           = t_stop,
5043        .show           = t_show,
5044};
5045
5046static int show_traces_open(struct inode *inode, struct file *file)
5047{
5048        struct trace_array *tr = inode->i_private;
5049        struct seq_file *m;
5050        int ret;
5051
5052        ret = tracing_check_open_get_tr(tr);
5053        if (ret)
5054                return ret;
5055
5056        ret = seq_open(file, &show_traces_seq_ops);
5057        if (ret) {
5058                trace_array_put(tr);
5059                return ret;
5060        }
5061
5062        m = file->private_data;
5063        m->private = tr;
5064
5065        return 0;
5066}
5067
5068static int show_traces_release(struct inode *inode, struct file *file)
5069{
5070        struct trace_array *tr = inode->i_private;
5071
5072        trace_array_put(tr);
5073        return seq_release(inode, file);
5074}
5075
5076static ssize_t
5077tracing_write_stub(struct file *filp, const char __user *ubuf,
5078                   size_t count, loff_t *ppos)
5079{
5080        return count;
5081}
5082
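    /*
     * Seeking only makes sense for readers, which go through seq_file; for a
     * write-only open there is nothing to seek over, so the file position is
     * simply reset to zero.
     */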
5083loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5084{
5085        int ret;
5086
5087        if (file->f_mode & FMODE_READ)
5088                ret = seq_lseek(file, offset, whence);
5089        else
5090                file->f_pos = ret = 0;
5091
5092        return ret;
5093}
5094
5095static const struct file_operations tracing_fops = {
5096        .open           = tracing_open,
5097        .read           = seq_read,
5098        .write          = tracing_write_stub,
5099        .llseek         = tracing_lseek,
5100        .release        = tracing_release,
5101};
5102
5103static const struct file_operations show_traces_fops = {
5104        .open           = show_traces_open,
5105        .read           = seq_read,
5106        .llseek         = seq_lseek,
5107        .release        = show_traces_release,
5108};
5109
5110static ssize_t
5111tracing_cpumask_read(struct file *filp, char __user *ubuf,
5112                     size_t count, loff_t *ppos)
5113{
5114        struct trace_array *tr = file_inode(filp)->i_private;
5115        char *mask_str;
5116        int len;
5117
5118        len = snprintf(NULL, 0, "%*pb\n",
5119                       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5120        mask_str = kmalloc(len, GFP_KERNEL);
5121        if (!mask_str)
5122                return -ENOMEM;
5123
5124        len = snprintf(mask_str, len, "%*pb\n",
5125                       cpumask_pr_args(tr->tracing_cpumask));
5126        if (len >= count) {
5127                count = -EINVAL;
5128                goto out_err;
5129        }
5130        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5131
5132out_err:
5133        kfree(mask_str);
5134
5135        return count;
5136}
5137
5138int tracing_set_cpumask(struct trace_array *tr,
5139                        cpumask_var_t tracing_cpumask_new)
5140{
5141        int cpu;
5142
5143        if (!tr)
5144                return -EINVAL;
5145
5146        local_irq_disable();
5147        arch_spin_lock(&tr->max_lock);
5148        for_each_tracing_cpu(cpu) {
5149                /*
5150                 * Increase/decrease the disabled counter if we are
5151                 * about to flip a bit in the cpumask:
5152                 */
5153                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5154                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5155                        atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5156                        ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5157                }
5158                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5159                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5160                        atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5161                        ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5162                }
5163        }
5164        arch_spin_unlock(&tr->max_lock);
5165        local_irq_enable();
5166
5167        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5168
5169        return 0;
5170}
5171
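    /*
     * Write handler for the tracing_cpumask file. User space supplies a CPU
     * mask in the format accepted by cpumask_parse_user(), e.g. (assuming
     * tracefs is mounted at /sys/kernel/tracing):
     *
     *   echo 3 > /sys/kernel/tracing/tracing_cpumask
     *
     * which limits tracing to CPUs 0 and 1.
     */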
5172static ssize_t
5173tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5174                      size_t count, loff_t *ppos)
5175{
5176        struct trace_array *tr = file_inode(filp)->i_private;
5177        cpumask_var_t tracing_cpumask_new;
5178        int err;
5179
5180        if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5181                return -ENOMEM;
5182
5183        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5184        if (err)
5185                goto err_free;
5186
5187        err = tracing_set_cpumask(tr, tracing_cpumask_new);
5188        if (err)
5189                goto err_free;
5190
5191        free_cpumask_var(tracing_cpumask_new);
5192
5193        return count;
5194
5195err_free:
5196        free_cpumask_var(tracing_cpumask_new);
5197
5198        return err;
5199}
5200
5201static const struct file_operations tracing_cpumask_fops = {
5202        .open           = tracing_open_generic_tr,
5203        .read           = tracing_cpumask_read,
5204        .write          = tracing_cpumask_write,
5205        .release        = tracing_release_generic_tr,
5206        .llseek         = generic_file_llseek,
5207};
5208
5209static int tracing_trace_options_show(struct seq_file *m, void *v)
5210{
5211        struct tracer_opt *trace_opts;
5212        struct trace_array *tr = m->private;
5213        u32 tracer_flags;
5214        int i;
5215
5216        mutex_lock(&trace_types_lock);
5217        tracer_flags = tr->current_trace->flags->val;
5218        trace_opts = tr->current_trace->flags->opts;
5219
5220        for (i = 0; trace_options[i]; i++) {
5221                if (tr->trace_flags & (1 << i))
5222                        seq_printf(m, "%s\n", trace_options[i]);
5223                else
5224                        seq_printf(m, "no%s\n", trace_options[i]);
5225        }
5226
5227        for (i = 0; trace_opts[i].name; i++) {
5228                if (tracer_flags & trace_opts[i].bit)
5229                        seq_printf(m, "%s\n", trace_opts[i].name);
5230                else
5231                        seq_printf(m, "no%s\n", trace_opts[i].name);
5232        }
5233        mutex_unlock(&trace_types_lock);
5234
5235        return 0;
5236}
5237
5238static int __set_tracer_option(struct trace_array *tr,
5239                               struct tracer_flags *tracer_flags,
5240                               struct tracer_opt *opts, int neg)
5241{
5242        struct tracer *trace = tracer_flags->trace;
5243        int ret;
5244
5245        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5246        if (ret)
5247                return ret;
5248
5249        if (neg)
5250                tracer_flags->val &= ~opts->bit;
5251        else
5252                tracer_flags->val |= opts->bit;
5253        return 0;
5254}
5255
5256/* Try to assign a tracer specific option */
5257static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5258{
5259        struct tracer *trace = tr->current_trace;
5260        struct tracer_flags *tracer_flags = trace->flags;
5261        struct tracer_opt *opts = NULL;
5262        int i;
5263
5264        for (i = 0; tracer_flags->opts[i].name; i++) {
5265                opts = &tracer_flags->opts[i];
5266
5267                if (strcmp(cmp, opts->name) == 0)
5268                        return __set_tracer_option(tr, trace->flags, opts, neg);
5269        }
5270
5271        return -EINVAL;
5272}
5273
5274/* Some tracers require overwrite to stay enabled */
5275int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5276{
5277        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5278                return -1;
5279
5280        return 0;
5281}
5282
5283int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5284{
5285        int *map;
5286
5287        if ((mask == TRACE_ITER_RECORD_TGID) ||
5288            (mask == TRACE_ITER_RECORD_CMD))
5289                lockdep_assert_held(&event_mutex);
5290
5291        /* do nothing if flag is already in the requested state */
5292        if (!!(tr->trace_flags & mask) == !!enabled)
5293                return 0;
5294
5295        /* Give the tracer a chance to approve the change */
5296        if (tr->current_trace->flag_changed)
5297                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5298                        return -EINVAL;
5299
5300        if (enabled)
5301                tr->trace_flags |= mask;
5302        else
5303                tr->trace_flags &= ~mask;
5304
5305        if (mask == TRACE_ITER_RECORD_CMD)
5306                trace_event_enable_cmd_record(enabled);
5307
5308        if (mask == TRACE_ITER_RECORD_TGID) {
5309                if (!tgid_map) {
5310                        tgid_map_max = pid_max;
5311                        map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5312                                       GFP_KERNEL);
5313
5314                        /*
5315                         * Pairs with smp_load_acquire() in
5316                         * trace_find_tgid_ptr() to ensure that if it observes
5317                         * the tgid_map we just allocated then it also observes
5318                         * the corresponding tgid_map_max value.
5319                         */
5320                        smp_store_release(&tgid_map, map);
5321                }
5322                if (!tgid_map) {
5323                        tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5324                        return -ENOMEM;
5325                }
5326
5327                trace_event_enable_tgid_record(enabled);
5328        }
5329
5330        if (mask == TRACE_ITER_EVENT_FORK)
5331                trace_event_follow_fork(tr, enabled);
5332
5333        if (mask == TRACE_ITER_FUNC_FORK)
5334                ftrace_pid_follow_fork(tr, enabled);
5335
5336        if (mask == TRACE_ITER_OVERWRITE) {
5337                ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5338#ifdef CONFIG_TRACER_MAX_TRACE
5339                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5340#endif
5341        }
5342
5343        if (mask == TRACE_ITER_PRINTK) {
5344                trace_printk_start_stop_comm(enabled);
5345                trace_printk_control(enabled);
5346        }
5347
5348        return 0;
5349}
5350
5351int trace_set_options(struct trace_array *tr, char *option)
5352{
5353        char *cmp;
5354        int neg = 0;
5355        int ret;
5356        size_t orig_len = strlen(option);
5357        int len;
5358
5359        cmp = strstrip(option);
5360
5361        len = str_has_prefix(cmp, "no");
5362        if (len)
5363                neg = 1;
5364
5365        cmp += len;
5366
5367        mutex_lock(&event_mutex);
5368        mutex_lock(&trace_types_lock);
5369
5370        ret = match_string(trace_options, -1, cmp);
5371        /* If not a core option, try the tracer-specific options */
5372        if (ret < 0)
5373                ret = set_tracer_option(tr, cmp, neg);
5374        else
5375                ret = set_tracer_flag(tr, 1 << ret, !neg);
5376
5377        mutex_unlock(&trace_types_lock);
5378        mutex_unlock(&event_mutex);
5379
5380        /*
5381         * If the first trailing whitespace is replaced with '\0' by strstrip,
5382         * turn it back into a space.
5383         */
5384        if (orig_len > strlen(option))
5385                option[strlen(option)] = ' ';
5386
5387        return ret;
5388}
5389
5390static void __init apply_trace_boot_options(void)
5391{
5392        char *buf = trace_boot_options_buf;
5393        char *option;
5394
5395        while (true) {
5396                option = strsep(&buf, ",");
5397
5398                if (!option)
5399                        break;
5400
5401                if (*option)
5402                        trace_set_options(&global_trace, option);
5403
5404                /* Put back the comma to allow this to be called again */
5405                if (buf)
5406                        *(buf - 1) = ',';
5407        }
5408}
5409
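    /*
     * Write handler for the trace_options file. An option is set by writing
     * its name and cleared by prefixing the name with "no", for example
     * (illustrative option name):
     *
     *   echo noprint-parent > /sys/kernel/tracing/trace_options
     */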
5410static ssize_t
5411tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5412                        size_t cnt, loff_t *ppos)
5413{
5414        struct seq_file *m = filp->private_data;
5415        struct trace_array *tr = m->private;
5416        char buf[64];
5417        int ret;
5418
5419        if (cnt >= sizeof(buf))
5420                return -EINVAL;
5421
5422        if (copy_from_user(buf, ubuf, cnt))
5423                return -EFAULT;
5424
5425        buf[cnt] = 0;
5426
5427        ret = trace_set_options(tr, buf);
5428        if (ret < 0)
5429                return ret;
5430
5431        *ppos += cnt;
5432
5433        return cnt;
5434}
5435
5436static int tracing_trace_options_open(struct inode *inode, struct file *file)
5437{
5438        struct trace_array *tr = inode->i_private;
5439        int ret;
5440
5441        ret = tracing_check_open_get_tr(tr);
5442        if (ret)
5443                return ret;
5444
5445        ret = single_open(file, tracing_trace_options_show, inode->i_private);
5446        if (ret < 0)
5447                trace_array_put(tr);
5448
5449        return ret;
5450}
5451
5452static const struct file_operations tracing_iter_fops = {
5453        .open           = tracing_trace_options_open,
5454        .read           = seq_read,
5455        .llseek         = seq_lseek,
5456        .release        = tracing_single_release_tr,
5457        .write          = tracing_trace_options_write,
5458};
5459
5460static const char readme_msg[] =
5461        "tracing mini-HOWTO:\n\n"
5462        "# echo 0 > tracing_on : quick way to disable tracing\n"
5463        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5464        " Important files:\n"
5465        "  trace\t\t\t- The static contents of the buffer\n"
5466        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5467        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5468        "  current_tracer\t- function and latency tracers\n"
5469        "  available_tracers\t- list of configured tracers for current_tracer\n"
5470        "  error_log\t- error log for failed commands (that support it)\n"
5471        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5472        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5473        "  trace_clock\t\t- change the clock used to order events\n"
5474        "       local:   Per cpu clock but may not be synced across CPUs\n"
5475        "      global:   Synced across CPUs but slows tracing down.\n"
5476        "     counter:   Not a clock, but just an increment\n"
5477        "      uptime:   Jiffy counter from time of boot\n"
5478        "        perf:   Same clock that perf events use\n"
5479#ifdef CONFIG_X86_64
5480        "     x86-tsc:   TSC cycle counter\n"
5481#endif
5482        "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5483        "       delta:   Delta difference against a buffer-wide timestamp\n"
5484        "    absolute:   Absolute (standalone) timestamp\n"
5485        "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5486        "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5487        "  tracing_cpumask\t- Limit which CPUs to trace\n"
5488        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5489        "\t\t\t  Remove sub-buffer with rmdir\n"
5490        "  trace_options\t\t- Set format or modify how tracing happens\n"
5491        "\t\t\t  Disable an option by prefixing 'no' to the\n"
5492        "\t\t\t  option name\n"
5493        "  saved_cmdlines_size\t- echo the number of comm-pid entries to cache in here\n"
5494#ifdef CONFIG_DYNAMIC_FTRACE
5495        "\n  available_filter_functions - list of functions that can be filtered on\n"
5496        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5497        "\t\t\t  functions\n"
5498        "\t     accepts: func_full_name or glob-matching-pattern\n"
5499        "\t     modules: Can select a group via module\n"
5500        "\t      Format: :mod:<module-name>\n"
5501        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5502        "\t    triggers: a command to perform when function is hit\n"
5503        "\t      Format: <function>:<trigger>[:count]\n"
5504        "\t     trigger: traceon, traceoff\n"
5505        "\t\t      enable_event:<system>:<event>\n"
5506        "\t\t      disable_event:<system>:<event>\n"
5507#ifdef CONFIG_STACKTRACE
5508        "\t\t      stacktrace\n"
5509#endif
5510#ifdef CONFIG_TRACER_SNAPSHOT
5511        "\t\t      snapshot\n"
5512#endif
5513        "\t\t      dump\n"
5514        "\t\t      cpudump\n"
5515        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5516        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5517        "\t     The first one will disable tracing every time do_fault is hit\n"
5518        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5519        "\t       The first time do_trap is hit and it disables tracing, the\n"
5520        "\t       counter will decrement to 2. If tracing is already disabled,\n"
5521        "\t       the counter will not decrement. It only decrements when the\n"
5522        "\t       trigger did work\n"
5523        "\t     To remove trigger without count:\n"
5524        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5525        "\t     To remove trigger with a count:\n"
5526        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5527        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5528        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5529        "\t    modules: Can select a group via module command :mod:\n"
5530        "\t    Does not accept triggers\n"
5531#endif /* CONFIG_DYNAMIC_FTRACE */
5532#ifdef CONFIG_FUNCTION_TRACER
5533        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5534        "\t\t    (function)\n"
5535        "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5536        "\t\t    (function)\n"
5537#endif
5538#ifdef CONFIG_FUNCTION_GRAPH_TRACER
5539        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5540        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5541        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5542#endif
5543#ifdef CONFIG_TRACER_SNAPSHOT
5544        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5545        "\t\t\t  snapshot buffer. Read the contents for more\n"
5546        "\t\t\t  information\n"
5547#endif
5548#ifdef CONFIG_STACK_TRACER
5549        "  stack_trace\t\t- Shows the max stack trace when active\n"
5550        "  stack_max_size\t- Shows current max stack size that was traced\n"
5551        "\t\t\t  Write into this file to reset the max size (trigger a\n"
5552        "\t\t\t  new trace)\n"
5553#ifdef CONFIG_DYNAMIC_FTRACE
5554        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5555        "\t\t\t  traces\n"
5556#endif
5557#endif /* CONFIG_STACK_TRACER */
5558#ifdef CONFIG_DYNAMIC_EVENTS
5559        "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5560        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5561#endif
5562#ifdef CONFIG_KPROBE_EVENTS
5563        "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5564        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5565#endif
5566#ifdef CONFIG_UPROBE_EVENTS
5567        "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5568        "\t\t\t  Write into this file to define/undefine new trace events.\n"
5569#endif
5570#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5571        "\t  accepts: event-definitions (one definition per line)\n"
5572        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5573        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5574#ifdef CONFIG_HIST_TRIGGERS
5575        "\t           s:[synthetic/]<event> <field> [<field>]\n"
5576#endif
5577        "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5578        "\t           -:[<group>/]<event>\n"
5579#ifdef CONFIG_KPROBE_EVENTS
5580        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5581  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5582#endif
5583#ifdef CONFIG_UPROBE_EVENTS
5584  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5585#endif
5586        "\t     args: <name>=fetcharg[:type]\n"
5587        "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5588#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5589        "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5590#else
5591        "\t           $stack<index>, $stack, $retval, $comm,\n"
5592#endif
5593        "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5594        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5595        "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5596        "\t           <type>\\[<array-size>\\]\n"
5597#ifdef CONFIG_HIST_TRIGGERS
5598        "\t    field: <stype> <name>;\n"
5599        "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5600        "\t           [unsigned] char/int/long\n"
5601#endif
5602        "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5603        "\t            of the <attached-group>/<attached-event>.\n"
5604#endif
5605        "  events/\t\t- Directory containing all trace event subsystems:\n"
5606        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5607        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5608        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5609        "\t\t\t  events\n"
5610        "      filter\t\t- If set, only events passing filter are traced\n"
5611        "  events/<system>/<event>/\t- Directory containing control files for\n"
5612        "\t\t\t  <event>:\n"
5613        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5614        "      filter\t\t- If set, only events passing filter are traced\n"
5615        "      trigger\t\t- If set, a command to perform when event is hit\n"
5616        "\t    Format: <trigger>[:count][if <filter>]\n"
5617        "\t   trigger: traceon, traceoff\n"
5618        "\t            enable_event:<system>:<event>\n"
5619        "\t            disable_event:<system>:<event>\n"
5620#ifdef CONFIG_HIST_TRIGGERS
5621        "\t            enable_hist:<system>:<event>\n"
5622        "\t            disable_hist:<system>:<event>\n"
5623#endif
5624#ifdef CONFIG_STACKTRACE
5625        "\t\t    stacktrace\n"
5626#endif
5627#ifdef CONFIG_TRACER_SNAPSHOT
5628        "\t\t    snapshot\n"
5629#endif
5630#ifdef CONFIG_HIST_TRIGGERS
5631        "\t\t    hist (see below)\n"
5632#endif
5633        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5634        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5635        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5636        "\t                  events/block/block_unplug/trigger\n"
5637        "\t   The first disables tracing every time block_unplug is hit.\n"
5638        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5639        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5640        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5641        "\t   Like function triggers, the counter is only decremented if it\n"
5642        "\t    enabled or disabled tracing.\n"
5643        "\t   To remove a trigger without a count:\n"
5644        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5645        "\t   To remove a trigger with a count:\n"
5646        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5647        "\t   Filters can be ignored when removing a trigger.\n"
5648#ifdef CONFIG_HIST_TRIGGERS
5649        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5650        "\t    Format: hist:keys=<field1[,field2,...]>\n"
5651        "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5652        "\t            [:values=<field1[,field2,...]>]\n"
5653        "\t            [:sort=<field1[,field2,...]>]\n"
5654        "\t            [:size=#entries]\n"
5655        "\t            [:pause][:continue][:clear]\n"
5656        "\t            [:name=histname1]\n"
5657        "\t            [:<handler>.<action>]\n"
5658        "\t            [if <filter>]\n\n"
5659        "\t    Note, special fields can be used as well:\n"
5660        "\t            common_timestamp - to record current timestamp\n"
5661        "\t            common_cpu - to record the CPU the event happened on\n"
5662        "\n"
5663        "\t    A hist trigger variable can be:\n"
5664        "\t        - a reference to a field e.g. x=current_timestamp,\n"
5665        "\t        - a reference to another variable e.g. y=$x,\n"
5666        "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5667        "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5668        "\n"
5669        "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5670        "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5671        "\t    variable reference, field or numeric literal.\n"
5672        "\n"
5673        "\t    When a matching event is hit, an entry is added to a hash\n"
5674        "\t    table using the key(s) and value(s) named, and the value of a\n"
5675        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5676        "\t    correspond to fields in the event's format description.  Keys\n"
5677        "\t    can be any field, or the special string 'stacktrace'.\n"
5678        "\t    Compound keys consisting of up to two fields can be specified\n"
5679        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5680        "\t    fields.  Sort keys consisting of up to two fields can be\n"
5681        "\t    specified using the 'sort' keyword.  The sort direction can\n"
5682        "\t    be modified by appending '.descending' or '.ascending' to a\n"
5683        "\t    sort field.  The 'size' parameter can be used to specify more\n"
5684        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5685        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5686        "\t    its histogram data will be shared with other triggers of the\n"
5687        "\t    same name, and trigger hits will update this common data.\n\n"
5688        "\t    Reading the 'hist' file for the event will dump the hash\n"
5689        "\t    table in its entirety to stdout.  If there are multiple hist\n"
5690        "\t    triggers attached to an event, there will be a table for each\n"
5691        "\t    trigger in the output.  The table displayed for a named\n"
5692        "\t    trigger will be the same as any other instance having the\n"
5693        "\t    same name.  The default format used to display a given field\n"
5694        "\t    can be modified by appending any of the following modifiers\n"
5695        "\t    to the field name, as applicable:\n\n"
5696        "\t            .hex        display a number as a hex value\n"
5697        "\t            .sym        display an address as a symbol\n"
5698        "\t            .sym-offset display an address as a symbol and offset\n"
5699        "\t            .execname   display a common_pid as a program name\n"
5700        "\t            .syscall    display a syscall id as a syscall name\n"
5701        "\t            .log2       display log2 value rather than raw number\n"
5702        "\t            .buckets=size  display values in groups of size rather than raw number\n"
5703        "\t            .usecs      display a common_timestamp in microseconds\n\n"
5704        "\t    The 'pause' parameter can be used to pause an existing hist\n"
5705        "\t    trigger or to start a hist trigger but not log any events\n"
5706        "\t    until told to do so.  'continue' can be used to start or\n"
5707        "\t    restart a paused hist trigger.\n\n"
5708        "\t    The 'clear' parameter will clear the contents of a running\n"
5709        "\t    hist trigger and leave its current paused/active state\n"
5710        "\t    unchanged.\n\n"
5711        "\t    The enable_hist and disable_hist triggers can be used to\n"
5712        "\t    have one event conditionally start and stop another event's\n"
5713        "\t    already-attached hist trigger.  The syntax is analogous to\n"
5714        "\t    the enable_event and disable_event triggers.\n\n"
5715        "\t    Hist trigger handlers and actions are executed whenever a\n"
5716        "\t    histogram entry is added or updated.  They take the form:\n\n"
5717        "\t        <handler>.<action>\n\n"
5718        "\t    The available handlers are:\n\n"
5719        "\t        onmatch(matching.event)  - invoke on addition or update\n"
5720        "\t        onmax(var)               - invoke if var exceeds current max\n"
5721        "\t        onchange(var)            - invoke action if var changes\n\n"
5722        "\t    The available actions are:\n\n"
5723        "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5724        "\t        save(field,...)                      - save current event fields\n"
5725#ifdef CONFIG_TRACER_SNAPSHOT
5726        "\t        snapshot()                           - snapshot the trace buffer\n\n"
5727#endif
5728#ifdef CONFIG_SYNTH_EVENTS
5729        "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5730        "\t  Write into this file to define/undefine new synthetic events.\n"
5731        "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5732#endif
5733#endif
5734;
5735
5736static ssize_t
5737tracing_readme_read(struct file *filp, char __user *ubuf,
5738                       size_t cnt, loff_t *ppos)
5739{
5740        return simple_read_from_buffer(ubuf, cnt, ppos,
5741                                        readme_msg, strlen(readme_msg));
5742}
5743
5744static const struct file_operations tracing_readme_fops = {
5745        .open           = tracing_open_generic,
5746        .read           = tracing_readme_read,
5747        .llseek         = generic_file_llseek,
5748};
5749
5750static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5751{
5752        int pid = ++(*pos);
5753
5754        return trace_find_tgid_ptr(pid);
5755}
5756
5757static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5758{
5759        int pid = *pos;
5760
5761        return trace_find_tgid_ptr(pid);
5762}
5763
5764static void saved_tgids_stop(struct seq_file *m, void *v)
5765{
5766}
5767
5768static int saved_tgids_show(struct seq_file *m, void *v)
5769{
5770        int *entry = (int *)v;
5771        int pid = entry - tgid_map;
5772        int tgid = *entry;
5773
5774        if (tgid == 0)
5775                return SEQ_SKIP;
5776
5777        seq_printf(m, "%d %d\n", pid, tgid);
5778        return 0;
5779}
5780
5781static const struct seq_operations tracing_saved_tgids_seq_ops = {
5782        .start          = saved_tgids_start,
5783        .stop           = saved_tgids_stop,
5784        .next           = saved_tgids_next,
5785        .show           = saved_tgids_show,
5786};
5787
5788static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5789{
5790        int ret;
5791
5792        ret = tracing_check_open_get_tr(NULL);
5793        if (ret)
5794                return ret;
5795
5796        return seq_open(filp, &tracing_saved_tgids_seq_ops);
5797}
5798
5799
5800static const struct file_operations tracing_saved_tgids_fops = {
5801        .open           = tracing_saved_tgids_open,
5802        .read           = seq_read,
5803        .llseek         = seq_lseek,
5804        .release        = seq_release,
5805};
5806
5807static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5808{
5809        unsigned int *ptr = v;
5810
5811        if (*pos || m->count)
5812                ptr++;
5813
5814        (*pos)++;
5815
5816        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5817             ptr++) {
5818                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5819                        continue;
5820
5821                return ptr;
5822        }
5823
5824        return NULL;
5825}
5826
5827static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5828{
5829        void *v;
5830        loff_t l = 0;
5831
5832        preempt_disable();
5833        arch_spin_lock(&trace_cmdline_lock);
5834
5835        v = &savedcmd->map_cmdline_to_pid[0];
5836        while (l <= *pos) {
5837                v = saved_cmdlines_next(m, v, &l);
5838                if (!v)
5839                        return NULL;
5840        }
5841
5842        return v;
5843}
5844
5845static void saved_cmdlines_stop(struct seq_file *m, void *v)
5846{
5847        arch_spin_unlock(&trace_cmdline_lock);
5848        preempt_enable();
5849}
5850
5851static int saved_cmdlines_show(struct seq_file *m, void *v)
5852{
5853        char buf[TASK_COMM_LEN];
5854        unsigned int *pid = v;
5855
5856        __trace_find_cmdline(*pid, buf);
5857        seq_printf(m, "%d %s\n", *pid, buf);
5858        return 0;
5859}
5860
5861static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5862        .start          = saved_cmdlines_start,
5863        .next           = saved_cmdlines_next,
5864        .stop           = saved_cmdlines_stop,
5865        .show           = saved_cmdlines_show,
5866};
5867
5868static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5869{
5870        int ret;
5871
5872        ret = tracing_check_open_get_tr(NULL);
5873        if (ret)
5874                return ret;
5875
5876        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5877}
5878
5879static const struct file_operations tracing_saved_cmdlines_fops = {
5880        .open           = tracing_saved_cmdlines_open,
5881        .read           = seq_read,
5882        .llseek         = seq_lseek,
5883        .release        = seq_release,
5884};
5885
5886static ssize_t
5887tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5888                                 size_t cnt, loff_t *ppos)
5889{
5890        char buf[64];
5891        int r;
5892
5893        arch_spin_lock(&trace_cmdline_lock);
5894        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5895        arch_spin_unlock(&trace_cmdline_lock);
5896
5897        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5898}
5899
5900static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5901{
5902        kfree(s->saved_cmdlines);
5903        kfree(s->map_cmdline_to_pid);
5904        kfree(s);
5905}
5906
5907static int tracing_resize_saved_cmdlines(unsigned int val)
5908{
5909        struct saved_cmdlines_buffer *s, *savedcmd_temp;
5910
5911        s = kmalloc(sizeof(*s), GFP_KERNEL);
5912        if (!s)
5913                return -ENOMEM;
5914
5915        if (allocate_cmdlines_buffer(val, s) < 0) {
5916                kfree(s);
5917                return -ENOMEM;
5918        }
5919
5920        arch_spin_lock(&trace_cmdline_lock);
5921        savedcmd_temp = savedcmd;
5922        savedcmd = s;
5923        arch_spin_unlock(&trace_cmdline_lock);
5924        free_saved_cmdlines_buffer(savedcmd_temp);
5925
5926        return 0;
5927}
5928
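    /*
     * Write handler for the saved_cmdlines_size file: accepts a decimal
     * entry count and reallocates the pid<->comm cache to that many slots,
     * e.g.
     *
     *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
     */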
5929static ssize_t
5930tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5931                                  size_t cnt, loff_t *ppos)
5932{
5933        unsigned long val;
5934        int ret;
5935
5936        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5937        if (ret)
5938                return ret;
5939
5940        /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5941        if (!val || val > PID_MAX_DEFAULT)
5942                return -EINVAL;
5943
5944        ret = tracing_resize_saved_cmdlines((unsigned int)val);
5945        if (ret < 0)
5946                return ret;
5947
5948        *ppos += cnt;
5949
5950        return cnt;
5951}
5952
5953static const struct file_operations tracing_saved_cmdlines_size_fops = {
5954        .open           = tracing_open_generic,
5955        .read           = tracing_saved_cmdlines_size_read,
5956        .write          = tracing_saved_cmdlines_size_write,
5957};
5958
5959#ifdef CONFIG_TRACE_EVAL_MAP_FILE
5960static union trace_eval_map_item *
5961update_eval_map(union trace_eval_map_item *ptr)
5962{
5963        if (!ptr->map.eval_string) {
5964                if (ptr->tail.next) {
5965                        ptr = ptr->tail.next;
5966                        /* Set ptr to the next real item (skip head) */
5967                        ptr++;
5968                } else
5969                        return NULL;
5970        }
5971        return ptr;
5972}
5973
5974static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5975{
5976        union trace_eval_map_item *ptr = v;
5977
5978        /*
5979         * Paranoid! If ptr points to end, we don't want to increment past it.
5980         * This really should never happen.
5981         */
5982        (*pos)++;
5983        ptr = update_eval_map(ptr);
5984        if (WARN_ON_ONCE(!ptr))
5985                return NULL;
5986
5987        ptr++;
5988        ptr = update_eval_map(ptr);
5989
5990        return ptr;
5991}
5992
5993static void *eval_map_start(struct seq_file *m, loff_t *pos)
5994{
5995        union trace_eval_map_item *v;
5996        loff_t l = 0;
5997
5998        mutex_lock(&trace_eval_mutex);
5999
6000        v = trace_eval_maps;
6001        if (v)
6002                v++;
6003
6004        while (v && l < *pos) {
6005                v = eval_map_next(m, v, &l);
6006        }
6007
6008        return v;
6009}
6010
6011static void eval_map_stop(struct seq_file *m, void *v)
6012{
6013        mutex_unlock(&trace_eval_mutex);
6014}
6015
6016static int eval_map_show(struct seq_file *m, void *v)
6017{
6018        union trace_eval_map_item *ptr = v;
6019
6020        seq_printf(m, "%s %ld (%s)\n",
6021                   ptr->map.eval_string, ptr->map.eval_value,
6022                   ptr->map.system);
6023
6024        return 0;
6025}
6026
6027static const struct seq_operations tracing_eval_map_seq_ops = {
6028        .start          = eval_map_start,
6029        .next           = eval_map_next,
6030        .stop           = eval_map_stop,
6031        .show           = eval_map_show,
6032};
6033
6034static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6035{
6036        int ret;
6037
6038        ret = tracing_check_open_get_tr(NULL);
6039        if (ret)
6040                return ret;
6041
6042        return seq_open(filp, &tracing_eval_map_seq_ops);
6043}
6044
6045static const struct file_operations tracing_eval_map_fops = {
6046        .open           = tracing_eval_map_open,
6047        .read           = seq_read,
6048        .llseek         = seq_lseek,
6049        .release        = seq_release,
6050};
6051
6052static inline union trace_eval_map_item *
6053trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6054{
6055        /* Return tail of array given the head */
6056        return ptr + ptr->head.length + 1;
6057}
6058
6059static void
6060trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6061                           int len)
6062{
6063        struct trace_eval_map **stop;
6064        struct trace_eval_map **map;
6065        union trace_eval_map_item *map_array;
6066        union trace_eval_map_item *ptr;
6067
6068        stop = start + len;
6069
6070        /*
6071         * The trace_eval_maps contains the map plus a head and tail item,
6072         * where the head holds the module and length of array, and the
6073         * tail holds a pointer to the next list.
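             *
             * Layout of the allocated block (len + 2 items):
             *   map_array[0]         - head (module pointer and array length)
             *   map_array[1 .. len]  - copies of the module's eval maps
             *   map_array[len + 1]   - tail (zeroed; tail.next chains the next block)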
6074         */
6075        map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6076        if (!map_array) {
6077                pr_warn("Unable to allocate trace eval mapping\n");
6078                return;
6079        }
6080
6081        mutex_lock(&trace_eval_mutex);
6082
6083        if (!trace_eval_maps)
6084                trace_eval_maps = map_array;
6085        else {
6086                ptr = trace_eval_maps;
6087                for (;;) {
6088                        ptr = trace_eval_jmp_to_tail(ptr);
6089                        if (!ptr->tail.next)
6090                                break;
6091                        ptr = ptr->tail.next;
6092
6093                }
6094                ptr->tail.next = map_array;
6095        }
6096        map_array->head.mod = mod;
6097        map_array->head.length = len;
6098        map_array++;
6099
6100        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6101                map_array->map = **map;
6102                map_array++;
6103        }
6104        memset(map_array, 0, sizeof(*map_array));
6105
6106        mutex_unlock(&trace_eval_mutex);
6107}
6108
6109static void trace_create_eval_file(struct dentry *d_tracer)
6110{
6111        trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6112                          NULL, &tracing_eval_map_fops);
6113}
6114
6115#else /* CONFIG_TRACE_EVAL_MAP_FILE */
6116static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6117static inline void trace_insert_eval_map_file(struct module *mod,
6118                              struct trace_eval_map **start, int len) { }
6119#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6120
6121static void trace_insert_eval_map(struct module *mod,
6122                                  struct trace_eval_map **start, int len)
6123{
6124        struct trace_eval_map **map;
6125
6126        if (len <= 0)
6127                return;
6128
6129        map = start;
6130
6131        trace_event_eval_update(map, len);
6132
6133        trace_insert_eval_map_file(mod, start, len);
6134}
6135
6136static ssize_t
6137tracing_set_trace_read(struct file *filp, char __user *ubuf,
6138                       size_t cnt, loff_t *ppos)
6139{
6140        struct trace_array *tr = filp->private_data;
6141        char buf[MAX_TRACER_SIZE+2];
6142        int r;
6143
6144        mutex_lock(&trace_types_lock);
6145        r = sprintf(buf, "%s\n", tr->current_trace->name);
6146        mutex_unlock(&trace_types_lock);
6147
6148        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6149}
6150
6151int tracer_init(struct tracer *t, struct trace_array *tr)
6152{
6153        tracing_reset_online_cpus(&tr->array_buffer);
6154        return t->init(tr);
6155}
6156
6157static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6158{
6159        int cpu;
6160
6161        for_each_tracing_cpu(cpu)
6162                per_cpu_ptr(buf->data, cpu)->entries = val;
6163}
6164
6165#ifdef CONFIG_TRACER_MAX_TRACE
6166/* resize @trace_buf's per-cpu entries to match @size_buf's per-cpu entries */
6167static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6168                                        struct array_buffer *size_buf, int cpu_id)
6169{
6170        int cpu, ret = 0;
6171
6172        if (cpu_id == RING_BUFFER_ALL_CPUS) {
6173                for_each_tracing_cpu(cpu) {
6174                        ret = ring_buffer_resize(trace_buf->buffer,
6175                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6176                        if (ret < 0)
6177                                break;
6178                        per_cpu_ptr(trace_buf->data, cpu)->entries =
6179                                per_cpu_ptr(size_buf->data, cpu)->entries;
6180                }
6181        } else {
6182                ret = ring_buffer_resize(trace_buf->buffer,
6183                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6184                if (ret == 0)
6185                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6186                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
6187        }
6188
6189        return ret;
6190}
6191#endif /* CONFIG_TRACER_MAX_TRACE */
6192
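    /*
     * Resize the main ring buffer and, when the current tracer uses the
     * max/snapshot buffer, resize that one too so the two stay the same size
     * and can still be swapped.
     */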
6193static int __tracing_resize_ring_buffer(struct trace_array *tr,
6194                                        unsigned long size, int cpu)
6195{
6196        int ret;
6197
6198        /*
6199         * If kernel or user changes the size of the ring buffer
6200         * we use the size that was given, and we can forget about
6201         * expanding it later.
6202         */
6203        ring_buffer_expanded = true;
6204
6205        /* May be called before buffers are initialized */
6206        if (!tr->array_buffer.buffer)
6207                return 0;
6208
6209        ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6210        if (ret < 0)
6211                return ret;
6212
6213#ifdef CONFIG_TRACER_MAX_TRACE
6214        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6215            !tr->current_trace->use_max_tr)
6216                goto out;
6217
6218        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6219        if (ret < 0) {
6220                int r = resize_buffer_duplicate_size(&tr->array_buffer,
6221                                                     &tr->array_buffer, cpu);
6222                if (r < 0) {
6223                        /*
6224                         * AARGH! We are left with a
6225                         * different sized max buffer!!!!
6226                         * The max buffer is our "snapshot" buffer.
6227                         * When a tracer needs a snapshot (one of the
6228                         * latency tracers), it swaps the max buffer
6229                         * with the saved snapshot. We succeeded in
6230                         * updating the size of the main buffer, but
6231                         * failed to update the size of the max buffer.
6232                         * And when we tried to reset the main buffer to
6233                         * the original size, we failed there too. This
6234                         * is very unlikely to happen, but if it does,
6235                         * warn and kill all tracing.
6236                         */
6237                        WARN_ON(1);
6238                        tracing_disabled = 1;
6239                }
6240                return ret;
6241        }
6242
6243        if (cpu == RING_BUFFER_ALL_CPUS)
6244                set_buffer_entries(&tr->max_buffer, size);
6245        else
6246                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6247
6248 out:
6249#endif /* CONFIG_TRACER_MAX_TRACE */
6250
6251        if (cpu == RING_BUFFER_ALL_CPUS)
6252                set_buffer_entries(&tr->array_buffer, size);
6253        else
6254                per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6255
6256        return ret;
6257}
6258
6259ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6260                                  unsigned long size, int cpu_id)
6261{
6262        int ret;
6263
6264        mutex_lock(&trace_types_lock);
6265
6266        if (cpu_id != RING_BUFFER_ALL_CPUS) {
6267                /* make sure this cpu is enabled in the mask */
6268                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6269                        ret = -EINVAL;
6270                        goto out;
6271                }
6272        }
6273
6274        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6275        if (ret < 0)
6276                ret = -ENOMEM;
6277
6278out:
6279        mutex_unlock(&trace_types_lock);
6280
6281        return ret;
6282}
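
/*
 * Illustrative sketch: how kernel code could resize every per-CPU buffer of
 * an instance to 1 MB by calling tracing_resize_ring_buffer() directly.  The
 * helper name example_resize_instance() is hypothetical.
 */
#if 0
static ssize_t example_resize_instance(struct trace_array *tr)
{
        /* size is in bytes here; the buffer_size_kb file converts from KB */
        return tracing_resize_ring_buffer(tr, 1024 * 1024, RING_BUFFER_ALL_CPUS);
}
#endif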
6283
6284
6285/**
6286 * tracing_update_buffers - used by tracing facility to expand ring buffers
6287 *
6288 * To save memory on systems where tracing is configured in but never
6289 * used, the ring buffers are set to a minimum size. Once a user starts
6290 * to use the tracing facility, they need to grow to their default
6291 * size.
6292 *
6293 * This function is to be called when a tracer is about to be used.
6294 */
6295int tracing_update_buffers(void)
6296{
6297        int ret = 0;
6298
6299        mutex_lock(&trace_types_lock);
6300        if (!ring_buffer_expanded)
6301                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6302                                                RING_BUFFER_ALL_CPUS);
6303        mutex_unlock(&trace_types_lock);
6304
6305        return ret;
6306}
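
/*
 * Illustrative sketch: a caller that is about to enable tracing expands the
 * boot-time minimal buffers first.  The function name
 * example_prepare_tracing() is hypothetical.
 */
#if 0
static int example_prepare_tracing(void)
{
        int ret;

        /* No-op once ring_buffer_expanded has been set */
        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        return 0;
}
#endif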
6307
6308struct trace_option_dentry;
6309
6310static void
6311create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6312
6313/*
6314 * Used to clear out the tracer before deletion of an instance.
6315 * Must have trace_types_lock held.
6316 */
6317static void tracing_set_nop(struct trace_array *tr)
6318{
6319        if (tr->current_trace == &nop_trace)
6320                return;
6321
6322        tr->current_trace->enabled--;
6323
6324        if (tr->current_trace->reset)
6325                tr->current_trace->reset(tr);
6326
6327        tr->current_trace = &nop_trace;
6328}
6329
6330static bool tracer_options_updated;
6331
6332static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6333{
6334        /* Only enable if the directory has been created already. */
6335        if (!tr->dir)
6336                return;
6337
6338        /* Only create trace option files after update_tracer_options has finished */
6339        if (!tracer_options_updated)
6340                return;
6341
6342        create_trace_option_files(tr, t);
6343}
6344
6345int tracing_set_tracer(struct trace_array *tr, const char *buf)
6346{
6347        struct tracer *t;
6348#ifdef CONFIG_TRACER_MAX_TRACE
6349        bool had_max_tr;
6350#endif
6351        int ret = 0;
6352
6353        mutex_lock(&trace_types_lock);
6354
6355        if (!ring_buffer_expanded) {
6356                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6357                                                RING_BUFFER_ALL_CPUS);
6358                if (ret < 0)
6359                        goto out;
6360                ret = 0;
6361        }
6362
6363        for (t = trace_types; t; t = t->next) {
6364                if (strcmp(t->name, buf) == 0)
6365                        break;
6366        }
6367        if (!t) {
6368                ret = -EINVAL;
6369                goto out;
6370        }
6371        if (t == tr->current_trace)
6372                goto out;
6373
6374#ifdef CONFIG_TRACER_SNAPSHOT
6375        if (t->use_max_tr) {
6376                arch_spin_lock(&tr->max_lock);
6377                if (tr->cond_snapshot)
6378                        ret = -EBUSY;
6379                arch_spin_unlock(&tr->max_lock);
6380                if (ret)
6381                        goto out;
6382        }
6383#endif
6384        /* Some tracers won't work if enabled from the kernel command line */
6385        if (system_state < SYSTEM_RUNNING && t->noboot) {
6386                pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6387                        t->name);
6388                goto out;
6389        }
6390
6391        /* Some tracers are only allowed for the top level buffer */
6392        if (!trace_ok_for_array(t, tr)) {
6393                ret = -EINVAL;
6394                goto out;
6395        }
6396
6397        /* If trace pipe files are being read, we can't change the tracer */
6398        if (tr->trace_ref) {
6399                ret = -EBUSY;
6400                goto out;
6401        }
6402
6403        trace_branch_disable();
6404
6405        tr->current_trace->enabled--;
6406
6407        if (tr->current_trace->reset)
6408                tr->current_trace->reset(tr);
6409
6410        /* Current trace needs to be nop_trace before synchronize_rcu */
6411        tr->current_trace = &nop_trace;
6412
6413#ifdef CONFIG_TRACER_MAX_TRACE
6414        had_max_tr = tr->allocated_snapshot;
6415
6416        if (had_max_tr && !t->use_max_tr) {
6417                /*
6418                 * We need to make sure that update_max_tr() sees that
6419                 * current_trace changed to nop_trace, to keep it from
6420                 * swapping the buffers after we resize them.
6421                 * update_max_tr() is called with interrupts disabled,
6422                 * so a synchronize_rcu() is sufficient.
6423                 */
6424                synchronize_rcu();
6425                free_snapshot(tr);
6426        }
6427
6428        if (t->use_max_tr && !had_max_tr) {
6429                ret = tracing_alloc_snapshot_instance(tr);
6430                if (ret < 0)
6431                        goto out;
6432        }
6433#endif
6434
6435        if (t->init) {
6436                ret = tracer_init(t, tr);
6437                if (ret)
6438                        goto out;
6439        }
6440
6441        tr->current_trace = t;
6442        tr->current_trace->enabled++;
6443        trace_branch_enable(tr);
6444 out:
6445        mutex_unlock(&trace_types_lock);
6446
6447        return ret;
6448}
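
/*
 * Illustrative sketch: switching an instance back to the "nop" tracer from
 * kernel code, mirroring what a write to the "current_tracer" file does.
 * example_use_nop() is a hypothetical name.
 */
#if 0
static int example_use_nop(struct trace_array *tr)
{
        /* "nop" is always registered; this mainly fails with -EBUSY */
        return tracing_set_tracer(tr, "nop");
}
#endif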
6449
6450static ssize_t
6451tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6452                        size_t cnt, loff_t *ppos)
6453{
6454        struct trace_array *tr = filp->private_data;
6455        char buf[MAX_TRACER_SIZE+1];
6456        char *name;
6457        size_t ret;
6458        int err;
6459
6460        ret = cnt;
6461
6462        if (cnt > MAX_TRACER_SIZE)
6463                cnt = MAX_TRACER_SIZE;
6464
6465        if (copy_from_user(buf, ubuf, cnt))
6466                return -EFAULT;
6467
6468        buf[cnt] = 0;
6469
6470        name = strim(buf);
6471
6472        err = tracing_set_tracer(tr, name);
6473        if (err)
6474                return err;
6475
6476        *ppos += ret;
6477
6478        return ret;
6479}
6480
6481static ssize_t
6482tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6483                   size_t cnt, loff_t *ppos)
6484{
6485        char buf[64];
6486        int r;
6487
6488        r = snprintf(buf, sizeof(buf), "%ld\n",
6489                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6490        if (r > sizeof(buf))
6491                r = sizeof(buf);
6492        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6493}
6494
6495static ssize_t
6496tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6497                    size_t cnt, loff_t *ppos)
6498{
6499        unsigned long val;
6500        int ret;
6501
6502        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6503        if (ret)
6504                return ret;
6505
6506        *ptr = val * 1000;
6507
6508        return cnt;
6509}
6510
6511static ssize_t
6512tracing_thresh_read(struct file *filp, char __user *ubuf,
6513                    size_t cnt, loff_t *ppos)
6514{
6515        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6516}
6517
6518static ssize_t
6519tracing_thresh_write(struct file *filp, const char __user *ubuf,
6520                     size_t cnt, loff_t *ppos)
6521{
6522        struct trace_array *tr = filp->private_data;
6523        int ret;
6524
6525        mutex_lock(&trace_types_lock);
6526        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6527        if (ret < 0)
6528                goto out;
6529
6530        if (tr->current_trace->update_thresh) {
6531                ret = tr->current_trace->update_thresh(tr);
6532                if (ret < 0)
6533                        goto out;
6534        }
6535
6536        ret = cnt;
6537out:
6538        mutex_unlock(&trace_types_lock);
6539
6540        return ret;
6541}
6542
6543#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6544
6545static ssize_t
6546tracing_max_lat_read(struct file *filp, char __user *ubuf,
6547                     size_t cnt, loff_t *ppos)
6548{
6549        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6550}
6551
6552static ssize_t
6553tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6554                      size_t cnt, loff_t *ppos)
6555{
6556        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6557}
6558
6559#endif
6560
6561static int tracing_open_pipe(struct inode *inode, struct file *filp)
6562{
6563        struct trace_array *tr = inode->i_private;
6564        struct trace_iterator *iter;
6565        int ret;
6566
6567        ret = tracing_check_open_get_tr(tr);
6568        if (ret)
6569                return ret;
6570
6571        mutex_lock(&trace_types_lock);
6572
6573        /* create a buffer to store the information to pass to userspace */
6574        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6575        if (!iter) {
6576                ret = -ENOMEM;
6577                __trace_array_put(tr);
6578                goto out;
6579        }
6580
6581        trace_seq_init(&iter->seq);
6582        iter->trace = tr->current_trace;
6583
6584        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6585                ret = -ENOMEM;
6586                goto fail;
6587        }
6588
6589        /* trace pipe does not show start of buffer */
6590        cpumask_setall(iter->started);
6591
6592        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6593                iter->iter_flags |= TRACE_FILE_LAT_FMT;
6594
6595        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6596        if (trace_clocks[tr->clock_id].in_ns)
6597                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6598
6599        iter->tr = tr;
6600        iter->array_buffer = &tr->array_buffer;
6601        iter->cpu_file = tracing_get_cpu(inode);
6602        mutex_init(&iter->mutex);
6603        filp->private_data = iter;
6604
6605        if (iter->trace->pipe_open)
6606                iter->trace->pipe_open(iter);
6607
6608        nonseekable_open(inode, filp);
6609
6610        tr->trace_ref++;
6611out:
6612        mutex_unlock(&trace_types_lock);
6613        return ret;
6614
6615fail:
6616        kfree(iter);
6617        __trace_array_put(tr);
6618        mutex_unlock(&trace_types_lock);
6619        return ret;
6620}
6621
6622static int tracing_release_pipe(struct inode *inode, struct file *file)
6623{
6624        struct trace_iterator *iter = file->private_data;
6625        struct trace_array *tr = inode->i_private;
6626
6627        mutex_lock(&trace_types_lock);
6628
6629        tr->trace_ref--;
6630
6631        if (iter->trace->pipe_close)
6632                iter->trace->pipe_close(iter);
6633
6634        mutex_unlock(&trace_types_lock);
6635
6636        free_cpumask_var(iter->started);
6637        mutex_destroy(&iter->mutex);
6638        kfree(iter);
6639
6640        trace_array_put(tr);
6641
6642        return 0;
6643}
6644
6645static __poll_t
6646trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6647{
6648        struct trace_array *tr = iter->tr;
6649
6650        /* Iterators are static; they should be either filled or empty */
6651        if (trace_buffer_iter(iter, iter->cpu_file))
6652                return EPOLLIN | EPOLLRDNORM;
6653
6654        if (tr->trace_flags & TRACE_ITER_BLOCK)
6655                /*
6656                 * Always select as readable when in blocking mode
6657                 */
6658                return EPOLLIN | EPOLLRDNORM;
6659        else
6660                return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6661                                             filp, poll_table);
6662}
6663
6664static __poll_t
6665tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6666{
6667        struct trace_iterator *iter = filp->private_data;
6668
6669        return trace_poll(iter, filp, poll_table);
6670}
6671
6672/* Must be called with iter->mutex held. */
6673static int tracing_wait_pipe(struct file *filp)
6674{
6675        struct trace_iterator *iter = filp->private_data;
6676        int ret;
6677
6678        while (trace_empty(iter)) {
6679
6680                if ((filp->f_flags & O_NONBLOCK)) {
6681                        return -EAGAIN;
6682                }
6683
6684                /*
6685                 * We block until we have read something and tracing is then
6686                 * disabled. We keep blocking while tracing is disabled if we
6687                 * have never read anything: this lets a user cat this file
6688                 * and then enable tracing. But once we have read something,
6689                 * we return EOF whenever tracing is disabled again.
6690                 *
6691                 * iter->pos will be 0 if we haven't read anything.
6692                 */
6693                if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6694                        break;
6695
6696                mutex_unlock(&iter->mutex);
6697
6698                ret = wait_on_pipe(iter, 0);
6699
6700                mutex_lock(&iter->mutex);
6701
6702                if (ret)
6703                        return ret;
6704        }
6705
6706        return 1;
6707}
6708
6709/*
6710 * Consumer reader.
6711 */
6712static ssize_t
6713tracing_read_pipe(struct file *filp, char __user *ubuf,
6714                  size_t cnt, loff_t *ppos)
6715{
6716        struct trace_iterator *iter = filp->private_data;
6717        ssize_t sret;
6718
6719        /*
6720         * Avoid more than one consumer on a single file descriptor.
6721         * This is just a matter of trace coherency: the ring buffer
6722         * itself is protected.
6723         */
6724        mutex_lock(&iter->mutex);
6725
6726        /* return any leftover data */
6727        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6728        if (sret != -EBUSY)
6729                goto out;
6730
6731        trace_seq_init(&iter->seq);
6732
6733        if (iter->trace->read) {
6734                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6735                if (sret)
6736                        goto out;
6737        }
6738
6739waitagain:
6740        sret = tracing_wait_pipe(filp);
6741        if (sret <= 0)
6742                goto out;
6743
6744        /* stop when tracing is finished */
6745        if (trace_empty(iter)) {
6746                sret = 0;
6747                goto out;
6748        }
6749
6750        if (cnt >= PAGE_SIZE)
6751                cnt = PAGE_SIZE - 1;
6752
6753        /* reset all but tr, trace, and overruns */
6754        trace_iterator_reset(iter);
6755        cpumask_clear(iter->started);
6756        trace_seq_init(&iter->seq);
6757
6758        trace_event_read_lock();
6759        trace_access_lock(iter->cpu_file);
6760        while (trace_find_next_entry_inc(iter) != NULL) {
6761                enum print_line_t ret;
6762                int save_len = iter->seq.seq.len;
6763
6764                ret = print_trace_line(iter);
6765                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6766                        /* don't print partial lines */
6767                        iter->seq.seq.len = save_len;
6768                        break;
6769                }
6770                if (ret != TRACE_TYPE_NO_CONSUME)
6771                        trace_consume(iter);
6772
6773                if (trace_seq_used(&iter->seq) >= cnt)
6774                        break;
6775
6776                /*
6777                 * A set full flag means we hit the end of the trace_seq
6778                 * buffer and should have left via the partial output check
6779                 * above; one of the trace_seq_* functions is being misused.
6780                 */
6781                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6782                          iter->ent->type);
6783        }
6784        trace_access_unlock(iter->cpu_file);
6785        trace_event_read_unlock();
6786
6787        /* Now copy what we have to the user */
6788        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6789        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6790                trace_seq_init(&iter->seq);
6791
6792        /*
6793         * If there was nothing to send to user, in spite of consuming trace
6794         * entries, go back to wait for more entries.
6795         */
6796        if (sret == -EBUSY)
6797                goto waitagain;
6798
6799out:
6800        mutex_unlock(&iter->mutex);
6801
6802        return sret;
6803}
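
/*
 * Illustrative user-space sketch: a minimal consumer of the "trace_pipe"
 * file, relying on the blocking-read semantics implemented by
 * tracing_read_pipe() above.  The usual tracefs mount point
 * /sys/kernel/tracing is assumed.
 *
 *      #include <fcntl.h>
 *      #include <stdio.h>
 *      #include <unistd.h>
 *
 *      int main(void)
 *      {
 *              char buf[4096];
 *              ssize_t n;
 *              int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *              if (fd < 0)
 *                      return 1;
 *              while ((n = read(fd, buf, sizeof(buf))) > 0)
 *                      fwrite(buf, 1, n, stdout);      // blocks when empty
 *              close(fd);
 *              return 0;
 *      }
 */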
6804
6805static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6806                                     unsigned int idx)
6807{
6808        __free_page(spd->pages[idx]);
6809}
6810
6811static size_t
6812tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6813{
6814        size_t count;
6815        int save_len;
6816        int ret;
6817
6818        /* Seq buffer is page-sized, exactly what we need. */
6819        for (;;) {
6820                save_len = iter->seq.seq.len;
6821                ret = print_trace_line(iter);
6822
6823                if (trace_seq_has_overflowed(&iter->seq)) {
6824                        iter->seq.seq.len = save_len;
6825                        break;
6826                }
6827
6828                /*
6829                 * This should not be hit, because PARTIAL_LINE should only
6830                 * be returned when iter->seq has overflowed, which is
6831                 * already checked above. But check it anyway to be safe.
6832                 */
6833                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6834                        iter->seq.seq.len = save_len;
6835                        break;
6836                }
6837
6838                count = trace_seq_used(&iter->seq) - save_len;
6839                if (rem < count) {
6840                        rem = 0;
6841                        iter->seq.seq.len = save_len;
6842                        break;
6843                }
6844
6845                if (ret != TRACE_TYPE_NO_CONSUME)
6846                        trace_consume(iter);
6847                rem -= count;
6848                if (!trace_find_next_entry_inc(iter))   {
6849                        rem = 0;
6850                        iter->ent = NULL;
6851                        break;
6852                }
6853        }
6854
6855        return rem;
6856}
6857
6858static ssize_t tracing_splice_read_pipe(struct file *filp,
6859                                        loff_t *ppos,
6860                                        struct pipe_inode_info *pipe,
6861                                        size_t len,
6862                                        unsigned int flags)
6863{
6864        struct page *pages_def[PIPE_DEF_BUFFERS];
6865        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6866        struct trace_iterator *iter = filp->private_data;
6867        struct splice_pipe_desc spd = {
6868                .pages          = pages_def,
6869                .partial        = partial_def,
6870                .nr_pages       = 0, /* This gets updated below. */
6871                .nr_pages_max   = PIPE_DEF_BUFFERS,
6872                .ops            = &default_pipe_buf_ops,
6873                .spd_release    = tracing_spd_release_pipe,
6874        };
6875        ssize_t ret;
6876        size_t rem;
6877        unsigned int i;
6878
6879        if (splice_grow_spd(pipe, &spd))
6880                return -ENOMEM;
6881
6882        mutex_lock(&iter->mutex);
6883
6884        if (iter->trace->splice_read) {
6885                ret = iter->trace->splice_read(iter, filp,
6886                                               ppos, pipe, len, flags);
6887                if (ret)
6888                        goto out_err;
6889        }
6890
6891        ret = tracing_wait_pipe(filp);
6892        if (ret <= 0)
6893                goto out_err;
6894
6895        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6896                ret = -EFAULT;
6897                goto out_err;
6898        }
6899
6900        trace_event_read_lock();
6901        trace_access_lock(iter->cpu_file);
6902
6903        /* Fill as many pages as possible. */
6904        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6905                spd.pages[i] = alloc_page(GFP_KERNEL);
6906                if (!spd.pages[i])
6907                        break;
6908
6909                rem = tracing_fill_pipe_page(rem, iter);
6910
6911                /* Copy the data into the page, so we can start over. */
6912                ret = trace_seq_to_buffer(&iter->seq,
6913                                          page_address(spd.pages[i]),
6914                                          trace_seq_used(&iter->seq));
6915                if (ret < 0) {
6916                        __free_page(spd.pages[i]);
6917                        break;
6918                }
6919                spd.partial[i].offset = 0;
6920                spd.partial[i].len = trace_seq_used(&iter->seq);
6921
6922                trace_seq_init(&iter->seq);
6923        }
6924
6925        trace_access_unlock(iter->cpu_file);
6926        trace_event_read_unlock();
6927        mutex_unlock(&iter->mutex);
6928
6929        spd.nr_pages = i;
6930
6931        if (i)
6932                ret = splice_to_pipe(pipe, &spd);
6933        else
6934                ret = 0;
6935out:
6936        splice_shrink_spd(&spd);
6937        return ret;
6938
6939out_err:
6940        mutex_unlock(&iter->mutex);
6941        goto out;
6942}
6943
6944static ssize_t
6945tracing_entries_read(struct file *filp, char __user *ubuf,
6946                     size_t cnt, loff_t *ppos)
6947{
6948        struct inode *inode = file_inode(filp);
6949        struct trace_array *tr = inode->i_private;
6950        int cpu = tracing_get_cpu(inode);
6951        char buf[64];
6952        int r = 0;
6953        ssize_t ret;
6954
6955        mutex_lock(&trace_types_lock);
6956
6957        if (cpu == RING_BUFFER_ALL_CPUS) {
6958                int cpu, buf_size_same;
6959                unsigned long size;
6960
6961                size = 0;
6962                buf_size_same = 1;
6963                /* check if all cpu sizes are same */
6964                for_each_tracing_cpu(cpu) {
6965                        /* fill in the size from first enabled cpu */
6966                        if (size == 0)
6967                                size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6968                        if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6969                                buf_size_same = 0;
6970                                break;
6971                        }
6972                }
6973
6974                if (buf_size_same) {
6975                        if (!ring_buffer_expanded)
6976                                r = sprintf(buf, "%lu (expanded: %lu)\n",
6977                                            size >> 10,
6978                                            trace_buf_size >> 10);
6979                        else
6980                                r = sprintf(buf, "%lu\n", size >> 10);
6981                } else
6982                        r = sprintf(buf, "X\n");
6983        } else
6984                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6985
6986        mutex_unlock(&trace_types_lock);
6987
6988        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6989        return ret;
6990}
6991
6992static ssize_t
6993tracing_entries_write(struct file *filp, const char __user *ubuf,
6994                      size_t cnt, loff_t *ppos)
6995{
6996        struct inode *inode = file_inode(filp);
6997        struct trace_array *tr = inode->i_private;
6998        unsigned long val;
6999        int ret;
7000
7001        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7002        if (ret)
7003                return ret;
7004
7005        /* must have at least 1 entry */
7006        if (!val)
7007                return -EINVAL;
7008
7009        /* value is in KB */
7010        val <<= 10;
7011        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7012        if (ret < 0)
7013                return ret;
7014
7015        *ppos += cnt;
7016
7017        return cnt;
7018}
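
/*
 * Usage sketch: the value written to "buffer_size_kb" is in kilobytes and
 * applies to each CPU (or to a single CPU through the per_cpu directory).
 * The usual /sys/kernel/tracing mount point is assumed.
 *
 *      echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *      echo 512  > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */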
7019
7020static ssize_t
7021tracing_total_entries_read(struct file *filp, char __user *ubuf,
7022                                size_t cnt, loff_t *ppos)
7023{
7024        struct trace_array *tr = filp->private_data;
7025        char buf[64];
7026        int r, cpu;
7027        unsigned long size = 0, expanded_size = 0;
7028
7029        mutex_lock(&trace_types_lock);
7030        for_each_tracing_cpu(cpu) {
7031                size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7032                if (!ring_buffer_expanded)
7033                        expanded_size += trace_buf_size >> 10;
7034        }
7035        if (ring_buffer_expanded)
7036                r = sprintf(buf, "%lu\n", size);
7037        else
7038                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7039        mutex_unlock(&trace_types_lock);
7040
7041        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7042}
7043
7044static ssize_t
7045tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7046                          size_t cnt, loff_t *ppos)
7047{
7048        /*
7049         * There is no need to read what the user has written; this function
7050         * only exists so that using "echo" on this file does not fail.
7051         */
7052
7053        *ppos += cnt;
7054
7055        return cnt;
7056}
7057
7058static int
7059tracing_free_buffer_release(struct inode *inode, struct file *filp)
7060{
7061        struct trace_array *tr = inode->i_private;
7062
7063        /* disable tracing ? */
7064        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7065                tracer_tracing_off(tr);
7066        /* resize the ring buffer to 0 */
7067        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7068
7069        trace_array_put(tr);
7070
7071        return 0;
7072}
7073
7074static ssize_t
7075tracing_mark_write(struct file *filp, const char __user *ubuf,
7076                                        size_t cnt, loff_t *fpos)
7077{
7078        struct trace_array *tr = filp->private_data;
7079        struct ring_buffer_event *event;
7080        enum event_trigger_type tt = ETT_NONE;
7081        struct trace_buffer *buffer;
7082        struct print_entry *entry;
7083        ssize_t written;
7084        int size;
7085        int len;
7086
7087/* Used in tracing_mark_raw_write() as well */
7088#define FAULTED_STR "<faulted>"
7089#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7090
7091        if (tracing_disabled)
7092                return -EINVAL;
7093
7094        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7095                return -EINVAL;
7096
7097        if (cnt > TRACE_BUF_SIZE)
7098                cnt = TRACE_BUF_SIZE;
7099
7100        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7101
7102        size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7103
7104        /* If the write is shorter than "<faulted>", make sure we can still store that */
7105        if (cnt < FAULTED_SIZE)
7106                size += FAULTED_SIZE - cnt;
7107
7108        buffer = tr->array_buffer.buffer;
7109        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7110                                            tracing_gen_ctx());
7111        if (unlikely(!event))
7112                /* Ring buffer disabled, return as if not open for write */
7113                return -EBADF;
7114
7115        entry = ring_buffer_event_data(event);
7116        entry->ip = _THIS_IP_;
7117
7118        len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7119        if (len) {
7120                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7121                cnt = FAULTED_SIZE;
7122                written = -EFAULT;
7123        } else
7124                written = cnt;
7125
7126        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7127                /* do not add \n before testing triggers, but add \0 */
7128                entry->buf[cnt] = '\0';
7129                tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7130        }
7131
7132        if (entry->buf[cnt - 1] != '\n') {
7133                entry->buf[cnt] = '\n';
7134                entry->buf[cnt + 1] = '\0';
7135        } else
7136                entry->buf[cnt] = '\0';
7137
7138        if (static_branch_unlikely(&trace_marker_exports_enabled))
7139                ftrace_exports(event, TRACE_EXPORT_MARKER);
7140        __buffer_unlock_commit(buffer, event);
7141
7142        if (tt)
7143                event_triggers_post_call(tr->trace_marker_file, tt);
7144
7145        return written;
7146}
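
/*
 * Illustrative user-space sketch: writing an annotation into the trace
 * through the "trace_marker" file handled by tracing_mark_write() above.
 * The usual /sys/kernel/tracing mount point is assumed.
 *
 *      #include <fcntl.h>
 *      #include <string.h>
 *      #include <unistd.h>
 *
 *      int main(void)
 *      {
 *              const char msg[] = "hello from user space";
 *              int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *              if (fd < 0)
 *                      return 1;
 *              write(fd, msg, strlen(msg));    // appears as a print event
 *              close(fd);
 *              return 0;
 *      }
 */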
7147
7148/* Limit it for now to 3K (including tag) */
7149#define RAW_DATA_MAX_SIZE (1024*3)
7150
7151static ssize_t
7152tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7153                                        size_t cnt, loff_t *fpos)
7154{
7155        struct trace_array *tr = filp->private_data;
7156        struct ring_buffer_event *event;
7157        struct trace_buffer *buffer;
7158        struct raw_data_entry *entry;
7159        ssize_t written;
7160        int size;
7161        int len;
7162
7163#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7164
7165        if (tracing_disabled)
7166                return -EINVAL;
7167
7168        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7169                return -EINVAL;
7170
7171        /* The marker must at least have a tag id */
7172        if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7173                return -EINVAL;
7174
7175        if (cnt > TRACE_BUF_SIZE)
7176                cnt = TRACE_BUF_SIZE;
7177
7178        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7179
7180        size = sizeof(*entry) + cnt;
7181        if (cnt < FAULT_SIZE_ID)
7182                size += FAULT_SIZE_ID - cnt;
7183
7184        buffer = tr->array_buffer.buffer;
7185        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7186                                            tracing_gen_ctx());
7187        if (!event)
7188                /* Ring buffer disabled, return as if not open for write */
7189                return -EBADF;
7190
7191        entry = ring_buffer_event_data(event);
7192
7193        len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7194        if (len) {
7195                entry->id = -1;
7196                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7197                written = -EFAULT;
7198        } else
7199                written = cnt;
7200
7201        __buffer_unlock_commit(buffer, event);
7202
7203        return written;
7204}
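
/*
 * Illustrative user-space sketch: writing a binary blob to
 * "trace_marker_raw".  As enforced above, the write must be at least
 * sizeof(unsigned int) bytes, and the leading integer is stored as the
 * entry's tag id.  The tag value 42 and the payload are arbitrary.
 *
 *      #include <fcntl.h>
 *      #include <unistd.h>
 *
 *      int main(void)
 *      {
 *              struct {
 *                      unsigned int id;
 *                      char payload[8];
 *              } raw = { .id = 42, .payload = "hello" };
 *              int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *              if (fd < 0)
 *                      return 1;
 *              write(fd, &raw, sizeof(raw));
 *              close(fd);
 *              return 0;
 *      }
 */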
7205
7206static int tracing_clock_show(struct seq_file *m, void *v)
7207{
7208        struct trace_array *tr = m->private;
7209        int i;
7210
7211        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7212                seq_printf(m,
7213                        "%s%s%s%s", i ? " " : "",
7214                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7215                        i == tr->clock_id ? "]" : "");
7216        seq_putc(m, '\n');
7217
7218        return 0;
7219}
7220
7221int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7222{
7223        int i;
7224
7225        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7226                if (strcmp(trace_clocks[i].name, clockstr) == 0)
7227                        break;
7228        }
7229        if (i == ARRAY_SIZE(trace_clocks))
7230                return -EINVAL;
7231
7232        mutex_lock(&trace_types_lock);
7233
7234        tr->clock_id = i;
7235
7236        ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7237
7238        /*
7239         * New clock may not be consistent with the previous clock.
7240         * Reset the buffer so that it doesn't have incomparable timestamps.
7241         */
7242        tracing_reset_online_cpus(&tr->array_buffer);
7243
7244#ifdef CONFIG_TRACER_MAX_TRACE
7245        if (tr->max_buffer.buffer)
7246                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7247        tracing_reset_online_cpus(&tr->max_buffer);
7248#endif
7249
7250        mutex_unlock(&trace_types_lock);
7251
7252        return 0;
7253}
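
/*
 * Illustrative sketch: selecting the "global" trace clock from kernel code,
 * equivalent to "echo global > trace_clock".  This assumes "global" is one
 * of the entries in trace_clocks[]; note that the buffers are reset by the
 * clock change.  example_switch_clock() is a hypothetical name.
 */
#if 0
static int example_switch_clock(struct trace_array *tr)
{
        return tracing_set_clock(tr, "global");
}
#endif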
7254
7255static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7256                                   size_t cnt, loff_t *fpos)
7257{
7258        struct seq_file *m = filp->private_data;
7259        struct trace_array *tr = m->private;
7260        char buf[64];
7261        const char *clockstr;
7262        int ret;
7263
7264        if (cnt >= sizeof(buf))
7265                return -EINVAL;
7266
7267        if (copy_from_user(buf, ubuf, cnt))
7268                return -EFAULT;
7269
7270        buf[cnt] = 0;
7271
7272        clockstr = strstrip(buf);
7273
7274        ret = tracing_set_clock(tr, clockstr);
7275        if (ret)
7276                return ret;
7277
7278        *fpos += cnt;
7279
7280        return cnt;
7281}
7282
7283static int tracing_clock_open(struct inode *inode, struct file *file)
7284{
7285        struct trace_array *tr = inode->i_private;
7286        int ret;
7287
7288        ret = tracing_check_open_get_tr(tr);
7289        if (ret)
7290                return ret;
7291
7292        ret = single_open(file, tracing_clock_show, inode->i_private);
7293        if (ret < 0)
7294                trace_array_put(tr);
7295
7296        return ret;
7297}
7298
7299static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7300{
7301        struct trace_array *tr = m->private;
7302
7303        mutex_lock(&trace_types_lock);
7304
7305        if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7306                seq_puts(m, "delta [absolute]\n");
7307        else
7308                seq_puts(m, "[delta] absolute\n");
7309
7310        mutex_unlock(&trace_types_lock);
7311
7312        return 0;
7313}
7314
7315static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7316{
7317        struct trace_array *tr = inode->i_private;
7318        int ret;
7319
7320        ret = tracing_check_open_get_tr(tr);
7321        if (ret)
7322                return ret;
7323
7324        ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7325        if (ret < 0)
7326                trace_array_put(tr);
7327
7328        return ret;
7329}
7330
7331u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7332{
7333        if (rbe == this_cpu_read(trace_buffered_event))
7334                return ring_buffer_time_stamp(buffer);
7335
7336        return ring_buffer_event_time_stamp(buffer, rbe);
7337}
7338
7339/*
7340 * Enable or disable use of the per-CPU trace_buffered_event when possible.
7341 */
7342int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7343{
7344        int ret = 0;
7345
7346        mutex_lock(&trace_types_lock);
7347
7348        if (set && tr->no_filter_buffering_ref++)
7349                goto out;
7350
7351        if (!set) {
7352                if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7353                        ret = -EINVAL;
7354                        goto out;
7355                }
7356
7357                --tr->no_filter_buffering_ref;
7358        }
7359 out:
7360        mutex_unlock(&trace_types_lock);
7361
7362        return ret;
7363}
7364
7365struct ftrace_buffer_info {
7366        struct trace_iterator   iter;
7367        void                    *spare;
7368        unsigned int            spare_cpu;
7369        unsigned int            read;
7370};
7371
7372#ifdef CONFIG_TRACER_SNAPSHOT
7373static int tracing_snapshot_open(struct inode *inode, struct file *file)
7374{
7375        struct trace_array *tr = inode->i_private;
7376        struct trace_iterator *iter;
7377        struct seq_file *m;
7378        int ret;
7379
7380        ret = tracing_check_open_get_tr(tr);
7381        if (ret)
7382                return ret;
7383
7384        if (file->f_mode & FMODE_READ) {
7385                iter = __tracing_open(inode, file, true);
7386                if (IS_ERR(iter))
7387                        ret = PTR_ERR(iter);
7388        } else {
7389                /* Writes still need the seq_file to hold the private data */
7390                ret = -ENOMEM;
7391                m = kzalloc(sizeof(*m), GFP_KERNEL);
7392                if (!m)
7393                        goto out;
7394                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7395                if (!iter) {
7396                        kfree(m);
7397                        goto out;
7398                }
7399                ret = 0;
7400
7401                iter->tr = tr;
7402                iter->array_buffer = &tr->max_buffer;
7403                iter->cpu_file = tracing_get_cpu(inode);
7404                m->private = iter;
7405                file->private_data = m;
7406        }
7407out:
7408        if (ret < 0)
7409                trace_array_put(tr);
7410
7411        return ret;
7412}
7413
7414static ssize_t
7415tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7416                       loff_t *ppos)
7417{
7418        struct seq_file *m = filp->private_data;
7419        struct trace_iterator *iter = m->private;
7420        struct trace_array *tr = iter->tr;
7421        unsigned long val;
7422        int ret;
7423
7424        ret = tracing_update_buffers();
7425        if (ret < 0)
7426                return ret;
7427
7428        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7429        if (ret)
7430                return ret;
7431
7432        mutex_lock(&trace_types_lock);
7433
7434        if (tr->current_trace->use_max_tr) {
7435                ret = -EBUSY;
7436                goto out;
7437        }
7438
7439        arch_spin_lock(&tr->max_lock);
7440        if (tr->cond_snapshot)
7441                ret = -EBUSY;
7442        arch_spin_unlock(&tr->max_lock);
7443        if (ret)
7444                goto out;
7445
7446        switch (val) {
7447        case 0:
7448                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7449                        ret = -EINVAL;
7450                        break;
7451                }
7452                if (tr->allocated_snapshot)
7453                        free_snapshot(tr);
7454                break;
7455        case 1:
7456/* Only allow per-cpu swap if the ring buffer supports it */
7457#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7458                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7459                        ret = -EINVAL;
7460                        break;
7461                }
7462#endif
7463                if (tr->allocated_snapshot)
7464                        ret = resize_buffer_duplicate_size(&tr->max_buffer,
7465                                        &tr->array_buffer, iter->cpu_file);
7466                else
7467                        ret = tracing_alloc_snapshot_instance(tr);
7468                if (ret < 0)
7469                        break;
7470                local_irq_disable();
7471                /* Now, we're going to swap */
7472                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7473                        update_max_tr(tr, current, smp_processor_id(), NULL);
7474                else
7475                        update_max_tr_single(tr, current, iter->cpu_file);
7476                local_irq_enable();
7477                break;
7478        default:
7479                if (tr->allocated_snapshot) {
7480                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7481                                tracing_reset_online_cpus(&tr->max_buffer);
7482                        else
7483                                tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7484                }
7485                break;
7486        }
7487
7488        if (ret >= 0) {
7489                *ppos += cnt;
7490                ret = cnt;
7491        }
7492out:
7493        mutex_unlock(&trace_types_lock);
7494        return ret;
7495}
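
/*
 * Usage sketch matching the switch statement above (the usual
 * /sys/kernel/tracing mount point is assumed):
 *
 *      echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed, then swap
 *      cat /sys/kernel/tracing/snapshot        # read the saved snapshot
 *      echo 2 > /sys/kernel/tracing/snapshot   # clear the snapshot buffer
 *      echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */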
7496
7497static int tracing_snapshot_release(struct inode *inode, struct file *file)
7498{
7499        struct seq_file *m = file->private_data;
7500        int ret;
7501
7502        ret = tracing_release(inode, file);
7503
7504        if (file->f_mode & FMODE_READ)
7505                return ret;
7506
7507        /* If write only, the seq_file is just a stub */
7508        if (m)
7509                kfree(m->private);
7510        kfree(m);
7511
7512        return 0;
7513}
7514
7515static int tracing_buffers_open(struct inode *inode, struct file *filp);
7516static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7517                                    size_t count, loff_t *ppos);
7518static int tracing_buffers_release(struct inode *inode, struct file *file);
7519static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7520                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7521
7522static int snapshot_raw_open(struct inode *inode, struct file *filp)
7523{
7524        struct ftrace_buffer_info *info;
7525        int ret;
7526
7527        /* The following checks for tracefs lockdown */
7528        ret = tracing_buffers_open(inode, filp);
7529        if (ret < 0)
7530                return ret;
7531
7532        info = filp->private_data;
7533
7534        if (info->iter.trace->use_max_tr) {
7535                tracing_buffers_release(inode, filp);
7536                return -EBUSY;
7537        }
7538
7539        info->iter.snapshot = true;
7540        info->iter.array_buffer = &info->iter.tr->max_buffer;
7541
7542        return ret;
7543}
7544
7545#endif /* CONFIG_TRACER_SNAPSHOT */
7546
7547
7548static const struct file_operations tracing_thresh_fops = {
7549        .open           = tracing_open_generic,
7550        .read           = tracing_thresh_read,
7551        .write          = tracing_thresh_write,
7552        .llseek         = generic_file_llseek,
7553};
7554
7555#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7556static const struct file_operations tracing_max_lat_fops = {
7557        .open           = tracing_open_generic,
7558        .read           = tracing_max_lat_read,
7559        .write          = tracing_max_lat_write,
7560        .llseek         = generic_file_llseek,
7561};
7562#endif
7563
7564static const struct file_operations set_tracer_fops = {
7565        .open           = tracing_open_generic,
7566        .read           = tracing_set_trace_read,
7567        .write          = tracing_set_trace_write,
7568        .llseek         = generic_file_llseek,
7569};
7570
7571static const struct file_operations tracing_pipe_fops = {
7572        .open           = tracing_open_pipe,
7573        .poll           = tracing_poll_pipe,
7574        .read           = tracing_read_pipe,
7575        .splice_read    = tracing_splice_read_pipe,
7576        .release        = tracing_release_pipe,
7577        .llseek         = no_llseek,
7578};
7579
7580static const struct file_operations tracing_entries_fops = {
7581        .open           = tracing_open_generic_tr,
7582        .read           = tracing_entries_read,
7583        .write          = tracing_entries_write,
7584        .llseek         = generic_file_llseek,
7585        .release        = tracing_release_generic_tr,
7586};
7587
7588static const struct file_operations tracing_total_entries_fops = {
7589        .open           = tracing_open_generic_tr,
7590        .read           = tracing_total_entries_read,
7591        .llseek         = generic_file_llseek,
7592        .release        = tracing_release_generic_tr,
7593};
7594
7595static const struct file_operations tracing_free_buffer_fops = {
7596        .open           = tracing_open_generic_tr,
7597        .write          = tracing_free_buffer_write,
7598        .release        = tracing_free_buffer_release,
7599};
7600
7601static const struct file_operations tracing_mark_fops = {
7602        .open           = tracing_mark_open,
7603        .write          = tracing_mark_write,
7604        .release        = tracing_release_generic_tr,
7605};
7606
7607static const struct file_operations tracing_mark_raw_fops = {
7608        .open           = tracing_mark_open,
7609        .write          = tracing_mark_raw_write,
7610        .release        = tracing_release_generic_tr,
7611};
7612
7613static const struct file_operations trace_clock_fops = {
7614        .open           = tracing_clock_open,
7615        .read           = seq_read,
7616        .llseek         = seq_lseek,
7617        .release        = tracing_single_release_tr,
7618        .write          = tracing_clock_write,
7619};
7620
7621static const struct file_operations trace_time_stamp_mode_fops = {
7622        .open           = tracing_time_stamp_mode_open,
7623        .read           = seq_read,
7624        .llseek         = seq_lseek,
7625        .release        = tracing_single_release_tr,
7626};
7627
7628#ifdef CONFIG_TRACER_SNAPSHOT
7629static const struct file_operations snapshot_fops = {
7630        .open           = tracing_snapshot_open,
7631        .read           = seq_read,
7632        .write          = tracing_snapshot_write,
7633        .llseek         = tracing_lseek,
7634        .release        = tracing_snapshot_release,
7635};
7636
7637static const struct file_operations snapshot_raw_fops = {
7638        .open           = snapshot_raw_open,
7639        .read           = tracing_buffers_read,
7640        .release        = tracing_buffers_release,
7641        .splice_read    = tracing_buffers_splice_read,
7642        .llseek         = no_llseek,
7643};
7644
7645#endif /* CONFIG_TRACER_SNAPSHOT */
7646
7647/*
7648 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7649 * @filp: The active open file structure
7650 * @ubuf: The userspace provided buffer holding the value to write
7651 * @cnt: The number of bytes to read from @ubuf
7652 * @ppos: The current "file" position
7653 *
7654 * This function implements the write interface for a struct trace_min_max_param.
7655 * The filp->private_data must point to a trace_min_max_param structure that
7656 * defines where to write the value, the min and the max acceptable values,
7657 * and a lock to protect the write.
7658 */
7659static ssize_t
7660trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7661{
7662        struct trace_min_max_param *param = filp->private_data;
7663        u64 val;
7664        int err;
7665
7666        if (!param)
7667                return -EFAULT;
7668
7669        err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7670        if (err)
7671                return err;
7672
7673        if (param->lock)
7674                mutex_lock(param->lock);
7675
7676        if (param->min && val < *param->min)
7677                err = -EINVAL;
7678
7679        if (param->max && val > *param->max)
7680                err = -EINVAL;
7681
7682        if (!err)
7683                *param->val = val;
7684
7685        if (param->lock)
7686                mutex_unlock(param->lock);
7687
7688        if (err)
7689                return err;
7690
7691        return cnt;
7692}
7693
7694/*
7695 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7696 * @filp: The active open file structure
7697 * @ubuf: The userspace provided buffer to read value into
7698 * @cnt: The maximum number of bytes to read
7699 * @ppos: The current "file" position
7700 *
7701 * This function implements the read interface for a struct trace_min_max_param.
7702 * The filp->private_data must point to a trace_min_max_param struct with valid
7703 * data.
7704 */
7705static ssize_t
7706trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7707{
7708        struct trace_min_max_param *param = filp->private_data;
7709        char buf[U64_STR_SIZE];
7710        int len;
7711        u64 val;
7712
7713        if (!param)
7714                return -EFAULT;
7715
7716        val = *param->val;
7717
7718        if (cnt > sizeof(buf))
7719                cnt = sizeof(buf);
7720
7721        len = snprintf(buf, sizeof(buf), "%llu\n", val);
7722
7723        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7724}
7725
7726const struct file_operations trace_min_max_fops = {
7727        .open           = tracing_open_generic,
7728        .read           = trace_min_max_read,
7729        .write          = trace_min_max_write,
7730};
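
/*
 * Illustrative sketch: exposing a clamped u64 through trace_min_max_fops.
 * The variable and file names are hypothetical; the field names follow the
 * struct trace_min_max_param usage above.
 */
#if 0
static u64 example_value = 50;
static u64 example_min = 1;
static u64 example_max = 100;

static struct trace_min_max_param example_param = {
        .lock   = NULL,                 /* or a mutex guarding example_value */
        .val    = &example_value,
        .min    = &example_min,
        .max    = &example_max,
};

/*
 * The control file itself would typically be created with something like:
 * trace_create_file("example_value", 0640, parent, &example_param,
 *                   &trace_min_max_fops);
 */
#endif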
7731
7732#define TRACING_LOG_ERRS_MAX    8
7733#define TRACING_LOG_LOC_MAX     128
7734
7735#define CMD_PREFIX "  Command: "
7736
7737struct err_info {
7738        const char      **errs; /* ptr to loc-specific array of err strings */
7739        u8              type;   /* index into errs -> specific err string */
7740        u16             pos;    /* caret position */
7741        u64             ts;
7742};
7743
7744struct tracing_log_err {
7745        struct list_head        list;
7746        struct err_info         info;
7747        char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7748        char                    *cmd;                     /* what caused err */
7749};
7750
7751static DEFINE_MUTEX(tracing_err_log_lock);
7752
7753static struct tracing_log_err *alloc_tracing_log_err(int len)
7754{
7755        struct tracing_log_err *err;
7756
7757        err = kzalloc(sizeof(*err), GFP_KERNEL);
7758        if (!err)
7759                return ERR_PTR(-ENOMEM);
7760
7761        err->cmd = kzalloc(len, GFP_KERNEL);
7762        if (!err->cmd) {
7763                kfree(err);
7764                return ERR_PTR(-ENOMEM);
7765        }
7766
7767        return err;
7768}
7769
7770static void free_tracing_log_err(struct tracing_log_err *err)
7771{
7772        kfree(err->cmd);
7773        kfree(err);
7774}
7775
7776static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7777                                                   int len)
7778{
7779        struct tracing_log_err *err;
7780
7781        if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7782                err = alloc_tracing_log_err(len);
7783                if (PTR_ERR(err) != -ENOMEM)
7784                        tr->n_err_log_entries++;
7785
7786                return err;
7787        }
7788
7789        err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7790        kfree(err->cmd);
7791        err->cmd = kzalloc(len, GFP_KERNEL);
7792        if (!err->cmd)
7793                return ERR_PTR(-ENOMEM);
7794        list_del(&err->list);
7795
7796        return err;
7797}
7798
7799/**
7800 * err_pos - find the position of a string within a command for error careting
7801 * @cmd: The tracing command that caused the error
7802 * @str: The string to position the caret at within @cmd
7803 *
7804 * Finds the position of the first occurrence of @str within @cmd.  The
7805 * return value can be passed to tracing_log_err() for caret placement
7806 * within @cmd.
7807 *
7808 * Returns the index within @cmd of the first occurrence of @str or 0
7809 * if @str was not found.
7810 */
7811unsigned int err_pos(char *cmd, const char *str)
7812{
7813        char *found;
7814
7815        if (WARN_ON(!strlen(cmd)))
7816                return 0;
7817
7818        found = strstr(cmd, str);
7819        if (found)
7820                return found - cmd;
7821
7822        return 0;
7823}
7824
7825/**
7826 * tracing_log_err - write an error to the tracing error log
7827 * @tr: The associated trace array for the error (NULL for top level array)
7828 * @loc: A string describing where the error occurred
7829 * @cmd: The tracing command that caused the error
7830 * @errs: The array of loc-specific static error strings
7831 * @type: The index into errs[], which produces the specific static err string
7832 * @pos: The position the caret should be placed in the cmd
7833 *
7834 * Writes an error into tracing/error_log of the form:
7835 *
7836 * <loc>: error: <text>
7837 *   Command: <cmd>
7838 *              ^
7839 *
7840 * tracing/error_log is a small log file containing the last
7841 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7842 * unless there has been a tracing error, and the error log can be
7843 * cleared and have its memory freed by writing the empty string in
7844 * truncation mode to it, i.e. echo > tracing/error_log.
7845 *
7846 * NOTE: the @errs array along with the @type param are used to
7847 * produce a static error string - this string is not copied and saved
7848 * when the error is logged - only a pointer to it is saved.  See
7849 * existing callers for examples of how static strings are typically
7850 * defined for use with tracing_log_err().
7851 */
7852void tracing_log_err(struct trace_array *tr,
7853                     const char *loc, const char *cmd,
7854                     const char **errs, u8 type, u16 pos)
7855{
7856        struct tracing_log_err *err;
7857        int len = 0;
7858
7859        if (!tr)
7860                tr = &global_trace;
7861
7862        len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7863
7864        mutex_lock(&tracing_err_log_lock);
7865        err = get_tracing_log_err(tr, len);
7866        if (PTR_ERR(err) == -ENOMEM) {
7867                mutex_unlock(&tracing_err_log_lock);
7868                return;
7869        }
7870
7871        snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7872        snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7873
7874        err->info.errs = errs;
7875        err->info.type = type;
7876        err->info.pos = pos;
7877        err->info.ts = local_clock();
7878
7879        list_add_tail(&err->list, &tr->err_log);
7880        mutex_unlock(&tracing_err_log_lock);
7881}
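
/*
 * Illustrative sketch: how a caller typically defines the static error
 * strings and reports an error with a caret under the offending token.
 * The names example_errs, example_report_error() and "bad_token" are
 * hypothetical.
 */
#if 0
static const char *example_errs[] = {
        "Invalid example argument",
        "Duplicate example name",
};

static void example_report_error(struct trace_array *tr, char *cmd)
{
        /* Logs "<loc>: error: Invalid example argument" plus the command */
        tracing_log_err(tr, "example", cmd, example_errs, 0,
                        err_pos(cmd, "bad_token"));
}
#endif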
7882
7883static void clear_tracing_err_log(struct trace_array *tr)
7884{
7885        struct tracing_log_err *err, *next;
7886
7887        mutex_lock(&tracing_err_log_lock);
7888        list_for_each_entry_safe(err, next, &tr->err_log, list) {
7889                list_del(&err->list);
7890                free_tracing_log_err(err);
7891        }
7892
7893        tr->n_err_log_entries = 0;
7894        mutex_unlock(&tracing_err_log_lock);
7895}
7896
7897static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7898{
7899        struct trace_array *tr = m->private;
7900
7901        mutex_lock(&tracing_err_log_lock);
7902
7903        return seq_list_start(&tr->err_log, *pos);
7904}
7905
7906static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7907{
7908        struct trace_array *tr = m->private;
7909
7910        return seq_list_next(v, &tr->err_log, pos);
7911}
7912
7913static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7914{
7915        mutex_unlock(&tracing_err_log_lock);
7916}
7917
7918static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7919{
7920        u16 i;
7921
7922        for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7923                seq_putc(m, ' ');
7924        for (i = 0; i < pos; i++)
7925                seq_putc(m, ' ');
7926        seq_puts(m, "^\n");
7927}
7928
7929static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7930{
7931        struct tracing_log_err *err = v;
7932
7933        if (err) {
7934                const char *err_text = err->info.errs[err->info.type];
7935                u64 sec = err->info.ts;
7936                u32 nsec;
7937
7938                nsec = do_div(sec, NSEC_PER_SEC);
7939                seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7940                           err->loc, err_text);
7941                seq_printf(m, "%s", err->cmd);
7942                tracing_err_log_show_pos(m, err->info.pos);
7943        }
7944
7945        return 0;
7946}
7947
7948static const struct seq_operations tracing_err_log_seq_ops = {
7949        .start  = tracing_err_log_seq_start,
7950        .next   = tracing_err_log_seq_next,
7951        .stop   = tracing_err_log_seq_stop,
7952        .show   = tracing_err_log_seq_show
7953};
7954
7955static int tracing_err_log_open(struct inode *inode, struct file *file)
7956{
7957        struct trace_array *tr = inode->i_private;
7958        int ret = 0;
7959
7960        ret = tracing_check_open_get_tr(tr);
7961        if (ret)
7962                return ret;
7963
7964        /* If this file was opened for write, then erase contents */
7965        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7966                clear_tracing_err_log(tr);
7967
7968        if (file->f_mode & FMODE_READ) {
7969                ret = seq_open(file, &tracing_err_log_seq_ops);
7970                if (!ret) {
7971                        struct seq_file *m = file->private_data;
7972                        m->private = tr;
7973                } else {
7974                        trace_array_put(tr);
7975                }
7976        }
7977        return ret;
7978}
7979
7980static ssize_t tracing_err_log_write(struct file *file,
7981                                     const char __user *buffer,
7982                                     size_t count, loff_t *ppos)
7983{
7984        return count;
7985}
7986
7987static int tracing_err_log_release(struct inode *inode, struct file *file)
7988{
7989        struct trace_array *tr = inode->i_private;
7990
7991        trace_array_put(tr);
7992
7993        if (file->f_mode & FMODE_READ)
7994                seq_release(inode, file);
7995
7996        return 0;
7997}
7998
7999static const struct file_operations tracing_err_log_fops = {
8000        .open           = tracing_err_log_open,
8001        .write          = tracing_err_log_write,
8002        .read           = seq_read,
8003        .llseek         = seq_lseek,
8004        .release        = tracing_err_log_release,
8005};
8006
8007static int tracing_buffers_open(struct inode *inode, struct file *filp)
8008{
8009        struct trace_array *tr = inode->i_private;
8010        struct ftrace_buffer_info *info;
8011        int ret;
8012
8013        ret = tracing_check_open_get_tr(tr);
8014        if (ret)
8015                return ret;
8016
8017        info = kvzalloc(sizeof(*info), GFP_KERNEL);
8018        if (!info) {
8019                trace_array_put(tr);
8020                return -ENOMEM;
8021        }
8022
8023        mutex_lock(&trace_types_lock);
8024
8025        info->iter.tr           = tr;
8026        info->iter.cpu_file     = tracing_get_cpu(inode);
8027        info->iter.trace        = tr->current_trace;
8028        info->iter.array_buffer = &tr->array_buffer;
8029        info->spare             = NULL;
8030        /* Force reading ring buffer for first read */
8031        info->read              = (unsigned int)-1;
8032
8033        filp->private_data = info;
8034
8035        tr->trace_ref++;
8036
8037        mutex_unlock(&trace_types_lock);
8038
8039        ret = nonseekable_open(inode, filp);
8040        if (ret < 0)
8041                trace_array_put(tr);
8042
8043        return ret;
8044}
8045
8046static __poll_t
8047tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8048{
8049        struct ftrace_buffer_info *info = filp->private_data;
8050        struct trace_iterator *iter = &info->iter;
8051
8052        return trace_poll(iter, filp, poll_table);
8053}
8054
8055static ssize_t
8056tracing_buffers_read(struct file *filp, char __user *ubuf,
8057                     size_t count, loff_t *ppos)
8058{
8059        struct ftrace_buffer_info *info = filp->private_data;
8060        struct trace_iterator *iter = &info->iter;
8061        ssize_t ret = 0;
8062        ssize_t size;
8063
8064        if (!count)
8065                return 0;
8066
8067#ifdef CONFIG_TRACER_MAX_TRACE
8068        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8069                return -EBUSY;
8070#endif
8071
8072        if (!info->spare) {
8073                info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8074                                                          iter->cpu_file);
8075                if (IS_ERR(info->spare)) {
8076                        ret = PTR_ERR(info->spare);
8077                        info->spare = NULL;
8078                } else {
8079                        info->spare_cpu = iter->cpu_file;
8080                }
8081        }
8082        if (!info->spare)
8083                return ret;
8084
8085        /* Do we have previous read data to read? */
8086        if (info->read < PAGE_SIZE)
8087                goto read;
8088
8089 again:
8090        trace_access_lock(iter->cpu_file);
8091        ret = ring_buffer_read_page(iter->array_buffer->buffer,
8092                                    &info->spare,
8093                                    count,
8094                                    iter->cpu_file, 0);
8095        trace_access_unlock(iter->cpu_file);
8096
8097        if (ret < 0) {
8098                if (trace_empty(iter)) {
8099                        if ((filp->f_flags & O_NONBLOCK))
8100                                return -EAGAIN;
8101
8102                        ret = wait_on_pipe(iter, 0);
8103                        if (ret)
8104                                return ret;
8105
8106                        goto again;
8107                }
8108                return 0;
8109        }
8110
8111        info->read = 0;
8112 read:
8113        size = PAGE_SIZE - info->read;
8114        if (size > count)
8115                size = count;
8116
8117        ret = copy_to_user(ubuf, info->spare + info->read, size);
8118        if (ret == size)
8119                return -EFAULT;
8120
8121        size -= ret;
8122
8123        *ppos += size;
8124        info->read += size;
8125
8126        return size;
8127}
8128
8129static int tracing_buffers_release(struct inode *inode, struct file *file)
8130{
8131        struct ftrace_buffer_info *info = file->private_data;
8132        struct trace_iterator *iter = &info->iter;
8133
8134        mutex_lock(&trace_types_lock);
8135
8136        iter->tr->trace_ref--;
8137
8138        __trace_array_put(iter->tr);
8139
8140        if (info->spare)
8141                ring_buffer_free_read_page(iter->array_buffer->buffer,
8142                                           info->spare_cpu, info->spare);
8143        kvfree(info);
8144
8145        mutex_unlock(&trace_types_lock);
8146
8147        return 0;
8148}
8149
8150struct buffer_ref {
8151        struct trace_buffer     *buffer;
8152        void                    *page;
8153        int                     cpu;
8154        refcount_t              refcount;
8155};
8156
8157static void buffer_ref_release(struct buffer_ref *ref)
8158{
8159        if (!refcount_dec_and_test(&ref->refcount))
8160                return;
8161        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8162        kfree(ref);
8163}
8164
8165static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8166                                    struct pipe_buffer *buf)
8167{
8168        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8169
8170        buffer_ref_release(ref);
8171        buf->private = 0;
8172}
8173
8174static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8175                                struct pipe_buffer *buf)
8176{
8177        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8178
8179        if (refcount_read(&ref->refcount) > INT_MAX/2)
8180                return false;
8181
8182        refcount_inc(&ref->refcount);
8183        return true;
8184}
8185
8186/* Pipe buffer operations for a buffer. */
8187static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8188        .release                = buffer_pipe_buf_release,
8189        .get                    = buffer_pipe_buf_get,
8190};
8191
8192/*
8193 * Callback from splice_to_pipe(); releases any pages still held in the
8194 * spd if we errored out while filling the pipe.
8195 */
8196static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8197{
8198        struct buffer_ref *ref =
8199                (struct buffer_ref *)spd->partial[i].private;
8200
8201        buffer_ref_release(ref);
8202        spd->partial[i].private = 0;
8203}
8204
8205static ssize_t
8206tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8207                            struct pipe_inode_info *pipe, size_t len,
8208                            unsigned int flags)
8209{
8210        struct ftrace_buffer_info *info = file->private_data;
8211        struct trace_iterator *iter = &info->iter;
8212        struct partial_page partial_def[PIPE_DEF_BUFFERS];
8213        struct page *pages_def[PIPE_DEF_BUFFERS];
8214        struct splice_pipe_desc spd = {
8215                .pages          = pages_def,
8216                .partial        = partial_def,
8217                .nr_pages_max   = PIPE_DEF_BUFFERS,
8218                .ops            = &buffer_pipe_buf_ops,
8219                .spd_release    = buffer_spd_release,
8220        };
8221        struct buffer_ref *ref;
8222        int entries, i;
8223        ssize_t ret = 0;
8224
8225#ifdef CONFIG_TRACER_MAX_TRACE
8226        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8227                return -EBUSY;
8228#endif
8229
8230        if (*ppos & (PAGE_SIZE - 1))
8231                return -EINVAL;
8232
8233        if (len & (PAGE_SIZE - 1)) {
8234                if (len < PAGE_SIZE)
8235                        return -EINVAL;
8236                len &= PAGE_MASK;
8237        }
8238
8239        if (splice_grow_spd(pipe, &spd))
8240                return -ENOMEM;
8241
8242 again:
8243        trace_access_lock(iter->cpu_file);
8244        entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8245
8246        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8247                struct page *page;
8248                int r;
8249
8250                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8251                if (!ref) {
8252                        ret = -ENOMEM;
8253                        break;
8254                }
8255
8256                refcount_set(&ref->refcount, 1);
8257                ref->buffer = iter->array_buffer->buffer;
8258                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8259                if (IS_ERR(ref->page)) {
8260                        ret = PTR_ERR(ref->page);
8261                        ref->page = NULL;
8262                        kfree(ref);
8263                        break;
8264                }
8265                ref->cpu = iter->cpu_file;
8266
8267                r = ring_buffer_read_page(ref->buffer, &ref->page,
8268                                          len, iter->cpu_file, 1);
8269                if (r < 0) {
8270                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
8271                                                   ref->page);
8272                        kfree(ref);
8273                        break;
8274                }
8275
8276                page = virt_to_page(ref->page);
8277
8278                spd.pages[i] = page;
8279                spd.partial[i].len = PAGE_SIZE;
8280                spd.partial[i].offset = 0;
8281                spd.partial[i].private = (unsigned long)ref;
8282                spd.nr_pages++;
8283                *ppos += PAGE_SIZE;
8284
8285                entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8286        }
8287
8288        trace_access_unlock(iter->cpu_file);
8289        spd.nr_pages = i;
8290
8291        /* did we read anything? */
8292        if (!spd.nr_pages) {
8293                if (ret)
8294                        goto out;
8295
8296                ret = -EAGAIN;
8297                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8298                        goto out;
8299
8300                ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8301                if (ret)
8302                        goto out;
8303
8304                goto again;
8305        }
8306
8307        ret = splice_to_pipe(pipe, &spd);
8308out:
8309        splice_shrink_spd(&spd);
8310
8311        return ret;
8312}
8313
8314static const struct file_operations tracing_buffers_fops = {
8315        .open           = tracing_buffers_open,
8316        .read           = tracing_buffers_read,
8317        .poll           = tracing_buffers_poll,
8318        .release        = tracing_buffers_release,
8319        .splice_read    = tracing_buffers_splice_read,
8320        .llseek         = no_llseek,
8321};
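/*
 * These fops back the per_cpu/cpuN/trace_pipe_raw files created in
 * tracing_init_tracefs_percpu() below. Reads and splices hand out raw
 * ring-buffer pages, which is why the splice offset and length above must
 * be PAGE_SIZE aligned. A rough user-space sketch (error handling omitted,
 * 4K pages and the usual tracefs mount point assumed):
 *
 *   int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *   char page[4096];
 *
 *   while (read(fd, page, sizeof(page)) > 0)
 *           ;  // each read returns at most one binary buffer page
 */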
8322
8323static ssize_t
8324tracing_stats_read(struct file *filp, char __user *ubuf,
8325                   size_t count, loff_t *ppos)
8326{
8327        struct inode *inode = file_inode(filp);
8328        struct trace_array *tr = inode->i_private;
8329        struct array_buffer *trace_buf = &tr->array_buffer;
8330        int cpu = tracing_get_cpu(inode);
8331        struct trace_seq *s;
8332        unsigned long cnt;
8333        unsigned long long t;
8334        unsigned long usec_rem;
8335
8336        s = kmalloc(sizeof(*s), GFP_KERNEL);
8337        if (!s)
8338                return -ENOMEM;
8339
8340        trace_seq_init(s);
8341
8342        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8343        trace_seq_printf(s, "entries: %ld\n", cnt);
8344
8345        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8346        trace_seq_printf(s, "overrun: %ld\n", cnt);
8347
8348        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8349        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8350
8351        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8352        trace_seq_printf(s, "bytes: %ld\n", cnt);
8353
8354        if (trace_clocks[tr->clock_id].in_ns) {
8355                /* local or global for trace_clock */
8356                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8357                usec_rem = do_div(t, USEC_PER_SEC);
8358                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8359                                                                t, usec_rem);
8360
8361                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8362                usec_rem = do_div(t, USEC_PER_SEC);
8363                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8364        } else {
8365                /* counter or tsc mode for trace_clock */
8366                trace_seq_printf(s, "oldest event ts: %llu\n",
8367                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8368
8369                trace_seq_printf(s, "now ts: %llu\n",
8370                                ring_buffer_time_stamp(trace_buf->buffer));
8371        }
8372
8373        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8374        trace_seq_printf(s, "dropped events: %ld\n", cnt);
8375
8376        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8377        trace_seq_printf(s, "read events: %ld\n", cnt);
8378
8379        count = simple_read_from_buffer(ubuf, count, ppos,
8380                                        s->buffer, trace_seq_used(s));
8381
8382        kfree(s);
8383
8384        return count;
8385}
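/*
 * Example of the per_cpu/cpuN/stats output assembled above (all values
 * invented; the two "ts" lines take the plain-integer form when the trace
 * clock is not in nanoseconds):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 53248
 *   oldest event ts:  5348.887237
 *   now ts:  5349.913703
 *   dropped events: 0
 *   read events: 128
 */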
8386
8387static const struct file_operations tracing_stats_fops = {
8388        .open           = tracing_open_generic_tr,
8389        .read           = tracing_stats_read,
8390        .llseek         = generic_file_llseek,
8391        .release        = tracing_release_generic_tr,
8392};
8393
8394#ifdef CONFIG_DYNAMIC_FTRACE
8395
8396static ssize_t
8397tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8398                  size_t cnt, loff_t *ppos)
8399{
8400        ssize_t ret;
8401        char *buf;
8402        int r;
8403
8404        /* 256 should be plenty to hold the amount needed */
8405        buf = kmalloc(256, GFP_KERNEL);
8406        if (!buf)
8407                return -ENOMEM;
8408
8409        r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8410                      ftrace_update_tot_cnt,
8411                      ftrace_number_of_pages,
8412                      ftrace_number_of_groups);
8413
8414        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8415        kfree(buf);
8416        return ret;
8417}
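/*
 * Backs "dyn_ftrace_total_info"; with the format string above, a sample
 * line (numbers invented) looks like:
 *
 *   85942 pages:342 groups: 28
 */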
8418
8419static const struct file_operations tracing_dyn_info_fops = {
8420        .open           = tracing_open_generic,
8421        .read           = tracing_read_dyn_info,
8422        .llseek         = generic_file_llseek,
8423};
8424#endif /* CONFIG_DYNAMIC_FTRACE */
8425
8426#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8427static void
8428ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8429                struct trace_array *tr, struct ftrace_probe_ops *ops,
8430                void *data)
8431{
8432        tracing_snapshot_instance(tr);
8433}
8434
8435static void
8436ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8437                      struct trace_array *tr, struct ftrace_probe_ops *ops,
8438                      void *data)
8439{
8440        struct ftrace_func_mapper *mapper = data;
8441        long *count = NULL;
8442
8443        if (mapper)
8444                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8445
8446        if (count) {
8447
8448                if (*count <= 0)
8449                        return;
8450
8451                (*count)--;
8452        }
8453
8454        tracing_snapshot_instance(tr);
8455}
8456
8457static int
8458ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8459                      struct ftrace_probe_ops *ops, void *data)
8460{
8461        struct ftrace_func_mapper *mapper = data;
8462        long *count = NULL;
8463
8464        seq_printf(m, "%ps:", (void *)ip);
8465
8466        seq_puts(m, "snapshot");
8467
8468        if (mapper)
8469                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8470
8471        if (count)
8472                seq_printf(m, ":count=%ld\n", *count);
8473        else
8474                seq_puts(m, ":unlimited\n");
8475
8476        return 0;
8477}
8478
8479static int
8480ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8481                     unsigned long ip, void *init_data, void **data)
8482{
8483        struct ftrace_func_mapper *mapper = *data;
8484
8485        if (!mapper) {
8486                mapper = allocate_ftrace_func_mapper();
8487                if (!mapper)
8488                        return -ENOMEM;
8489                *data = mapper;
8490        }
8491
8492        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8493}
8494
8495static void
8496ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8497                     unsigned long ip, void *data)
8498{
8499        struct ftrace_func_mapper *mapper = data;
8500
8501        if (!ip) {
8502                if (!mapper)
8503                        return;
8504                free_ftrace_func_mapper(mapper, NULL);
8505                return;
8506        }
8507
8508        ftrace_func_mapper_remove_ip(mapper, ip);
8509}
8510
8511static struct ftrace_probe_ops snapshot_probe_ops = {
8512        .func                   = ftrace_snapshot,
8513        .print                  = ftrace_snapshot_print,
8514};
8515
8516static struct ftrace_probe_ops snapshot_count_probe_ops = {
8517        .func                   = ftrace_count_snapshot,
8518        .print                  = ftrace_snapshot_print,
8519        .init                   = ftrace_snapshot_init,
8520        .free                   = ftrace_snapshot_free,
8521};
8522
8523static int
8524ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8525                               char *glob, char *cmd, char *param, int enable)
8526{
8527        struct ftrace_probe_ops *ops;
8528        void *count = (void *)-1;
8529        char *number;
8530        int ret;
8531
8532        if (!tr)
8533                return -ENODEV;
8534
8535        /* hash funcs only work with set_ftrace_filter */
8536        if (!enable)
8537                return -EINVAL;
8538
8539        ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8540
8541        if (glob[0] == '!')
8542                return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8543
8544        if (!param)
8545                goto out_reg;
8546
8547        number = strsep(&param, ":");
8548
8549        if (!strlen(number))
8550                goto out_reg;
8551
8552        /*
8553         * We use the callback data field (which is a pointer)
8554         * as our counter.
8555         */
8556        ret = kstrtoul(number, 0, (unsigned long *)&count);
8557        if (ret)
8558                return ret;
8559
8560 out_reg:
8561        ret = tracing_alloc_snapshot_instance(tr);
8562        if (ret < 0)
8563                goto out;
8564
8565        ret = register_ftrace_function_probe(glob, tr, ops, count);
8566
8567 out:
8568        return ret < 0 ? ret : 0;
8569}
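/*
 * Rough illustration of the resulting "snapshot" command syntax accepted
 * through set_ftrace_filter (see Documentation/trace/ftrace.rst for the
 * authoritative description; the function name is only an example):
 *
 *   # echo 'schedule:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter   # only the first 5 hits
 *   # echo '!schedule:snapshot'  > set_ftrace_filter   # remove the probe
 *
 * The optional ":<count>" is parsed with kstrtoul() above and becomes the
 * per-ip counter decremented in ftrace_count_snapshot().
 */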
8570
8571static struct ftrace_func_command ftrace_snapshot_cmd = {
8572        .name                   = "snapshot",
8573        .func                   = ftrace_trace_snapshot_callback,
8574};
8575
8576static __init int register_snapshot_cmd(void)
8577{
8578        return register_ftrace_command(&ftrace_snapshot_cmd);
8579}
8580#else
8581static inline __init int register_snapshot_cmd(void) { return 0; }
8582#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8583
8584static struct dentry *tracing_get_dentry(struct trace_array *tr)
8585{
8586        if (WARN_ON(!tr->dir))
8587                return ERR_PTR(-ENODEV);
8588
8589        /* Top directory uses NULL as the parent */
8590        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8591                return NULL;
8592
8593        /* All sub buffers have a descriptor */
8594        return tr->dir;
8595}
8596
8597static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8598{
8599        struct dentry *d_tracer;
8600
8601        if (tr->percpu_dir)
8602                return tr->percpu_dir;
8603
8604        d_tracer = tracing_get_dentry(tr);
8605        if (IS_ERR(d_tracer))
8606                return NULL;
8607
8608        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8609
8610        MEM_FAIL(!tr->percpu_dir,
8611                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8612
8613        return tr->percpu_dir;
8614}
8615
8616static struct dentry *
8617trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8618                      void *data, long cpu, const struct file_operations *fops)
8619{
8620        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8621
8622        if (ret) /* See tracing_get_cpu() */
8623                d_inode(ret)->i_cdev = (void *)(cpu + 1);
8624        return ret;
8625}
8626
8627static void
8628tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8629{
8630        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8631        struct dentry *d_cpu;
8632        char cpu_dir[30]; /* 30 characters should be more than enough */
8633
8634        if (!d_percpu)
8635                return;
8636
8637        snprintf(cpu_dir, 30, "cpu%ld", cpu);
8638        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8639        if (!d_cpu) {
8640                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8641                return;
8642        }
8643
8644        /* per cpu trace_pipe */
8645        trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8646                                tr, cpu, &tracing_pipe_fops);
8647
8648        /* per cpu trace */
8649        trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8650                                tr, cpu, &tracing_fops);
8651
8652        trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8653                                tr, cpu, &tracing_buffers_fops);
8654
8655        trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8656                                tr, cpu, &tracing_stats_fops);
8657
8658        trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8659                                tr, cpu, &tracing_entries_fops);
8660
8661#ifdef CONFIG_TRACER_SNAPSHOT
8662        trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8663                                tr, cpu, &snapshot_fops);
8664
8665        trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8666                                tr, cpu, &snapshot_raw_fops);
8667#endif
8668}
8669
8670#ifdef CONFIG_FTRACE_SELFTEST
8671/* Let selftest have access to static functions in this file */
8672#include "trace_selftest.c"
8673#endif
8674
8675static ssize_t
8676trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8677                        loff_t *ppos)
8678{
8679        struct trace_option_dentry *topt = filp->private_data;
8680        char *buf;
8681
8682        if (topt->flags->val & topt->opt->bit)
8683                buf = "1\n";
8684        else
8685                buf = "0\n";
8686
8687        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8688}
8689
8690static ssize_t
8691trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8692                         loff_t *ppos)
8693{
8694        struct trace_option_dentry *topt = filp->private_data;
8695        unsigned long val;
8696        int ret;
8697
8698        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8699        if (ret)
8700                return ret;
8701
8702        if (val != 0 && val != 1)
8703                return -EINVAL;
8704
8705        if (!!(topt->flags->val & topt->opt->bit) != val) {
8706                mutex_lock(&trace_types_lock);
8707                ret = __set_tracer_option(topt->tr, topt->flags,
8708                                          topt->opt, !val);
8709                mutex_unlock(&trace_types_lock);
8710                if (ret)
8711                        return ret;
8712        }
8713
8714        *ppos += cnt;
8715
8716        return cnt;
8717}
8718
8719
8720static const struct file_operations trace_options_fops = {
8721        .open = tracing_open_generic,
8722        .read = trace_options_read,
8723        .write = trace_options_write,
8724        .llseek = generic_file_llseek,
8725};
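/*
 * Backs the per-tracer options/<option> files created in
 * create_trace_option_file() below. Only "0" and "1" are accepted, and
 * __set_tracer_option() is called only when the value actually changes.
 */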
8726
8727/*
8728 * In order to pass in both the trace_array descriptor and the index
8729 * to the flag that the trace option file represents, the trace_array
8730 * has a character array of trace_flags_index[], which holds the index
8731 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8732 * The address of this character array is passed to the flag option file
8733 * read/write callbacks.
8734 *
8735 * In order to extract both the index and the trace_array descriptor,
8736 * get_tr_index() uses the following algorithm.
8737 *
8738 *   idx = *ptr;
8739 *
8740 * The pointer points at an element of trace_flags_index[], and the value
8741 * stored there is that element's position (remember, index[1] == 1).
8742 *
8743 * Subtracting that index from the pointer therefore yields the start of
8744 * the array:
8745 *
8746 *   ptr - idx == &index[0]
8747 *
8748 * A simple container_of() from that pointer then gets us to the
8749 * trace_array descriptor. A worked example follows the function below.
8750 */
8751static void get_tr_index(void *data, struct trace_array **ptr,
8752                         unsigned int *pindex)
8753{
8754        *pindex = *(unsigned char *)data;
8755
8756        *ptr = container_of(data - *pindex, struct trace_array,
8757                            trace_flags_index);
8758}
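/*
 * Worked example (illustrative only): if data points at
 * tr->trace_flags_index[3], then *data == 3, so data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address with the
 * trace_flags_index member recovers the enclosing struct trace_array.
 */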
8759
8760static ssize_t
8761trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8762                        loff_t *ppos)
8763{
8764        void *tr_index = filp->private_data;
8765        struct trace_array *tr;
8766        unsigned int index;
8767        char *buf;
8768
8769        get_tr_index(tr_index, &tr, &index);
8770
8771        if (tr->trace_flags & (1 << index))
8772                buf = "1\n";
8773        else
8774                buf = "0\n";
8775
8776        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8777}
8778
8779static ssize_t
8780trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8781                         loff_t *ppos)
8782{
8783        void *tr_index = filp->private_data;
8784        struct trace_array *tr;
8785        unsigned int index;
8786        unsigned long val;
8787        int ret;
8788
8789        get_tr_index(tr_index, &tr, &index);
8790
8791        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8792        if (ret)
8793                return ret;
8794
8795        if (val != 0 && val != 1)
8796                return -EINVAL;
8797
8798        mutex_lock(&event_mutex);
8799        mutex_lock(&trace_types_lock);
8800        ret = set_tracer_flag(tr, 1 << index, val);
8801        mutex_unlock(&trace_types_lock);
8802        mutex_unlock(&event_mutex);
8803
8804        if (ret < 0)
8805                return ret;
8806
8807        *ppos += cnt;
8808
8809        return cnt;
8810}
8811
8812static const struct file_operations trace_options_core_fops = {
8813        .open = tracing_open_generic,
8814        .read = trace_options_core_read,
8815        .write = trace_options_core_write,
8816        .llseek = generic_file_llseek,
8817};
8818
8819struct dentry *trace_create_file(const char *name,
8820                                 umode_t mode,
8821                                 struct dentry *parent,
8822                                 void *data,
8823                                 const struct file_operations *fops)
8824{
8825        struct dentry *ret;
8826
8827        ret = tracefs_create_file(name, mode, parent, data, fops);
8828        if (!ret)
8829                pr_warn("Could not create tracefs '%s' entry\n", name);
8830
8831        return ret;
8832}
8833
8834
8835static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8836{
8837        struct dentry *d_tracer;
8838
8839        if (tr->options)
8840                return tr->options;
8841
8842        d_tracer = tracing_get_dentry(tr);
8843        if (IS_ERR(d_tracer))
8844                return NULL;
8845
8846        tr->options = tracefs_create_dir("options", d_tracer);
8847        if (!tr->options) {
8848                pr_warn("Could not create tracefs directory 'options'\n");
8849                return NULL;
8850        }
8851
8852        return tr->options;
8853}
8854
8855static void
8856create_trace_option_file(struct trace_array *tr,
8857                         struct trace_option_dentry *topt,
8858                         struct tracer_flags *flags,
8859                         struct tracer_opt *opt)
8860{
8861        struct dentry *t_options;
8862
8863        t_options = trace_options_init_dentry(tr);
8864        if (!t_options)
8865                return;
8866
8867        topt->flags = flags;
8868        topt->opt = opt;
8869        topt->tr = tr;
8870
8871        topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8872                                        t_options, topt, &trace_options_fops);
8873
8874}
8875
8876static void
8877create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8878{
8879        struct trace_option_dentry *topts;
8880        struct trace_options *tr_topts;
8881        struct tracer_flags *flags;
8882        struct tracer_opt *opts;
8883        int cnt;
8884        int i;
8885
8886        if (!tracer)
8887                return;
8888
8889        flags = tracer->flags;
8890
8891        if (!flags || !flags->opts)
8892                return;
8893
8894        /*
8895         * If this is an instance, only create flags for tracers
8896         * the instance may have.
8897         */
8898        if (!trace_ok_for_array(tracer, tr))
8899                return;
8900
8901        for (i = 0; i < tr->nr_topts; i++) {
8902                /* Make sure there are no duplicate flags. */
8903                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8904                        return;
8905        }
8906
8907        opts = flags->opts;
8908
8909        for (cnt = 0; opts[cnt].name; cnt++)
8910                ;
8911
8912        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8913        if (!topts)
8914                return;
8915
8916        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8917                            GFP_KERNEL);
8918        if (!tr_topts) {
8919                kfree(topts);
8920                return;
8921        }
8922
8923        tr->topts = tr_topts;
8924        tr->topts[tr->nr_topts].tracer = tracer;
8925        tr->topts[tr->nr_topts].topts = topts;
8926        tr->nr_topts++;
8927
8928        for (cnt = 0; opts[cnt].name; cnt++) {
8929                create_trace_option_file(tr, &topts[cnt], flags,
8930                                         &opts[cnt]);
8931                MEM_FAIL(topts[cnt].entry == NULL,
8932                          "Failed to create trace option: %s",
8933                          opts[cnt].name);
8934        }
8935}
8936
8937static struct dentry *
8938create_trace_option_core_file(struct trace_array *tr,
8939                              const char *option, long index)
8940{
8941        struct dentry *t_options;
8942
8943        t_options = trace_options_init_dentry(tr);
8944        if (!t_options)
8945                return NULL;
8946
8947        return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8948                                 (void *)&tr->trace_flags_index[index],
8949                                 &trace_options_core_fops);
8950}
8951
8952static void create_trace_options_dir(struct trace_array *tr)
8953{
8954        struct dentry *t_options;
8955        bool top_level = tr == &global_trace;
8956        int i;
8957
8958        t_options = trace_options_init_dentry(tr);
8959        if (!t_options)
8960                return;
8961
8962        for (i = 0; trace_options[i]; i++) {
8963                if (top_level ||
8964                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8965                        create_trace_option_core_file(tr, trace_options[i], i);
8966        }
8967}
8968
8969static ssize_t
8970rb_simple_read(struct file *filp, char __user *ubuf,
8971               size_t cnt, loff_t *ppos)
8972{
8973        struct trace_array *tr = filp->private_data;
8974        char buf[64];
8975        int r;
8976
8977        r = tracer_tracing_is_on(tr);
8978        r = sprintf(buf, "%d\n", r);
8979
8980        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8981}
8982
8983static ssize_t
8984rb_simple_write(struct file *filp, const char __user *ubuf,
8985                size_t cnt, loff_t *ppos)
8986{
8987        struct trace_array *tr = filp->private_data;
8988        struct trace_buffer *buffer = tr->array_buffer.buffer;
8989        unsigned long val;
8990        int ret;
8991
8992        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8993        if (ret)
8994                return ret;
8995
8996        if (buffer) {
8997                mutex_lock(&trace_types_lock);
8998                if (!!val == tracer_tracing_is_on(tr)) {
8999                        val = 0; /* do nothing */
9000                } else if (val) {
9001                        tracer_tracing_on(tr);
9002                        if (tr->current_trace->start)
9003                                tr->current_trace->start(tr);
9004                } else {
9005                        tracer_tracing_off(tr);
9006                        if (tr->current_trace->stop)
9007                                tr->current_trace->stop(tr);
9008                }
9009                mutex_unlock(&trace_types_lock);
9010        }
9011
9012        (*ppos)++;
9013
9014        return cnt;
9015}
9016
9017static const struct file_operations rb_simple_fops = {
9018        .open           = tracing_open_generic_tr,
9019        .read           = rb_simple_read,
9020        .write          = rb_simple_write,
9021        .release        = tracing_release_generic_tr,
9022        .llseek         = default_llseek,
9023};
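/*
 * These fops back the per-instance "tracing_on" file (created in
 * init_tracer_tracefs() below). Illustrative shell usage, assuming the
 * usual tracefs mount point:
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on   # stop recording to the ring buffer
 *   # echo 1 > /sys/kernel/tracing/tracing_on   # resume recording
 *   # cat /sys/kernel/tracing/tracing_on        # prints 0 or 1
 */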
9024
9025static ssize_t
9026buffer_percent_read(struct file *filp, char __user *ubuf,
9027                    size_t cnt, loff_t *ppos)
9028{
9029        struct trace_array *tr = filp->private_data;
9030        char buf[64];
9031        int r;
9032
9033        r = tr->buffer_percent;
9034        r = sprintf(buf, "%d\n", r);
9035
9036        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9037}
9038
9039static ssize_t
9040buffer_percent_write(struct file *filp, const char __user *ubuf,
9041                     size_t cnt, loff_t *ppos)
9042{
9043        struct trace_array *tr = filp->private_data;
9044        unsigned long val;
9045        int ret;
9046
9047        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9048        if (ret)
9049                return ret;
9050
9051        if (val > 100)
9052                return -EINVAL;
9053
9054        if (!val)
9055                val = 1;
9056
9057        tr->buffer_percent = val;
9058
9059        (*ppos)++;
9060
9061        return cnt;
9062}
9063
9064static const struct file_operations buffer_percent_fops = {
9065        .open           = tracing_open_generic_tr,
9066        .read           = buffer_percent_read,
9067        .write          = buffer_percent_write,
9068        .release        = tracing_release_generic_tr,
9069        .llseek         = default_llseek,
9070};
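/*
 * Backs the "buffer_percent" file: roughly, how full (1-100%) a per-CPU
 * buffer must be before a blocked splice reader of trace_pipe_raw is woken
 * (see the wait_on_pipe() call in tracing_buffers_splice_read()). A write
 * of 0 is bumped to 1 above. Illustrative:
 *
 *   # echo 50 > /sys/kernel/tracing/buffer_percent
 */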
9071
9072static struct dentry *trace_instance_dir;
9073
9074static void
9075init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9076
9077static int
9078allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9079{
9080        enum ring_buffer_flags rb_flags;
9081
9082        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9083
9084        buf->tr = tr;
9085
9086        buf->buffer = ring_buffer_alloc(size, rb_flags);
9087        if (!buf->buffer)
9088                return -ENOMEM;
9089
9090        buf->data = alloc_percpu(struct trace_array_cpu);
9091        if (!buf->data) {
9092                ring_buffer_free(buf->buffer);
9093                buf->buffer = NULL;
9094                return -ENOMEM;
9095        }
9096
9097        /* Allocate the first page for all buffers */
9098        set_buffer_entries(&tr->array_buffer,
9099                           ring_buffer_size(tr->array_buffer.buffer, 0));
9100
9101        return 0;
9102}
9103
9104static int allocate_trace_buffers(struct trace_array *tr, int size)
9105{
9106        int ret;
9107
9108        ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9109        if (ret)
9110                return ret;
9111
9112#ifdef CONFIG_TRACER_MAX_TRACE
9113        ret = allocate_trace_buffer(tr, &tr->max_buffer,
9114                                    allocate_snapshot ? size : 1);
9115        if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9116                ring_buffer_free(tr->array_buffer.buffer);
9117                tr->array_buffer.buffer = NULL;
9118                free_percpu(tr->array_buffer.data);
9119                tr->array_buffer.data = NULL;
9120                return -ENOMEM;
9121        }
9122        tr->allocated_snapshot = allocate_snapshot;
9123
9124        /*
9125         * Only the top level trace array gets its snapshot allocated
9126         * from the kernel command line.
9127         */
9128        allocate_snapshot = false;
9129#endif
9130
9131        return 0;
9132}
9133
9134static void free_trace_buffer(struct array_buffer *buf)
9135{
9136        if (buf->buffer) {
9137                ring_buffer_free(buf->buffer);
9138                buf->buffer = NULL;
9139                free_percpu(buf->data);
9140                buf->data = NULL;
9141        }
9142}
9143
9144static void free_trace_buffers(struct trace_array *tr)
9145{
9146        if (!tr)
9147                return;
9148
9149        free_trace_buffer(&tr->array_buffer);
9150
9151#ifdef CONFIG_TRACER_MAX_TRACE
9152        free_trace_buffer(&tr->max_buffer);
9153#endif
9154}
9155
9156static void init_trace_flags_index(struct trace_array *tr)
9157{
9158        int i;
9159
9160        /* Used by the trace options files */
9161        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9162                tr->trace_flags_index[i] = i;
9163}
9164
9165static void __update_tracer_options(struct trace_array *tr)
9166{
9167        struct tracer *t;
9168
9169        for (t = trace_types; t; t = t->next)
9170                add_tracer_options(tr, t);
9171}
9172
9173static void update_tracer_options(struct trace_array *tr)
9174{
9175        mutex_lock(&trace_types_lock);
9176        tracer_options_updated = true;
9177        __update_tracer_options(tr);
9178        mutex_unlock(&trace_types_lock);
9179}
9180
9181/* Must have trace_types_lock held */
9182struct trace_array *trace_array_find(const char *instance)
9183{
9184        struct trace_array *tr, *found = NULL;
9185
9186        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9187                if (tr->name && strcmp(tr->name, instance) == 0) {
9188                        found = tr;
9189                        break;
9190                }
9191        }
9192
9193        return found;
9194}
9195
9196struct trace_array *trace_array_find_get(const char *instance)
9197{
9198        struct trace_array *tr;
9199
9200        mutex_lock(&trace_types_lock);
9201        tr = trace_array_find(instance);
9202        if (tr)
9203                tr->ref++;
9204        mutex_unlock(&trace_types_lock);
9205
9206        return tr;
9207}
9208
9209static int trace_array_create_dir(struct trace_array *tr)
9210{
9211        int ret;
9212
9213        tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9214        if (!tr->dir)
9215                return -EINVAL;
9216
9217        ret = event_trace_add_tracer(tr->dir, tr);
9218        if (ret) {
9219                tracefs_remove(tr->dir);
9220                return ret;
9221        }
9222
9223        init_tracer_tracefs(tr, tr->dir);
9224        __update_tracer_options(tr);
9225
9226        return ret;
9227}
9228
9229static struct trace_array *trace_array_create(const char *name)
9230{
9231        struct trace_array *tr;
9232        int ret;
9233
9234        ret = -ENOMEM;
9235        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9236        if (!tr)
9237                return ERR_PTR(ret);
9238
9239        tr->name = kstrdup(name, GFP_KERNEL);
9240        if (!tr->name)
9241                goto out_free_tr;
9242
9243        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9244                goto out_free_tr;
9245
9246        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9247
9248        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9249
9250        raw_spin_lock_init(&tr->start_lock);
9251
9252        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9253
9254        tr->current_trace = &nop_trace;
9255
9256        INIT_LIST_HEAD(&tr->systems);
9257        INIT_LIST_HEAD(&tr->events);
9258        INIT_LIST_HEAD(&tr->hist_vars);
9259        INIT_LIST_HEAD(&tr->err_log);
9260
9261        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9262                goto out_free_tr;
9263
9264        if (ftrace_allocate_ftrace_ops(tr) < 0)
9265                goto out_free_tr;
9266
9267        ftrace_init_trace_array(tr);
9268
9269        init_trace_flags_index(tr);
9270
9271        if (trace_instance_dir) {
9272                ret = trace_array_create_dir(tr);
9273                if (ret)
9274                        goto out_free_tr;
9275        } else
9276                __trace_early_add_events(tr);
9277
9278        list_add(&tr->list, &ftrace_trace_arrays);
9279
9280        tr->ref++;
9281
9282        return tr;
9283
9284 out_free_tr:
9285        ftrace_free_ftrace_ops(tr);
9286        free_trace_buffers(tr);
9287        free_cpumask_var(tr->tracing_cpumask);
9288        kfree(tr->name);
9289        kfree(tr);
9290
9291        return ERR_PTR(ret);
9292}
9293
9294static int instance_mkdir(const char *name)
9295{
9296        struct trace_array *tr;
9297        int ret;
9298
9299        mutex_lock(&event_mutex);
9300        mutex_lock(&trace_types_lock);
9301
9302        ret = -EEXIST;
9303        if (trace_array_find(name))
9304                goto out_unlock;
9305
9306        tr = trace_array_create(name);
9307
9308        ret = PTR_ERR_OR_ZERO(tr);
9309
9310out_unlock:
9311        mutex_unlock(&trace_types_lock);
9312        mutex_unlock(&event_mutex);
9313        return ret;
9314}
9315
9316/**
9317 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9318 * @name: The name of the trace array to be looked up/created.
9319 *
9320 * Returns a pointer to the trace array with the given name, or NULL if it
9321 * cannot be found or created.
9322 *
9323 * NOTE: This function increments the reference counter associated with the
9324 * trace array returned. This makes sure it cannot be freed while in use.
9325 * Use trace_array_put() once the trace array is no longer needed.
9326 * If the trace_array is to be freed, trace_array_destroy() needs to
9327 * be called after the trace_array_put(), or simply let user space delete
9328 * it from the tracefs instances directory. But until the
9329 * trace_array_put() is called, user space cannot delete it.
9330 *
9331 */
9332struct trace_array *trace_array_get_by_name(const char *name)
9333{
9334        struct trace_array *tr;
9335
9336        mutex_lock(&event_mutex);
9337        mutex_lock(&trace_types_lock);
9338
9339        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9340                if (tr->name && strcmp(tr->name, name) == 0)
9341                        goto out_unlock;
9342        }
9343
9344        tr = trace_array_create(name);
9345
9346        if (IS_ERR(tr))
9347                tr = NULL;
9348out_unlock:
9349        if (tr)
9350                tr->ref++;
9351
9352        mutex_unlock(&trace_types_lock);
9353        mutex_unlock(&event_mutex);
9354        return tr;
9355}
9356EXPORT_SYMBOL_GPL(trace_array_get_by_name);
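/*
 * Typical in-kernel usage, following the rules in the comment above
 * (sketch only; the instance name is made up):
 *
 *   struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *   if (tr) {
 *           ... use the instance ...
 *           trace_array_put(tr);
 *           trace_array_destroy(tr);
 *   }
 *
 * where trace_array_destroy() is only needed if the instance should also
 * be removed, and must come after the trace_array_put().
 */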
9357
9358static int __remove_instance(struct trace_array *tr)
9359{
9360        int i;
9361
9362        /* Reference counter for a newly created trace array = 1. */
9363        if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9364                return -EBUSY;
9365
9366        list_del(&tr->list);
9367
9368        /* Disable all the flags that were enabled coming in */
9369        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9370                if ((1 << i) & ZEROED_TRACE_FLAGS)
9371                        set_tracer_flag(tr, 1 << i, 0);
9372        }
9373
9374        tracing_set_nop(tr);
9375        clear_ftrace_function_probes(tr);
9376        event_trace_del_tracer(tr);
9377        ftrace_clear_pids(tr);
9378        ftrace_destroy_function_files(tr);
9379        tracefs_remove(tr->dir);
9380        free_percpu(tr->last_func_repeats);
9381        free_trace_buffers(tr);
9382
9383        for (i = 0; i < tr->nr_topts; i++) {
9384                kfree(tr->topts[i].topts);
9385        }
9386        kfree(tr->topts);
9387
9388        free_cpumask_var(tr->tracing_cpumask);
9389        kfree(tr->name);
9390        kfree(tr);
9391
9392        return 0;
9393}
9394
9395int trace_array_destroy(struct trace_array *this_tr)
9396{
9397        struct trace_array *tr;
9398        int ret;
9399
9400        if (!this_tr)
9401                return -EINVAL;
9402
9403        mutex_lock(&event_mutex);
9404        mutex_lock(&trace_types_lock);
9405
9406        ret = -ENODEV;
9407
9408        /* Make sure the trace array exists before destroying it. */
9409        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9410                if (tr == this_tr) {
9411                        ret = __remove_instance(tr);
9412                        break;
9413                }
9414        }
9415
9416        mutex_unlock(&trace_types_lock);
9417        mutex_unlock(&event_mutex);
9418
9419        return ret;
9420}
9421EXPORT_SYMBOL_GPL(trace_array_destroy);
9422
9423static int instance_rmdir(const char *name)
9424{
9425        struct trace_array *tr;
9426        int ret;
9427
9428        mutex_lock(&event_mutex);
9429        mutex_lock(&trace_types_lock);
9430
9431        ret = -ENODEV;
9432        tr = trace_array_find(name);
9433        if (tr)
9434                ret = __remove_instance(tr);
9435
9436        mutex_unlock(&trace_types_lock);
9437        mutex_unlock(&event_mutex);
9438
9439        return ret;
9440}
9441
9442static __init void create_trace_instances(struct dentry *d_tracer)
9443{
9444        struct trace_array *tr;
9445
9446        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9447                                                         instance_mkdir,
9448                                                         instance_rmdir);
9449        if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9450                return;
9451
9452        mutex_lock(&event_mutex);
9453        mutex_lock(&trace_types_lock);
9454
9455        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9456                if (!tr->name)
9457                        continue;
9458                if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9459                             "Failed to create instance directory\n"))
9460                        break;
9461        }
9462
9463        mutex_unlock(&trace_types_lock);
9464        mutex_unlock(&event_mutex);
9465}
9466
9467static void
9468init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9469{
9470        struct trace_event_file *file;
9471        int cpu;
9472
9473        trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9474                        tr, &show_traces_fops);
9475
9476        trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9477                        tr, &set_tracer_fops);
9478
9479        trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9480                          tr, &tracing_cpumask_fops);
9481
9482        trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9483                          tr, &tracing_iter_fops);
9484
9485        trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9486                          tr, &tracing_fops);
9487
9488        trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9489                          tr, &tracing_pipe_fops);
9490
9491        trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9492                          tr, &tracing_entries_fops);
9493
9494        trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9495                          tr, &tracing_total_entries_fops);
9496
9497        trace_create_file("free_buffer", 0200, d_tracer,
9498                          tr, &tracing_free_buffer_fops);
9499
9500        trace_create_file("trace_marker", 0220, d_tracer,
9501                          tr, &tracing_mark_fops);
9502
9503        file = __find_event_file(tr, "ftrace", "print");
9504        if (file && file->dir)
9505                trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9506                                  file, &event_trigger_fops);
9507        tr->trace_marker_file = file;
9508
9509        trace_create_file("trace_marker_raw", 0220, d_tracer,
9510                          tr, &tracing_mark_raw_fops);
9511
9512        trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9513                          &trace_clock_fops);
9514
9515        trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9516                          tr, &rb_simple_fops);
9517
9518        trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9519                          &trace_time_stamp_mode_fops);
9520
9521        tr->buffer_percent = 50;
9522
9523        trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9524                        tr, &buffer_percent_fops);
9525
9526        create_trace_options_dir(tr);
9527
9528        trace_create_maxlat_file(tr, d_tracer);
9529
9530        if (ftrace_create_function_files(tr, d_tracer))
9531                MEM_FAIL(1, "Could not allocate function filter files");
9532
9533#ifdef CONFIG_TRACER_SNAPSHOT
9534        trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9535                          tr, &snapshot_fops);
9536#endif
9537
9538        trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9539                          tr, &tracing_err_log_fops);
9540
9541        for_each_tracing_cpu(cpu)
9542                tracing_init_tracefs_percpu(tr, cpu);
9543
9544        ftrace_init_tracefs(tr, d_tracer);
9545}
9546
9547static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9548{
9549        struct vfsmount *mnt;
9550        struct file_system_type *type;
9551
9552        /*
9553         * To maintain backward compatibility for tools that mount
9554         * debugfs to get to the tracing facility, tracefs is automatically
9555         * mounted to the debugfs/tracing directory.
9556         */
9557        type = get_fs_type("tracefs");
9558        if (!type)
9559                return NULL;
9560        mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9561        put_filesystem(type);
9562        if (IS_ERR(mnt))
9563                return NULL;
9564        mntget(mnt);
9565
9566        return mnt;
9567}
9568
9569/**
9570 * tracing_init_dentry - initialize top level trace array
9571 *
9572 * This is called when creating files or directories in the tracing
9573 * directory. It is called via fs_initcall() by any of the boot up code
9574 * and returns 0 on success or a negative error code on failure.
9575 */
9576int tracing_init_dentry(void)
9577{
9578        struct trace_array *tr = &global_trace;
9579
9580        if (security_locked_down(LOCKDOWN_TRACEFS)) {
9581                pr_warn("Tracing disabled due to lockdown\n");
9582                return -EPERM;
9583        }
9584
9585        /* The top level trace array uses NULL as parent */
9586        if (tr->dir)
9587                return 0;
9588
9589        if (WARN_ON(!tracefs_initialized()))
9590                return -ENODEV;
9591
9592        /*
9593         * As there may still be users that expect the tracing
9594         * files to exist in debugfs/tracing, we must automount
9595         * the tracefs file system there, so older tools still
9596         * work with the newer kernel.
9597         */
9598        tr->dir = debugfs_create_automount("tracing", NULL,
9599                                           trace_automount, NULL);
9600
9601        return 0;
9602}
9603
9604extern struct trace_eval_map *__start_ftrace_eval_maps[];
9605extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9606
9607static struct workqueue_struct *eval_map_wq __initdata;
9608static struct work_struct eval_map_work __initdata;
9609static struct work_struct tracerfs_init_work __initdata;
9610
9611static void __init eval_map_work_func(struct work_struct *work)
9612{
9613        int len;
9614
9615        len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9616        trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9617}
9618
9619static int __init trace_eval_init(void)
9620{
9621        INIT_WORK(&eval_map_work, eval_map_work_func);
9622
9623        eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9624        if (!eval_map_wq) {
9625                pr_err("Unable to allocate eval_map_wq\n");
9626                /* Fall back to doing the eval map update synchronously */
9627                eval_map_work_func(&eval_map_work);
9628                return -ENOMEM;
9629        }
9630
9631        queue_work(eval_map_wq, &eval_map_work);
9632        return 0;
9633}
9634
9635subsys_initcall(trace_eval_init);
9636
9637static int __init trace_eval_sync(void)
9638{
9639        /* Make sure the eval map updates are finished */
9640        if (eval_map_wq)
9641                destroy_workqueue(eval_map_wq);
9642        return 0;
9643}
9644
9645late_initcall_sync(trace_eval_sync);
9646
9647
9648#ifdef CONFIG_MODULES
9649static void trace_module_add_evals(struct module *mod)
9650{
9651        if (!mod->num_trace_evals)
9652                return;
9653
9654        /*
9655         * Modules with a bad taint do not have events created for them,
9656         * so do not bother with their eval (enum) maps either.
9657         */
9658        if (trace_module_has_bad_taint(mod))
9659                return;
9660
9661        trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9662}
9663
9664#ifdef CONFIG_TRACE_EVAL_MAP_FILE
9665static void trace_module_remove_evals(struct module *mod)
9666{
9667        union trace_eval_map_item *map;
9668        union trace_eval_map_item **last = &trace_eval_maps;
9669
9670        if (!mod->num_trace_evals)
9671                return;
9672
9673        mutex_lock(&trace_eval_mutex);
9674
9675        map = trace_eval_maps;
9676
9677        while (map) {
9678                if (map->head.mod == mod)
9679                        break;
9680                map = trace_eval_jmp_to_tail(map);
9681                last = &map->tail.next;
9682                map = map->tail.next;
9683        }
9684        if (!map)
9685                goto out;
9686
9687        *last = trace_eval_jmp_to_tail(map)->tail.next;
9688        kfree(map);
9689 out:
9690        mutex_unlock(&trace_eval_mutex);
9691}
9692#else
9693static inline void trace_module_remove_evals(struct module *mod) { }
9694#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9695
9696static int trace_module_notify(struct notifier_block *self,
9697                               unsigned long val, void *data)
9698{
9699        struct module *mod = data;
9700
9701        switch (val) {
9702        case MODULE_STATE_COMING:
9703                trace_module_add_evals(mod);
9704                break;
9705        case MODULE_STATE_GOING:
9706                trace_module_remove_evals(mod);
9707                break;
9708        }
9709
9710        return NOTIFY_OK;
9711}
9712
9713static struct notifier_block trace_module_nb = {
9714        .notifier_call = trace_module_notify,
9715        .priority = 0,
9716};
9717#endif /* CONFIG_MODULES */
9718
9719static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9720{
9722        event_trace_init();
9723
9724        init_tracer_tracefs(&global_trace, NULL);
9725        ftrace_init_tracefs_toplevel(&global_trace, NULL);
9726
9727        trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9728                        &global_trace, &tracing_thresh_fops);
9729
9730        trace_create_file("README", TRACE_MODE_READ, NULL,
9731                        NULL, &tracing_readme_fops);
9732
9733        trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9734                        NULL, &tracing_saved_cmdlines_fops);
9735
9736        trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9737                          NULL, &tracing_saved_cmdlines_size_fops);
9738
9739        trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9740                        NULL, &tracing_saved_tgids_fops);
9741
9742        trace_create_eval_file(NULL);
9743
9744#ifdef CONFIG_MODULES
9745        register_module_notifier(&trace_module_nb);
9746#endif
9747
9748#ifdef CONFIG_DYNAMIC_FTRACE
9749        trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9750                        NULL, &tracing_dyn_info_fops);
9751#endif
9752
9753        create_trace_instances(NULL);
9754
9755        update_tracer_options(&global_trace);
9756}
9757
9758static __init int tracer_init_tracefs(void)
9759{
9760        int ret;
9761
9762        trace_access_lock_init();
9763
9764        ret = tracing_init_dentry();
9765        if (ret)
9766                return 0;
9767
9768        if (eval_map_wq) {
9769                INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9770                queue_work(eval_map_wq, &tracerfs_init_work);
9771        } else {
9772                tracer_init_tracefs_work_func(NULL);
9773        }
9774
9775        return 0;
9776}
9777
9778fs_initcall(tracer_init_tracefs);
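
/*
 * Illustrative usage of a few of the files created above (assuming
 * tracefs is mounted at its usual /sys/kernel/tracing location):
 *
 *      cat /sys/kernel/tracing/README
 *      cat /sys/kernel/tracing/saved_cmdlines
 *      echo 2048 > /sys/kernel/tracing/saved_cmdlines_size
 */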
9779
9780static int trace_panic_handler(struct notifier_block *this,
9781                               unsigned long event, void *unused)
9782{
9783        if (ftrace_dump_on_oops)
9784                ftrace_dump(ftrace_dump_on_oops);
9785        return NOTIFY_OK;
9786}
9787
9788static struct notifier_block trace_panic_notifier = {
9789        .notifier_call  = trace_panic_handler,
9790        .next           = NULL,
9791        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9792};
9793
9794static int trace_die_handler(struct notifier_block *self,
9795                             unsigned long val,
9796                             void *data)
9797{
9798        switch (val) {
9799        case DIE_OOPS:
9800                if (ftrace_dump_on_oops)
9801                        ftrace_dump(ftrace_dump_on_oops);
9802                break;
9803        default:
9804                break;
9805        }
9806        return NOTIFY_OK;
9807}
9808
9809static struct notifier_block trace_die_notifier = {
9810        .notifier_call = trace_die_handler,
9811        .priority = 200
9812};
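
/*
 * Illustrative ways to arm the dump performed by the two notifiers
 * above (both are existing interfaces, shown here only as examples):
 *
 *      ftrace_dump_on_oops[=orig_cpu]                  (kernel command line)
 *      echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   (at run time)
 */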
9813
9814/*
9815 * printk() is limited to a maximum of 1024 bytes; we really don't need it
9816 * that big here, as nothing should be printing 1000 characters anyway.
9817 */
9818#define TRACE_MAX_PRINT         1000
9819
9820/*
9821 * Define here KERN_TRACE so that we have one place to modify
9822 * it if we decide to change what log level the ftrace dump
9823 * should be at.
9824 */
9825#define KERN_TRACE              KERN_EMERG
9826
9827void
9828trace_printk_seq(struct trace_seq *s)
9829{
9830        /* Probably should print a warning here. */
9831        if (s->seq.len >= TRACE_MAX_PRINT)
9832                s->seq.len = TRACE_MAX_PRINT;
9833
9834        /*
9835         * More paranoid code. Although the buffer size is set to
9836         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9837         * an extra layer of protection.
9838         */
9839        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9840                s->seq.len = s->seq.size - 1;
9841
9842        /* Should already be NUL terminated, but we are paranoid. */
9843        s->buffer[s->seq.len] = 0;
9844
9845        printk(KERN_TRACE "%s", s->buffer);
9846
9847        trace_seq_init(s);
9848}
9849
9850void trace_init_global_iter(struct trace_iterator *iter)
9851{
9852        iter->tr = &global_trace;
9853        iter->trace = iter->tr->current_trace;
9854        iter->cpu_file = RING_BUFFER_ALL_CPUS;
9855        iter->array_buffer = &global_trace.array_buffer;
9856
9857        if (iter->trace && iter->trace->open)
9858                iter->trace->open(iter);
9859
9860        /* Annotate start of buffers if we had overruns */
9861        if (ring_buffer_overruns(iter->array_buffer->buffer))
9862                iter->iter_flags |= TRACE_FILE_ANNOTATE;
9863
9864        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9865        if (trace_clocks[iter->tr->clock_id].in_ns)
9866                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9867
9868        /* Cannot use kmalloc() for iter.temp and iter.fmt; use the static buffers */
9869        iter->temp = static_temp_buf;
9870        iter->temp_size = STATIC_TEMP_BUF_SIZE;
9871        iter->fmt = static_fmt_buf;
9872        iter->fmt_size = STATIC_FMT_BUF_SIZE;
9873}
9874
9875void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9876{
9877        /* use static because iter can be a bit big for the stack */
9878        static struct trace_iterator iter;
9879        static atomic_t dump_running;
9880        struct trace_array *tr = &global_trace;
9881        unsigned int old_userobj;
9882        unsigned long flags;
9883        int cnt = 0, cpu;
9884
9885        /* Only allow one dump user at a time. */
9886        if (atomic_inc_return(&dump_running) != 1) {
9887                atomic_dec(&dump_running);
9888                return;
9889        }
9890
9891        /*
9892         * Always turn off tracing when we dump.
9893         * We don't need to show trace output of what happens
9894         * between multiple crashes.
9895         *
9896         * If the user does a sysrq-z, then they can re-enable
9897         * tracing with echo 1 > tracing_on.
9898         */
9899        tracing_off();
9900
9901        local_irq_save(flags);
9902
9903        /* Simulate the iterator */
9904        trace_init_global_iter(&iter);
9905
9906        for_each_tracing_cpu(cpu) {
9907                atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9908        }
9909
9910        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9911
9912        /* don't look at user memory in panic mode */
9913        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9914
9915        switch (oops_dump_mode) {
9916        case DUMP_ALL:
9917                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9918                break;
9919        case DUMP_ORIG:
9920                iter.cpu_file = raw_smp_processor_id();
9921                break;
9922        case DUMP_NONE:
9923                goto out_enable;
9924        default:
9925                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9926                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9927        }
9928
9929        printk(KERN_TRACE "Dumping ftrace buffer:\n");
9930
9931        /* Did function tracer already get disabled? */
9932        if (ftrace_is_dead()) {
9933                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9934                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9935        }
9936
9937        /*
9938         * We need to stop all tracing on all CPUs to read
9939         * the next buffer. This is a bit expensive, but it is
9940         * not done often. We read out everything we can,
9941         * and then release the locks again.
9942         */
9943
9944        while (!trace_empty(&iter)) {
9945
9946                if (!cnt)
9947                        printk(KERN_TRACE "---------------------------------\n");
9948
9949                cnt++;
9950
9951                trace_iterator_reset(&iter);
9952                iter.iter_flags |= TRACE_FILE_LAT_FMT;
9953
9954                if (trace_find_next_entry_inc(&iter) != NULL) {
9955                        int ret;
9956
9957                        ret = print_trace_line(&iter);
9958                        if (ret != TRACE_TYPE_NO_CONSUME)
9959                                trace_consume(&iter);
9960                }
9961                touch_nmi_watchdog();
9962
9963                trace_printk_seq(&iter.seq);
9964        }
9965
9966        if (!cnt)
9967                printk(KERN_TRACE "   (ftrace buffer empty)\n");
9968        else
9969                printk(KERN_TRACE "---------------------------------\n");
9970
9971 out_enable:
9972        tr->trace_flags |= old_userobj;
9973
9974        for_each_tracing_cpu(cpu) {
9975                atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9976        }
9977        atomic_dec(&dump_running);
9978        local_irq_restore(flags);
9979}
9980EXPORT_SYMBOL_GPL(ftrace_dump);
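
/*
 * Example trigger (existing interface, shown only for illustration):
 *
 *      echo z > /proc/sysrq-trigger
 *
 * dumps the ftrace ring buffer to the console via this function.
 */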
9981
9982#define WRITE_BUFSIZE  4096
9983
9984ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9985                                size_t count, loff_t *ppos,
9986                                int (*createfn)(const char *))
9987{
9988        char *kbuf, *buf, *tmp;
9989        int ret = 0;
9990        size_t done = 0;
9991        size_t size;
9992
9993        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9994        if (!kbuf)
9995                return -ENOMEM;
9996
9997        while (done < count) {
9998                size = count - done;
9999
10000                if (size >= WRITE_BUFSIZE)
10001                        size = WRITE_BUFSIZE - 1;
10002
10003                if (copy_from_user(kbuf, buffer + done, size)) {
10004                        ret = -EFAULT;
10005                        goto out;
10006                }
10007                kbuf[size] = '\0';
10008                buf = kbuf;
10009                do {
10010                        tmp = strchr(buf, '\n');
10011                        if (tmp) {
10012                                *tmp = '\0';
10013                                size = tmp - buf + 1;
10014                        } else {
10015                                size = strlen(buf);
10016                                if (done + size < count) {
10017                                        if (buf != kbuf)
10018                                                break;
10019                                        /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10020                                        pr_warn("Line length is too long: Should be less than %d\n",
10021                                                WRITE_BUFSIZE - 2);
10022                                        ret = -EINVAL;
10023                                        goto out;
10024                                }
10025                        }
10026                        done += size;
10027
10028                        /* Remove comments */
10029                        tmp = strchr(buf, '#');
10030
10031                        if (tmp)
10032                                *tmp = '\0';
10033
10034                        ret = createfn(buf);
10035                        if (ret)
10036                                goto out;
10037                        buf += size;
10038
10039                } while (done < count);
10040        }
10041        ret = done;
10042
10043out:
10044        kfree(kbuf);
10045
10046        return ret;
10047}
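
/*
 * Sketch of a caller, for illustration only (the real users are the
 * dynamic event write handlers, e.g. the one behind kprobe_events);
 * example_write() and example_create_fn() are placeholder names:
 *
 *      static ssize_t example_write(struct file *file,
 *                                   const char __user *ubuf,
 *                                   size_t count, loff_t *ppos)
 *      {
 *              return trace_parse_run_command(file, ubuf, count, ppos,
 *                                             example_create_fn);
 *      }
 *
 * example_create_fn() is then invoked once per '\n'-separated line,
 * with any trailing '#' comment already stripped.
 */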
10048
10049__init static int tracer_alloc_buffers(void)
10050{
10051        int ring_buf_size;
10052        int ret = -ENOMEM;
10053
10055        if (security_locked_down(LOCKDOWN_TRACEFS)) {
10056                pr_warn("Tracing disabled due to lockdown\n");
10057                return -EPERM;
10058        }
10059
10060        /*
10061         * Make sure we don't accidentally add more trace options
10062         * than we have bits for.
10063         */
10064        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10065
10066        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10067                goto out;
10068
10069        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10070                goto out_free_buffer_mask;
10071
10072        /* Only allocate trace_printk buffers if a trace_printk exists */
10073        if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10074                /* Must be called before global_trace.buffer is allocated */
10075                trace_printk_init_buffers();
10076
10077        /* To save memory, keep the ring buffer size to its minimum */
10078        if (ring_buffer_expanded)
10079                ring_buf_size = trace_buf_size;
10080        else
10081                ring_buf_size = 1;
10082
10083        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10084        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10085
10086        raw_spin_lock_init(&global_trace.start_lock);
10087
10088        /*
10089         * The prepare callback allocates some memory for the ring buffer. We
10090         * don't free the buffer if the CPU goes down. If we were to free
10091         * the buffer, then the user would lose any trace that was in the
10092         * buffer. The memory will be removed once the "instance" is removed.
10093         */
10094        ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10095                                      "trace/RB:prepare", trace_rb_cpu_prepare,
10096                                      NULL);
10097        if (ret < 0)
10098                goto out_free_cpumask;
10099        /* Used for event triggers */
10100        ret = -ENOMEM;
10101        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10102        if (!temp_buffer)
10103                goto out_rm_hp_state;
10104
10105        if (trace_create_savedcmd() < 0)
10106                goto out_free_temp_buffer;
10107
10108        /* TODO: make the number of buffers hot pluggable with CPUs */
10109        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10110                MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10111                goto out_free_savedcmd;
10112        }
10113
10114        if (global_trace.buffer_disabled)
10115                tracing_off();
10116
10117        if (trace_boot_clock) {
10118                ret = tracing_set_clock(&global_trace, trace_boot_clock);
10119                if (ret < 0)
10120                        pr_warn("Trace clock %s not defined, going back to default\n",
10121                                trace_boot_clock);
10122        }
10123
10124        /*
10125         * register_tracer() might reference current_trace, so it
10126         * needs to be set before we register anything. This is
10127         * just a bootstrap of current_trace anyway.
10128         */
10129        global_trace.current_trace = &nop_trace;
10130
10131        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10132
10133        ftrace_init_global_array_ops(&global_trace);
10134
10135        init_trace_flags_index(&global_trace);
10136
10137        register_tracer(&nop_trace);
10138
10139        /* Function tracing may start here (via kernel command line) */
10140        init_function_trace();
10141
10142        /* All seems OK, enable tracing */
10143        tracing_disabled = 0;
10144
10145        atomic_notifier_chain_register(&panic_notifier_list,
10146                                       &trace_panic_notifier);
10147
10148        register_die_notifier(&trace_die_notifier);
10149
10150        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10151
10152        INIT_LIST_HEAD(&global_trace.systems);
10153        INIT_LIST_HEAD(&global_trace.events);
10154        INIT_LIST_HEAD(&global_trace.hist_vars);
10155        INIT_LIST_HEAD(&global_trace.err_log);
10156        list_add(&global_trace.list, &ftrace_trace_arrays);
10157
10158        apply_trace_boot_options();
10159
10160        register_snapshot_cmd();
10161
10162        test_can_verify();
10163
10164        return 0;
10165
10166out_free_savedcmd:
10167        free_saved_cmdlines_buffer(savedcmd);
10168out_free_temp_buffer:
10169        ring_buffer_free(temp_buffer);
10170out_rm_hp_state:
10171        cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10172out_free_cpumask:
10173        free_cpumask_var(global_trace.tracing_cpumask);
10174out_free_buffer_mask:
10175        free_cpumask_var(tracing_buffer_mask);
10176out:
10177        return ret;
10178}
10179
10180void __init ftrace_boot_snapshot(void)
10181{
10182        if (snapshot_at_boot) {
10183                tracing_snapshot();
10184                internal_trace_puts("** Boot snapshot taken **\n");
10185        }
10186}
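
/*
 * Illustrative only: snapshot_at_boot is set by the "ftrace_boot_snapshot"
 * kernel command line option, e.g.
 *
 *      ftrace=function ftrace_boot_snapshot
 *
 * which preserves whatever was traced during boot in the snapshot buffer.
 */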
10187
10188void __init early_trace_init(void)
10189{
10190        if (tracepoint_printk) {
10191                tracepoint_print_iter =
10192                        kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10193                if (MEM_FAIL(!tracepoint_print_iter,
10194                             "Failed to allocate trace iterator\n"))
10195                        tracepoint_printk = 0;
10196                else
10197                        static_key_enable(&tracepoint_printk_key.key);
10198        }
10199        tracer_alloc_buffers();
10200}
10201
10202void __init trace_init(void)
10203{
10204        trace_event_init();
10205}
10206
10207__init static void clear_boot_tracer(void)
10208{
10209        /*
10210         * The default bootup tracer name lives in an init section (boot buffer).
10211         * This function is called at late_initcall time. If we did not
10212         * find the boot tracer, then clear it out, to prevent
10213         * later registration from accessing the buffer that is
10214         * about to be freed.
10215         */
10216        if (!default_bootup_tracer)
10217                return;
10218
10219        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10220               default_bootup_tracer);
10221        default_bootup_tracer = NULL;
10222}
10223
10224#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10225__init static void tracing_set_default_clock(void)
10226{
10227        /* sched_clock_stable() is determined in late_initcall */
10228        if (!trace_boot_clock && !sched_clock_stable()) {
10229                if (security_locked_down(LOCKDOWN_TRACEFS)) {
10230                        pr_warn("Can not set tracing clock due to lockdown\n");
10231                        return;
10232                }
10233
10234                printk(KERN_WARNING
10235                       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10236                       "If you want to keep using the local clock, then add:\n"
10237                       "  \"trace_clock=local\"\n"
10238                       "on the kernel command line\n");
10239                tracing_set_clock(&global_trace, "global");
10240        }
10241}
10242#else
10243static inline void tracing_set_default_clock(void) { }
10244#endif
10245
10246__init static int late_trace_init(void)
10247{
10248        if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10249                static_key_disable(&tracepoint_printk_key.key);
10250                tracepoint_printk = 0;
10251        }
10252
10253        tracing_set_default_clock();
10254        clear_boot_tracer();
10255        return 0;
10256}
10257
10258late_initcall_sync(late_trace_init);
10259