linux/kernel/trace/trace.c
   1/*
   2 * ring buffer based function tracer
   3 *
   4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   6 *
   7 * Originally taken from the RT patch by:
   8 *    Arnaldo Carvalho de Melo <acme@redhat.com>
   9 *
  10 * Based on code from the latency_tracer, that is:
  11 *  Copyright (C) 2004-2006 Ingo Molnar
  12 *  Copyright (C) 2004 Nadia Yvette Chambers
  13 */
  14#include <linux/ring_buffer.h>
  15#include <generated/utsrelease.h>
  16#include <linux/stacktrace.h>
  17#include <linux/writeback.h>
  18#include <linux/kallsyms.h>
  19#include <linux/seq_file.h>
  20#include <linux/notifier.h>
  21#include <linux/irqflags.h>
  22#include <linux/debugfs.h>
  23#include <linux/tracefs.h>
  24#include <linux/pagemap.h>
  25#include <linux/hardirq.h>
  26#include <linux/linkage.h>
  27#include <linux/uaccess.h>
  28#include <linux/vmalloc.h>
  29#include <linux/ftrace.h>
  30#include <linux/module.h>
  31#include <linux/percpu.h>
  32#include <linux/splice.h>
  33#include <linux/kdebug.h>
  34#include <linux/string.h>
  35#include <linux/mount.h>
  36#include <linux/rwsem.h>
  37#include <linux/slab.h>
  38#include <linux/ctype.h>
  39#include <linux/init.h>
  40#include <linux/poll.h>
  41#include <linux/nmi.h>
  42#include <linux/fs.h>
  43#include <linux/trace.h>
  44#include <linux/sched/clock.h>
  45#include <linux/sched/rt.h>
  46
  47#include "trace.h"
  48#include "trace_output.h"
  49
  50/*
  51 * On boot up, the ring buffer is set to the minimum size, so that
  52 * we do not waste memory on systems that are not using tracing.
  53 */
  54bool ring_buffer_expanded;
  55
  56/*
  57 * We need to change this state when a selftest is running.
   58 * A selftest will look into the ring buffer to count the
   59 * entries inserted during the selftest, although concurrent
   60 * insertions into the ring buffer, such as trace_printk(), could occur
   61 * at the same time, giving false positive or negative results.
  62 */
  63static bool __read_mostly tracing_selftest_running;
  64
  65/*
  66 * If a tracer is running, we do not want to run SELFTEST.
  67 */
  68bool __read_mostly tracing_selftest_disabled;
  69
  70/* Pipe tracepoints to printk */
  71struct trace_iterator *tracepoint_print_iter;
  72int tracepoint_printk;
  73static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  74
  75/* For tracers that don't implement custom flags */
  76static struct tracer_opt dummy_tracer_opt[] = {
  77        { }
  78};
  79
  80static int
  81dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
  82{
  83        return 0;
  84}
  85
  86/*
  87 * To prevent the comm cache from being overwritten when no
  88 * tracing is active, only save the comm when a trace event
  89 * occurred.
  90 */
  91static DEFINE_PER_CPU(bool, trace_taskinfo_save);
  92
  93/*
  94 * Kill all tracing for good (never come back).
  95 * It is initialized to 1 but will turn to zero if the initialization
  96 * of the tracer is successful. But that is the only place that sets
  97 * this back to zero.
  98 */
  99static int tracing_disabled = 1;
 100
 101cpumask_var_t __read_mostly     tracing_buffer_mask;
 102
 103/*
 104 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 105 *
 106 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 107 * is set, then ftrace_dump is called. This will output the contents
 108 * of the ftrace buffers to the console.  This is very useful for
  109 * capturing traces that lead to crashes and outputting them to a
 110 * serial console.
 111 *
  112 * It is off by default, but you can enable it either by specifying
  113 * "ftrace_dump_on_oops" on the kernel command line, or by setting
  114 * /proc/sys/kernel/ftrace_dump_on_oops.
  115 * Set it to 1 if you want to dump the buffers of all CPUs,
  116 * or to 2 if you want to dump only the buffer of the CPU that triggered the oops.
 117 */
 118
 119enum ftrace_dump_mode ftrace_dump_on_oops;
 120
 121/* When set, tracing will stop when a WARN*() is hit */
 122int __disable_trace_on_warning;
 123
 124#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 125/* Map of enums to their values, for "eval_map" file */
 126struct trace_eval_map_head {
 127        struct module                   *mod;
 128        unsigned long                   length;
 129};
 130
 131union trace_eval_map_item;
 132
 133struct trace_eval_map_tail {
 134        /*
 135         * "end" is first and points to NULL as it must be different
 136         * than "mod" or "eval_string"
 137         */
 138        union trace_eval_map_item       *next;
 139        const char                      *end;   /* points to NULL */
 140};
 141
 142static DEFINE_MUTEX(trace_eval_mutex);
 143
 144/*
 145 * The trace_eval_maps are saved in an array with two extra elements,
 146 * one at the beginning, and one at the end. The beginning item contains
 147 * the count of the saved maps (head.length), and the module they
 148 * belong to if not built in (head.mod). The ending item contains a
 149 * pointer to the next array of saved eval_map items.
 150 */
 151union trace_eval_map_item {
 152        struct trace_eval_map           map;
 153        struct trace_eval_map_head      head;
 154        struct trace_eval_map_tail      tail;
 155};
 156
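/*
 * Layout sketch (illustrative, derived from the comment above): a saved
 * block of N eval maps occupies N + 2 union items:
 *
 *   item[0]        .head  (length = N, mod set when owned by a module)
 *   item[1 .. N]   .map   (the saved trace_eval_map entries)
 *   item[N + 1]    .tail  (tail.next points to the next saved block)
 */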
 157static union trace_eval_map_item *trace_eval_maps;
 158#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 159
 160static int tracing_set_tracer(struct trace_array *tr, const char *buf);
 161
 162#define MAX_TRACER_SIZE         100
 163static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 164static char *default_bootup_tracer;
 165
 166static bool allocate_snapshot;
 167
 168static int __init set_cmdline_ftrace(char *str)
 169{
 170        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 171        default_bootup_tracer = bootup_tracer_buf;
 172        /* We are using ftrace early, expand it */
 173        ring_buffer_expanded = true;
 174        return 1;
 175}
 176__setup("ftrace=", set_cmdline_ftrace);
 177
 178static int __init set_ftrace_dump_on_oops(char *str)
 179{
 180        if (*str++ != '=' || !*str) {
 181                ftrace_dump_on_oops = DUMP_ALL;
 182                return 1;
 183        }
 184
 185        if (!strcmp("orig_cpu", str)) {
 186                ftrace_dump_on_oops = DUMP_ORIG;
 187                return 1;
 188        }
 189
 190        return 0;
 191}
 192__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 193
 194static int __init stop_trace_on_warning(char *str)
 195{
 196        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 197                __disable_trace_on_warning = 1;
 198        return 1;
 199}
 200__setup("traceoff_on_warning", stop_trace_on_warning);
 201
 202static int __init boot_alloc_snapshot(char *str)
 203{
 204        allocate_snapshot = true;
 205        /* We also need the main ring buffer expanded */
 206        ring_buffer_expanded = true;
 207        return 1;
 208}
 209__setup("alloc_snapshot", boot_alloc_snapshot);
 210
 211
 212static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 213
 214static int __init set_trace_boot_options(char *str)
 215{
 216        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 217        return 0;
 218}
 219__setup("trace_options=", set_trace_boot_options);
 220
 221static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 222static char *trace_boot_clock __initdata;
 223
 224static int __init set_trace_boot_clock(char *str)
 225{
 226        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 227        trace_boot_clock = trace_boot_clock_buf;
 228        return 0;
 229}
 230__setup("trace_clock=", set_trace_boot_clock);
 231
 232static int __init set_tracepoint_printk(char *str)
 233{
 234        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 235                tracepoint_printk = 1;
 236        return 1;
 237}
 238__setup("tp_printk", set_tracepoint_printk);
 239
 240unsigned long long ns2usecs(u64 nsec)
 241{
 242        nsec += 500;
 243        do_div(nsec, 1000);
 244        return nsec;
 245}
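
/*
 * For example (illustration only): the "+ 500" before the divide rounds
 * to the nearest microsecond instead of truncating, so ns2usecs(1499)
 * returns 1 while ns2usecs(1500) returns 2.
 */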
 246
 247/* trace_flags holds trace_options default values */
 248#define TRACE_DEFAULT_FLAGS                                             \
 249        (FUNCTION_DEFAULT_FLAGS |                                       \
 250         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 251         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 252         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 253         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
 254
 255/* trace_options that are only supported by global_trace */
 256#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 257               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 258
 259/* trace_flags that are default zero for instances */
 260#define ZEROED_TRACE_FLAGS \
 261        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 262
 263/*
 264 * The global_trace is the descriptor that holds the top-level tracing
 265 * buffers for the live tracing.
 266 */
 267static struct trace_array global_trace = {
 268        .trace_flags = TRACE_DEFAULT_FLAGS,
 269};
 270
 271LIST_HEAD(ftrace_trace_arrays);
 272
 273int trace_array_get(struct trace_array *this_tr)
 274{
 275        struct trace_array *tr;
 276        int ret = -ENODEV;
 277
 278        mutex_lock(&trace_types_lock);
 279        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 280                if (tr == this_tr) {
 281                        tr->ref++;
 282                        ret = 0;
 283                        break;
 284                }
 285        }
 286        mutex_unlock(&trace_types_lock);
 287
 288        return ret;
 289}
 290
 291static void __trace_array_put(struct trace_array *this_tr)
 292{
 293        WARN_ON(!this_tr->ref);
 294        this_tr->ref--;
 295}
 296
 297void trace_array_put(struct trace_array *this_tr)
 298{
 299        mutex_lock(&trace_types_lock);
 300        __trace_array_put(this_tr);
 301        mutex_unlock(&trace_types_lock);
 302}
 303
 304int call_filter_check_discard(struct trace_event_call *call, void *rec,
 305                              struct ring_buffer *buffer,
 306                              struct ring_buffer_event *event)
 307{
 308        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 309            !filter_match_preds(call->filter, rec)) {
 310                __trace_event_discard_commit(buffer, event);
 311                return 1;
 312        }
 313
 314        return 0;
 315}
 316
 317void trace_free_pid_list(struct trace_pid_list *pid_list)
 318{
 319        vfree(pid_list->pids);
 320        kfree(pid_list);
 321}
 322
 323/**
 324 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 325 * @filtered_pids: The list of pids to check
 326 * @search_pid: The PID to find in @filtered_pids
 327 *
  328 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 329 */
 330bool
 331trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 332{
 333        /*
 334         * If pid_max changed after filtered_pids was created, we
 335         * by default ignore all pids greater than the previous pid_max.
 336         */
 337        if (search_pid >= filtered_pids->pid_max)
 338                return false;
 339
 340        return test_bit(search_pid, filtered_pids->pids);
 341}
 342
 343/**
 344 * trace_ignore_this_task - should a task be ignored for tracing
 345 * @filtered_pids: The list of pids to check
 346 * @task: The task that should be ignored if not filtered
 347 *
 348 * Checks if @task should be traced or not from @filtered_pids.
 349 * Returns true if @task should *NOT* be traced.
 350 * Returns false if @task should be traced.
 351 */
 352bool
 353trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
 354{
 355        /*
 356         * Return false, because if filtered_pids does not exist,
 357         * all pids are good to trace.
 358         */
 359        if (!filtered_pids)
 360                return false;
 361
 362        return !trace_find_filtered_pid(filtered_pids, task->pid);
 363}
 364
 365/**
  366 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 367 * @pid_list: The list to modify
 368 * @self: The current task for fork or NULL for exit
 369 * @task: The task to add or remove
 370 *
  371 * When adding a task, if @self is defined, the task is only added if @self
 372 * is also included in @pid_list. This happens on fork and tasks should
 373 * only be added when the parent is listed. If @self is NULL, then the
 374 * @task pid will be removed from the list, which would happen on exit
 375 * of a task.
 376 */
 377void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 378                                  struct task_struct *self,
 379                                  struct task_struct *task)
 380{
 381        if (!pid_list)
 382                return;
 383
 384        /* For forks, we only add if the forking task is listed */
 385        if (self) {
 386                if (!trace_find_filtered_pid(pid_list, self->pid))
 387                        return;
 388        }
 389
 390        /* Sorry, but we don't support pid_max changing after setting */
 391        if (task->pid >= pid_list->pid_max)
 392                return;
 393
 394        /* "self" is set for forks, and NULL for exits */
 395        if (self)
 396                set_bit(task->pid, pid_list->pids);
 397        else
 398                clear_bit(task->pid, pid_list->pids);
 399}
 400
 401/**
 402 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 403 * @pid_list: The pid list to show
 404 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 405 * @pos: The position of the file
 406 *
 407 * This is used by the seq_file "next" operation to iterate the pids
 408 * listed in a trace_pid_list structure.
 409 *
 410 * Returns the pid+1 as we want to display pid of zero, but NULL would
 411 * stop the iteration.
 412 */
 413void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 414{
 415        unsigned long pid = (unsigned long)v;
 416
 417        (*pos)++;
 418
  419        /* pid is already +1 of the actual previous bit */
 420        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 421
 422        /* Return pid + 1 to allow zero to be represented */
 423        if (pid < pid_list->pid_max)
 424                return (void *)(pid + 1);
 425
 426        return NULL;
 427}
 428
 429/**
 430 * trace_pid_start - Used for seq_file to start reading pid lists
 431 * @pid_list: The pid list to show
 432 * @pos: The position of the file
 433 *
 434 * This is used by seq_file "start" operation to start the iteration
 435 * of listing pids.
 436 *
 437 * Returns the pid+1 as we want to display pid of zero, but NULL would
 438 * stop the iteration.
 439 */
 440void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 441{
 442        unsigned long pid;
 443        loff_t l = 0;
 444
 445        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 446        if (pid >= pid_list->pid_max)
 447                return NULL;
 448
 449        /* Return pid + 1 so that zero can be the exit value */
 450        for (pid++; pid && l < *pos;
 451             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 452                ;
 453        return (void *)pid;
 454}
 455
 456/**
 457 * trace_pid_show - show the current pid in seq_file processing
 458 * @m: The seq_file structure to write into
 459 * @v: A void pointer of the pid (+1) value to display
 460 *
 461 * Can be directly used by seq_file operations to display the current
 462 * pid value.
 463 */
 464int trace_pid_show(struct seq_file *m, void *v)
 465{
 466        unsigned long pid = (unsigned long)v - 1;
 467
 468        seq_printf(m, "%lu\n", pid);
 469        return 0;
 470}
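
/*
 * Example of the +1 encoding used by the seq_file helpers above
 * (illustration only): pid 0 travels through trace_pid_start() and
 * trace_pid_next() as (void *)1 so it is not mistaken for the NULL that
 * ends the iteration, and trace_pid_show() subtracts the 1 again so the
 * file still displays "0".
 */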
 471
 472/* 128 should be much more than enough */
 473#define PID_BUF_SIZE            127
 474
 475int trace_pid_write(struct trace_pid_list *filtered_pids,
 476                    struct trace_pid_list **new_pid_list,
 477                    const char __user *ubuf, size_t cnt)
 478{
 479        struct trace_pid_list *pid_list;
 480        struct trace_parser parser;
 481        unsigned long val;
 482        int nr_pids = 0;
 483        ssize_t read = 0;
 484        ssize_t ret = 0;
 485        loff_t pos;
 486        pid_t pid;
 487
 488        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 489                return -ENOMEM;
 490
 491        /*
  492         * Always create a new array rather than modifying the current
  493         * one in place. The write is an all-or-nothing operation: if
  494         * it fails, the current list is not modified; only on success
  495         * is the new list installed.
 496         */
 497        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 498        if (!pid_list)
 499                return -ENOMEM;
 500
 501        pid_list->pid_max = READ_ONCE(pid_max);
 502
 503        /* Only truncating will shrink pid_max */
 504        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 505                pid_list->pid_max = filtered_pids->pid_max;
 506
 507        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 508        if (!pid_list->pids) {
 509                kfree(pid_list);
 510                return -ENOMEM;
 511        }
 512
 513        if (filtered_pids) {
 514                /* copy the current bits to the new max */
 515                for_each_set_bit(pid, filtered_pids->pids,
 516                                 filtered_pids->pid_max) {
 517                        set_bit(pid, pid_list->pids);
 518                        nr_pids++;
 519                }
 520        }
 521
 522        while (cnt > 0) {
 523
 524                pos = 0;
 525
 526                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 527                if (ret < 0 || !trace_parser_loaded(&parser))
 528                        break;
 529
 530                read += ret;
 531                ubuf += ret;
 532                cnt -= ret;
 533
 534                ret = -EINVAL;
 535                if (kstrtoul(parser.buffer, 0, &val))
 536                        break;
 537                if (val >= pid_list->pid_max)
 538                        break;
 539
 540                pid = (pid_t)val;
 541
 542                set_bit(pid, pid_list->pids);
 543                nr_pids++;
 544
 545                trace_parser_clear(&parser);
 546                ret = 0;
 547        }
 548        trace_parser_put(&parser);
 549
 550        if (ret < 0) {
 551                trace_free_pid_list(pid_list);
 552                return ret;
 553        }
 554
 555        if (!nr_pids) {
 556                /* Cleared the list of pids */
 557                trace_free_pid_list(pid_list);
 558                read = ret;
 559                pid_list = NULL;
 560        }
 561
 562        *new_pid_list = pid_list;
 563
 564        return read;
 565}
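
/*
 * Illustrative semantics of trace_pid_write(): a write of "123 456"
 * builds a fresh pid_list with bits 123 and 456 set, plus any bits
 * copied over from @filtered_pids. A write that yields no pids at all
 * (nothing parsed and nothing inherited) ends with *new_pid_list set
 * to NULL, i.e. the filter is cleared.
 */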
 566
 567static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 568{
 569        u64 ts;
 570
 571        /* Early boot up does not have a buffer yet */
 572        if (!buf->buffer)
 573                return trace_clock_local();
 574
 575        ts = ring_buffer_time_stamp(buf->buffer, cpu);
 576        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 577
 578        return ts;
 579}
 580
 581u64 ftrace_now(int cpu)
 582{
 583        return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
 584}
 585
 586/**
 587 * tracing_is_enabled - Show if global_trace has been disabled
 588 *
 589 * Shows if the global trace has been enabled or not. It uses the
  590 * mirror flag "buffer_disabled", which is meant for fast paths such as
  591 * the irqsoff tracer, but it may be inaccurate due to races. If you
 592 * need to know the accurate state, use tracing_is_on() which is a little
 593 * slower, but accurate.
 594 */
 595int tracing_is_enabled(void)
 596{
 597        /*
 598         * For quick access (irqsoff uses this in fast path), just
 599         * return the mirror variable of the state of the ring buffer.
 600         * It's a little racy, but we don't really care.
 601         */
 602        smp_rmb();
 603        return !global_trace.buffer_disabled;
 604}
 605
 606/*
 607 * trace_buf_size is the size in bytes that is allocated
 608 * for a buffer. Note, the number of bytes is always rounded
 609 * to page size.
 610 *
  611 * This number is purposely set to a low value of 16384 so that,
  612 * if a dump on oops happens, there is not an overwhelming amount
  613 * of output to wait for. In any case, it is configurable at both
  614 * boot time and run time.
 615 */
 616#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 617
 618static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 619
 620/* trace_types holds a link list of available tracers. */
 621static struct tracer            *trace_types __read_mostly;
 622
 623/*
 624 * trace_types_lock is used to protect the trace_types list.
 625 */
 626DEFINE_MUTEX(trace_types_lock);
 627
 628/*
 629 * serialize the access of the ring buffer
 630 *
  631 * The ring buffer serializes readers, but that is only low-level protection.
  632 * The validity of the events (as returned by ring_buffer_peek() etc.)
  633 * is not protected by the ring buffer.
  634 *
  635 * The content of events may become garbage if we allow another process to
  636 * consume these events concurrently:
  637 *   A) the page of the consumed events may become a normal page
  638 *      (not a reader page) in the ring buffer, and this page will be
  639 *      rewritten by the event producer.
  640 *   B) the page of the consumed events may become a page for splice_read,
  641 *      and this page will be returned to the system.
  642 *
  643 * These primitives allow multiple processes to access different per-cpu
  644 * ring buffers concurrently.
  645 *
  646 * These primitives do not distinguish read-only from read-consume access.
  647 * Multiple read-only accesses are also serialized.
 648 */
 649
 650#ifdef CONFIG_SMP
 651static DECLARE_RWSEM(all_cpu_access_lock);
 652static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 653
 654static inline void trace_access_lock(int cpu)
 655{
 656        if (cpu == RING_BUFFER_ALL_CPUS) {
 657                /* gain it for accessing the whole ring buffer. */
 658                down_write(&all_cpu_access_lock);
 659        } else {
 660                /* gain it for accessing a cpu ring buffer. */
 661
 662                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 663                down_read(&all_cpu_access_lock);
 664
 665                /* Secondly block other access to this @cpu ring buffer. */
 666                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 667        }
 668}
 669
 670static inline void trace_access_unlock(int cpu)
 671{
 672        if (cpu == RING_BUFFER_ALL_CPUS) {
 673                up_write(&all_cpu_access_lock);
 674        } else {
 675                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 676                up_read(&all_cpu_access_lock);
 677        }
 678}
 679
 680static inline void trace_access_lock_init(void)
 681{
 682        int cpu;
 683
 684        for_each_possible_cpu(cpu)
 685                mutex_init(&per_cpu(cpu_access_lock, cpu));
 686}
 687
 688#else
 689
 690static DEFINE_MUTEX(access_lock);
 691
 692static inline void trace_access_lock(int cpu)
 693{
 694        (void)cpu;
 695        mutex_lock(&access_lock);
 696}
 697
 698static inline void trace_access_unlock(int cpu)
 699{
 700        (void)cpu;
 701        mutex_unlock(&access_lock);
 702}
 703
 704static inline void trace_access_lock_init(void)
 705{
 706}
 707
 708#endif
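
/*
 * Illustration of the scheme above (SMP case): a reader of a single cpu
 * buffer takes all_cpu_access_lock for read plus that cpu's
 * cpu_access_lock mutex, so different cpu buffers can be accessed
 * concurrently, while trace_access_lock(RING_BUFFER_ALL_CPUS) takes
 * all_cpu_access_lock for write and therefore excludes everyone else.
 */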
 709
 710#ifdef CONFIG_STACKTRACE
 711static void __ftrace_trace_stack(struct ring_buffer *buffer,
 712                                 unsigned long flags,
 713                                 int skip, int pc, struct pt_regs *regs);
 714static inline void ftrace_trace_stack(struct trace_array *tr,
 715                                      struct ring_buffer *buffer,
 716                                      unsigned long flags,
 717                                      int skip, int pc, struct pt_regs *regs);
 718
 719#else
 720static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
 721                                        unsigned long flags,
 722                                        int skip, int pc, struct pt_regs *regs)
 723{
 724}
 725static inline void ftrace_trace_stack(struct trace_array *tr,
 726                                      struct ring_buffer *buffer,
 727                                      unsigned long flags,
 728                                      int skip, int pc, struct pt_regs *regs)
 729{
 730}
 731
 732#endif
 733
 734static __always_inline void
 735trace_event_setup(struct ring_buffer_event *event,
 736                  int type, unsigned long flags, int pc)
 737{
 738        struct trace_entry *ent = ring_buffer_event_data(event);
 739
 740        tracing_generic_entry_update(ent, flags, pc);
 741        ent->type = type;
 742}
 743
 744static __always_inline struct ring_buffer_event *
 745__trace_buffer_lock_reserve(struct ring_buffer *buffer,
 746                          int type,
 747                          unsigned long len,
 748                          unsigned long flags, int pc)
 749{
 750        struct ring_buffer_event *event;
 751
 752        event = ring_buffer_lock_reserve(buffer, len);
 753        if (event != NULL)
 754                trace_event_setup(event, type, flags, pc);
 755
 756        return event;
 757}
 758
 759void tracer_tracing_on(struct trace_array *tr)
 760{
 761        if (tr->trace_buffer.buffer)
 762                ring_buffer_record_on(tr->trace_buffer.buffer);
 763        /*
 764         * This flag is looked at when buffers haven't been allocated
 765         * yet, or by some tracers (like irqsoff), that just want to
 766         * know if the ring buffer has been disabled, but it can handle
  767         * races where it gets disabled but we still do a record.
 768         * As the check is in the fast path of the tracers, it is more
 769         * important to be fast than accurate.
 770         */
 771        tr->buffer_disabled = 0;
 772        /* Make the flag seen by readers */
 773        smp_wmb();
 774}
 775
 776/**
 777 * tracing_on - enable tracing buffers
 778 *
 779 * This function enables tracing buffers that may have been
 780 * disabled with tracing_off.
 781 */
 782void tracing_on(void)
 783{
 784        tracer_tracing_on(&global_trace);
 785}
 786EXPORT_SYMBOL_GPL(tracing_on);
 787
 788
 789static __always_inline void
 790__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 791{
 792        __this_cpu_write(trace_taskinfo_save, true);
 793
 794        /* If this is the temp buffer, we need to commit fully */
 795        if (this_cpu_read(trace_buffered_event) == event) {
 796                /* Length is in event->array[0] */
 797                ring_buffer_write(buffer, event->array[0], &event->array[1]);
 798                /* Release the temp buffer */
 799                this_cpu_dec(trace_buffered_event_cnt);
 800        } else
 801                ring_buffer_unlock_commit(buffer, event);
 802}
 803
 804/**
 805 * __trace_puts - write a constant string into the trace buffer.
 806 * @ip:    The address of the caller
 807 * @str:   The constant string to write
 808 * @size:  The size of the string.
 809 */
 810int __trace_puts(unsigned long ip, const char *str, int size)
 811{
 812        struct ring_buffer_event *event;
 813        struct ring_buffer *buffer;
 814        struct print_entry *entry;
 815        unsigned long irq_flags;
 816        int alloc;
 817        int pc;
 818
 819        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 820                return 0;
 821
 822        pc = preempt_count();
 823
 824        if (unlikely(tracing_selftest_running || tracing_disabled))
 825                return 0;
 826
 827        alloc = sizeof(*entry) + size + 2; /* possible \n added */
 828
 829        local_save_flags(irq_flags);
 830        buffer = global_trace.trace_buffer.buffer;
  831        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
 832                                            irq_flags, pc);
 833        if (!event)
 834                return 0;
 835
 836        entry = ring_buffer_event_data(event);
 837        entry->ip = ip;
 838
 839        memcpy(&entry->buf, str, size);
 840
 841        /* Add a newline if necessary */
 842        if (entry->buf[size - 1] != '\n') {
 843                entry->buf[size] = '\n';
 844                entry->buf[size + 1] = '\0';
 845        } else
 846                entry->buf[size] = '\0';
 847
 848        __buffer_unlock_commit(buffer, event);
 849        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 850
 851        return size;
 852}
 853EXPORT_SYMBOL_GPL(__trace_puts);
 854
 855/**
 856 * __trace_bputs - write the pointer to a constant string into trace buffer
 857 * @ip:    The address of the caller
 858 * @str:   The constant string to write to the buffer to
 859 */
 860int __trace_bputs(unsigned long ip, const char *str)
 861{
 862        struct ring_buffer_event *event;
 863        struct ring_buffer *buffer;
 864        struct bputs_entry *entry;
 865        unsigned long irq_flags;
 866        int size = sizeof(struct bputs_entry);
 867        int pc;
 868
 869        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 870                return 0;
 871
 872        pc = preempt_count();
 873
 874        if (unlikely(tracing_selftest_running || tracing_disabled))
 875                return 0;
 876
 877        local_save_flags(irq_flags);
 878        buffer = global_trace.trace_buffer.buffer;
 879        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
 880                                            irq_flags, pc);
 881        if (!event)
 882                return 0;
 883
 884        entry = ring_buffer_event_data(event);
 885        entry->ip                       = ip;
 886        entry->str                      = str;
 887
 888        __buffer_unlock_commit(buffer, event);
 889        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 890
 891        return 1;
 892}
 893EXPORT_SYMBOL_GPL(__trace_bputs);
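
/*
 * Background note (assuming the usual trace_puts() macro is the caller):
 * trace_puts() is expected to pick __trace_bputs() for compile-time
 * constant strings, where only the pointer is recorded, and to fall back
 * to __trace_puts(), which copies the string into the ring buffer.
 */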
 894
 895#ifdef CONFIG_TRACER_SNAPSHOT
 896void tracing_snapshot_instance(struct trace_array *tr)
 897{
 898        struct tracer *tracer = tr->current_trace;
 899        unsigned long flags;
 900
 901        if (in_nmi()) {
 902                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
 903                internal_trace_puts("*** snapshot is being ignored        ***\n");
 904                return;
 905        }
 906
 907        if (!tr->allocated_snapshot) {
 908                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
 909                internal_trace_puts("*** stopping trace here!   ***\n");
 910                tracing_off();
 911                return;
 912        }
 913
  914        /* Note, snapshot can not be used when the tracer itself uses the max buffer */
 915        if (tracer->use_max_tr) {
 916                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
 917                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
 918                return;
 919        }
 920
 921        local_irq_save(flags);
 922        update_max_tr(tr, current, smp_processor_id());
 923        local_irq_restore(flags);
 924}
 925
 926/**
 927 * tracing_snapshot - take a snapshot of the current buffer.
 928 *
 929 * This causes a swap between the snapshot buffer and the current live
 930 * tracing buffer. You can use this to take snapshots of the live
 931 * trace when some condition is triggered, but continue to trace.
 932 *
  933 * Note, make sure to allocate the snapshot beforehand, either
  934 * with tracing_snapshot_alloc(), or by doing it manually
  935 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
  936 *
  937 * If the snapshot buffer is not allocated, this will stop tracing,
  938 * basically making a permanent snapshot.
 939 */
 940void tracing_snapshot(void)
 941{
 942        struct trace_array *tr = &global_trace;
 943
 944        tracing_snapshot_instance(tr);
 945}
 946EXPORT_SYMBOL_GPL(tracing_snapshot);
 947
 948static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
 949                                        struct trace_buffer *size_buf, int cpu_id);
 950static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
 951
 952int tracing_alloc_snapshot_instance(struct trace_array *tr)
 953{
 954        int ret;
 955
 956        if (!tr->allocated_snapshot) {
 957
 958                /* allocate spare buffer */
 959                ret = resize_buffer_duplicate_size(&tr->max_buffer,
 960                                   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
 961                if (ret < 0)
 962                        return ret;
 963
 964                tr->allocated_snapshot = true;
 965        }
 966
 967        return 0;
 968}
 969
 970static void free_snapshot(struct trace_array *tr)
 971{
 972        /*
  973         * We don't free the ring buffer; instead, we resize it, because
  974         * the max_tr ring buffer has some state (e.g. ring->clock) and
  975         * we want to preserve it.
 976         */
 977        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
 978        set_buffer_entries(&tr->max_buffer, 1);
 979        tracing_reset_online_cpus(&tr->max_buffer);
 980        tr->allocated_snapshot = false;
 981}
 982
 983/**
 984 * tracing_alloc_snapshot - allocate snapshot buffer.
 985 *
 986 * This only allocates the snapshot buffer if it isn't already
 987 * allocated - it doesn't also take a snapshot.
 988 *
 989 * This is meant to be used in cases where the snapshot buffer needs
 990 * to be set up for events that can't sleep but need to be able to
 991 * trigger a snapshot.
 992 */
 993int tracing_alloc_snapshot(void)
 994{
 995        struct trace_array *tr = &global_trace;
 996        int ret;
 997
 998        ret = tracing_alloc_snapshot_instance(tr);
 999        WARN_ON(ret < 0);
1000
1001        return ret;
1002}
1003EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004
1005/**
1006 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007 *
1008 * This is similar to tracing_snapshot(), but it will allocate the
1009 * snapshot buffer if it isn't already allocated. Use this only
1010 * where it is safe to sleep, as the allocation may sleep.
1011 *
1012 * This causes a swap between the snapshot buffer and the current live
1013 * tracing buffer. You can use this to take snapshots of the live
1014 * trace when some condition is triggered, but continue to trace.
1015 */
1016void tracing_snapshot_alloc(void)
1017{
1018        int ret;
1019
1020        ret = tracing_alloc_snapshot();
1021        if (ret < 0)
1022                return;
1023
1024        tracing_snapshot();
1025}
1026EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1027#else
1028void tracing_snapshot(void)
1029{
1030        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031}
1032EXPORT_SYMBOL_GPL(tracing_snapshot);
1033int tracing_alloc_snapshot(void)
1034{
1035        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036        return -ENODEV;
1037}
1038EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039void tracing_snapshot_alloc(void)
1040{
1041        /* Give warning */
1042        tracing_snapshot();
1043}
1044EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045#endif /* CONFIG_TRACER_SNAPSHOT */
1046
1047void tracer_tracing_off(struct trace_array *tr)
1048{
1049        if (tr->trace_buffer.buffer)
1050                ring_buffer_record_off(tr->trace_buffer.buffer);
1051        /*
1052         * This flag is looked at when buffers haven't been allocated
1053         * yet, or by some tracers (like irqsoff), that just want to
1054         * know if the ring buffer has been disabled, but it can handle
 1055         * races where it gets disabled but we still do a record.
1056         * As the check is in the fast path of the tracers, it is more
1057         * important to be fast than accurate.
1058         */
1059        tr->buffer_disabled = 1;
1060        /* Make the flag seen by readers */
1061        smp_wmb();
1062}
1063
1064/**
1065 * tracing_off - turn off tracing buffers
1066 *
1067 * This function stops the tracing buffers from recording data.
1068 * It does not disable any overhead the tracers themselves may
1069 * be causing. This function simply causes all recording to
1070 * the ring buffers to fail.
1071 */
1072void tracing_off(void)
1073{
1074        tracer_tracing_off(&global_trace);
1075}
1076EXPORT_SYMBOL_GPL(tracing_off);
1077
1078void disable_trace_on_warning(void)
1079{
1080        if (__disable_trace_on_warning)
1081                tracing_off();
1082}
1083
1084/**
 1085 * tracer_tracing_is_on - show the real state of the ring buffer
 1086 * @tr: the trace array whose ring buffer state is queried
 1087 *
 1088 * Shows the real state of the ring buffer, i.e. whether it is enabled or not.
1089 */
1090int tracer_tracing_is_on(struct trace_array *tr)
1091{
1092        if (tr->trace_buffer.buffer)
1093                return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094        return !tr->buffer_disabled;
1095}
1096
1097/**
1098 * tracing_is_on - show state of ring buffers enabled
1099 */
1100int tracing_is_on(void)
1101{
1102        return tracer_tracing_is_on(&global_trace);
1103}
1104EXPORT_SYMBOL_GPL(tracing_is_on);
1105
1106static int __init set_buf_size(char *str)
1107{
1108        unsigned long buf_size;
1109
1110        if (!str)
1111                return 0;
1112        buf_size = memparse(str, &str);
1113        /* nr_entries can not be zero */
1114        if (buf_size == 0)
1115                return 0;
1116        trace_buf_size = buf_size;
1117        return 1;
1118}
1119__setup("trace_buf_size=", set_buf_size);
1120
1121static int __init set_tracing_thresh(char *str)
1122{
1123        unsigned long threshold;
1124        int ret;
1125
1126        if (!str)
1127                return 0;
1128        ret = kstrtoul(str, 0, &threshold);
1129        if (ret < 0)
1130                return 0;
1131        tracing_thresh = threshold * 1000;
1132        return 1;
1133}
1134__setup("tracing_thresh=", set_tracing_thresh);
1135
1136unsigned long nsecs_to_usecs(unsigned long nsecs)
1137{
1138        return nsecs / 1000;
1139}
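
/*
 * Note: unlike ns2usecs() above, which rounds to the nearest
 * microsecond, this helper simply truncates.
 */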
1140
1141/*
1142 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145 * of strings in the order that the evals (enum) were defined.
1146 */
1147#undef C
1148#define C(a, b) b
1149
 1150/* These must match the bit positions in trace_iterator_flags */
1151static const char *trace_options[] = {
1152        TRACE_FLAGS
1153        NULL
1154};
1155
1156static struct {
1157        u64 (*func)(void);
1158        const char *name;
1159        int in_ns;              /* is this clock in nanoseconds? */
1160} trace_clocks[] = {
1161        { trace_clock_local,            "local",        1 },
1162        { trace_clock_global,           "global",       1 },
1163        { trace_clock_counter,          "counter",      0 },
1164        { trace_clock_jiffies,          "uptime",       0 },
1165        { trace_clock,                  "perf",         1 },
1166        { ktime_get_mono_fast_ns,       "mono",         1 },
1167        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1168        { ktime_get_boot_fast_ns,       "boot",         1 },
1169        ARCH_TRACE_CLOCKS
1170};
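
/*
 * The names above are what a clock is selected by: the "trace_clock="
 * boot parameter handled earlier in this file and, assuming the usual
 * tracefs layout, the per-instance "trace_clock" file, e.g.
 * "echo mono > trace_clock".
 */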
1171
1172bool trace_clock_in_ns(struct trace_array *tr)
1173{
1174        if (trace_clocks[tr->clock_id].in_ns)
1175                return true;
1176
1177        return false;
1178}
1179
1180/*
1181 * trace_parser_get_init - gets the buffer for trace parser
1182 */
1183int trace_parser_get_init(struct trace_parser *parser, int size)
1184{
1185        memset(parser, 0, sizeof(*parser));
1186
1187        parser->buffer = kmalloc(size, GFP_KERNEL);
1188        if (!parser->buffer)
1189                return 1;
1190
1191        parser->size = size;
1192        return 0;
1193}
1194
1195/*
1196 * trace_parser_put - frees the buffer for trace parser
1197 */
1198void trace_parser_put(struct trace_parser *parser)
1199{
1200        kfree(parser->buffer);
1201        parser->buffer = NULL;
1202}
1203
1204/*
 1205 * trace_get_user - reads the user input string separated by space
1206 * (matched by isspace(ch))
1207 *
1208 * For each string found the 'struct trace_parser' is updated,
1209 * and the function returns.
1210 *
1211 * Returns number of bytes read.
1212 *
1213 * See kernel/trace/trace.h for 'struct trace_parser' details.
1214 */
1215int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216        size_t cnt, loff_t *ppos)
1217{
1218        char ch;
1219        size_t read = 0;
1220        ssize_t ret;
1221
1222        if (!*ppos)
1223                trace_parser_clear(parser);
1224
1225        ret = get_user(ch, ubuf++);
1226        if (ret)
1227                goto out;
1228
1229        read++;
1230        cnt--;
1231
1232        /*
1233         * The parser is not finished with the last write,
1234         * continue reading the user input without skipping spaces.
1235         */
1236        if (!parser->cont) {
1237                /* skip white space */
1238                while (cnt && isspace(ch)) {
1239                        ret = get_user(ch, ubuf++);
1240                        if (ret)
1241                                goto out;
1242                        read++;
1243                        cnt--;
1244                }
1245
1246                parser->idx = 0;
1247
1248                /* only spaces were written */
1249                if (isspace(ch) || !ch) {
1250                        *ppos += read;
1251                        ret = read;
1252                        goto out;
1253                }
1254        }
1255
1256        /* read the non-space input */
1257        while (cnt && !isspace(ch) && ch) {
1258                if (parser->idx < parser->size - 1)
1259                        parser->buffer[parser->idx++] = ch;
1260                else {
1261                        ret = -EINVAL;
1262                        goto out;
1263                }
1264                ret = get_user(ch, ubuf++);
1265                if (ret)
1266                        goto out;
1267                read++;
1268                cnt--;
1269        }
1270
1271        /* We either got finished input or we have to wait for another call. */
1272        if (isspace(ch) || !ch) {
1273                parser->buffer[parser->idx] = 0;
1274                parser->cont = false;
1275        } else if (parser->idx < parser->size - 1) {
1276                parser->cont = true;
1277                parser->buffer[parser->idx++] = ch;
1278                /* Make sure the parsed string always terminates with '\0'. */
1279                parser->buffer[parser->idx] = 0;
1280        } else {
1281                ret = -EINVAL;
1282                goto out;
1283        }
1284
1285        *ppos += read;
1286        ret = read;
1287
1288out:
1289        return ret;
1290}
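
/*
 * Worked example (illustrative): a write of "foo bar\n" is consumed by
 * two calls. The first fills parser->buffer with "foo" and returns 4
 * (the token plus the separating space); the second fills it with "bar"
 * and returns 4 (the token plus the trailing newline). A token cut off
 * by the end of the input instead sets parser->cont so the next call
 * continues it rather than starting over.
 */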
1291
1292/* TODO add a seq_buf_to_buffer() */
1293static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294{
1295        int len;
1296
1297        if (trace_seq_used(s) <= s->seq.readpos)
1298                return -EBUSY;
1299
1300        len = trace_seq_used(s) - s->seq.readpos;
1301        if (cnt > len)
1302                cnt = len;
1303        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304
1305        s->seq.readpos += cnt;
1306        return cnt;
1307}
1308
1309unsigned long __read_mostly     tracing_thresh;
1310
1311#ifdef CONFIG_TRACER_MAX_TRACE
1312/*
1313 * Copy the new maximum trace into the separate maximum-trace
1314 * structure. (this way the maximum trace is permanently saved,
1315 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316 */
1317static void
1318__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319{
1320        struct trace_buffer *trace_buf = &tr->trace_buffer;
1321        struct trace_buffer *max_buf = &tr->max_buffer;
1322        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324
1325        max_buf->cpu = cpu;
1326        max_buf->time_start = data->preempt_timestamp;
1327
1328        max_data->saved_latency = tr->max_latency;
1329        max_data->critical_start = data->critical_start;
1330        max_data->critical_end = data->critical_end;
1331
1332        memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333        max_data->pid = tsk->pid;
1334        /*
1335         * If tsk == current, then use current_uid(), as that does not use
1336         * RCU. The irq tracer can be called out of RCU scope.
1337         */
1338        if (tsk == current)
1339                max_data->uid = current_uid();
1340        else
1341                max_data->uid = task_uid(tsk);
1342
1343        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344        max_data->policy = tsk->policy;
1345        max_data->rt_priority = tsk->rt_priority;
1346
 1347        /* record this task's comm */
1348        tracing_record_cmdline(tsk);
1349}
1350
1351/**
1352 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353 * @tr: tracer
1354 * @tsk: the task with the latency
1355 * @cpu: The cpu that initiated the trace.
1356 *
1357 * Flip the buffers between the @tr and the max_tr and record information
1358 * about which task was the cause of this latency.
1359 */
1360void
1361update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362{
1363        if (tr->stop_count)
1364                return;
1365
1366        WARN_ON_ONCE(!irqs_disabled());
1367
1368        if (!tr->allocated_snapshot) {
1369                /* Only the nop tracer should hit this when disabling */
1370                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1371                return;
1372        }
1373
1374        arch_spin_lock(&tr->max_lock);
1375
1376        /* Inherit the recordable setting from trace_buffer */
1377        if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1378                ring_buffer_record_on(tr->max_buffer.buffer);
1379        else
1380                ring_buffer_record_off(tr->max_buffer.buffer);
1381
1382        swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1383
1384        __update_max_tr(tr, tsk, cpu);
1385        arch_spin_unlock(&tr->max_lock);
1386}
1387
1388/**
1389 * update_max_tr_single - only copy one trace over, and reset the rest
 1390 * @tr: tracer
 1391 * @tsk: task with the latency
 1392 * @cpu: the cpu of the buffer to copy.
1393 *
1394 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1395 */
1396void
1397update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1398{
1399        int ret;
1400
1401        if (tr->stop_count)
1402                return;
1403
1404        WARN_ON_ONCE(!irqs_disabled());
1405        if (!tr->allocated_snapshot) {
1406                /* Only the nop tracer should hit this when disabling */
1407                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1408                return;
1409        }
1410
1411        arch_spin_lock(&tr->max_lock);
1412
1413        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1414
1415        if (ret == -EBUSY) {
1416                /*
1417                 * We failed to swap the buffer due to a commit taking
1418                 * place on this CPU. We fail to record, but we reset
1419                 * the max trace buffer (no one writes directly to it)
1420                 * and flag that it failed.
1421                 */
1422                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1423                        "Failed to swap buffers due to commit in progress\n");
1424        }
1425
1426        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1427
1428        __update_max_tr(tr, tsk, cpu);
1429        arch_spin_unlock(&tr->max_lock);
1430}
1431#endif /* CONFIG_TRACER_MAX_TRACE */
1432
1433static int wait_on_pipe(struct trace_iterator *iter, bool full)
1434{
1435        /* Iterators are static, they should be filled or empty */
1436        if (trace_buffer_iter(iter, iter->cpu_file))
1437                return 0;
1438
1439        return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1440                                full);
1441}
1442
1443#ifdef CONFIG_FTRACE_STARTUP_TEST
1444static bool selftests_can_run;
1445
1446struct trace_selftests {
1447        struct list_head                list;
1448        struct tracer                   *type;
1449};
1450
1451static LIST_HEAD(postponed_selftests);
1452
1453static int save_selftest(struct tracer *type)
1454{
1455        struct trace_selftests *selftest;
1456
1457        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1458        if (!selftest)
1459                return -ENOMEM;
1460
1461        selftest->type = type;
1462        list_add(&selftest->list, &postponed_selftests);
1463        return 0;
1464}
1465
1466static int run_tracer_selftest(struct tracer *type)
1467{
1468        struct trace_array *tr = &global_trace;
1469        struct tracer *saved_tracer = tr->current_trace;
1470        int ret;
1471
1472        if (!type->selftest || tracing_selftest_disabled)
1473                return 0;
1474
1475        /*
1476         * If a tracer registers early in boot up (before scheduling is
1477         * initialized and such), then do not run its selftests yet.
 1478 * Instead, run them a little later in the boot process.
1479         */
1480        if (!selftests_can_run)
1481                return save_selftest(type);
1482
1483        /*
1484         * Run a selftest on this tracer.
1485         * Here we reset the trace buffer, and set the current
1486         * tracer to be this tracer. The tracer can then run some
1487         * internal tracing to verify that everything is in order.
1488         * If we fail, we do not register this tracer.
1489         */
1490        tracing_reset_online_cpus(&tr->trace_buffer);
1491
1492        tr->current_trace = type;
1493
1494#ifdef CONFIG_TRACER_MAX_TRACE
1495        if (type->use_max_tr) {
1496                /* If we expanded the buffers, make sure the max is expanded too */
1497                if (ring_buffer_expanded)
1498                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1499                                           RING_BUFFER_ALL_CPUS);
1500                tr->allocated_snapshot = true;
1501        }
1502#endif
1503
1504        /* the test is responsible for initializing and enabling */
1505        pr_info("Testing tracer %s: ", type->name);
1506        ret = type->selftest(type, tr);
1507        /* the test is responsible for resetting too */
1508        tr->current_trace = saved_tracer;
1509        if (ret) {
1510                printk(KERN_CONT "FAILED!\n");
1511                /* Add the warning after printing 'FAILED' */
1512                WARN_ON(1);
1513                return -1;
1514        }
1515        /* Only reset on passing, to avoid touching corrupted buffers */
1516        tracing_reset_online_cpus(&tr->trace_buffer);
1517
1518#ifdef CONFIG_TRACER_MAX_TRACE
1519        if (type->use_max_tr) {
1520                tr->allocated_snapshot = false;
1521
1522                /* Shrink the max buffer again */
1523                if (ring_buffer_expanded)
1524                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1525                                           RING_BUFFER_ALL_CPUS);
1526        }
1527#endif
1528
1529        printk(KERN_CONT "PASSED\n");
1530        return 0;
1531}
1532
1533static __init int init_trace_selftests(void)
1534{
1535        struct trace_selftests *p, *n;
1536        struct tracer *t, **last;
1537        int ret;
1538
1539        selftests_can_run = true;
1540
1541        mutex_lock(&trace_types_lock);
1542
1543        if (list_empty(&postponed_selftests))
1544                goto out;
1545
1546        pr_info("Running postponed tracer tests:\n");
1547
1548        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1549                ret = run_tracer_selftest(p->type);
1550                /* If the test fails, then warn and remove from available_tracers */
1551                if (ret < 0) {
1552                        WARN(1, "tracer: %s failed selftest, disabling\n",
1553                             p->type->name);
1554                        last = &trace_types;
1555                        for (t = trace_types; t; t = t->next) {
1556                                if (t == p->type) {
1557                                        *last = t->next;
1558                                        break;
1559                                }
1560                                last = &t->next;
1561                        }
1562                }
1563                list_del(&p->list);
1564                kfree(p);
1565        }
1566
1567 out:
1568        mutex_unlock(&trace_types_lock);
1569
1570        return 0;
1571}
1572core_initcall(init_trace_selftests);
1573#else
1574static inline int run_tracer_selftest(struct tracer *type)
1575{
1576        return 0;
1577}
1578#endif /* CONFIG_FTRACE_STARTUP_TEST */
1579
1580static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1581
1582static void __init apply_trace_boot_options(void);
1583
1584/**
1585 * register_tracer - register a tracer with the ftrace system.
 1586 * @type: the plugin for the tracer
1587 *
1588 * Register a new plugin tracer.
1589 */
1590int __init register_tracer(struct tracer *type)
1591{
1592        struct tracer *t;
1593        int ret = 0;
1594
1595        if (!type->name) {
1596                pr_info("Tracer must have a name\n");
1597                return -1;
1598        }
1599
1600        if (strlen(type->name) >= MAX_TRACER_SIZE) {
1601                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1602                return -1;
1603        }
1604
1605        mutex_lock(&trace_types_lock);
1606
1607        tracing_selftest_running = true;
1608
1609        for (t = trace_types; t; t = t->next) {
1610                if (strcmp(type->name, t->name) == 0) {
1611                        /* already found */
1612                        pr_info("Tracer %s already registered\n",
1613                                type->name);
1614                        ret = -1;
1615                        goto out;
1616                }
1617        }
1618
1619        if (!type->set_flag)
1620                type->set_flag = &dummy_set_flag;
1621        if (!type->flags) {
 1622                /* allocate a dummy tracer_flags */
1623                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1624                if (!type->flags) {
1625                        ret = -ENOMEM;
1626                        goto out;
1627                }
1628                type->flags->val = 0;
1629                type->flags->opts = dummy_tracer_opt;
1630        } else
1631                if (!type->flags->opts)
1632                        type->flags->opts = dummy_tracer_opt;
1633
1634        /* store the tracer for __set_tracer_option */
1635        type->flags->trace = type;
1636
1637        ret = run_tracer_selftest(type);
1638        if (ret < 0)
1639                goto out;
1640
1641        type->next = trace_types;
1642        trace_types = type;
1643        add_tracer_options(&global_trace, type);
1644
1645 out:
1646        tracing_selftest_running = false;
1647        mutex_unlock(&trace_types_lock);
1648
1649        if (ret || !default_bootup_tracer)
1650                goto out_unlock;
1651
1652        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1653                goto out_unlock;
1654
1655        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1656        /* Do we want this tracer to start on bootup? */
1657        tracing_set_tracer(&global_trace, type->name);
1658        default_bootup_tracer = NULL;
1659
1660        apply_trace_boot_options();
1661
1662        /* disable other selftests, since this will break them. */
1663        tracing_selftest_disabled = true;
1664#ifdef CONFIG_FTRACE_STARTUP_TEST
1665        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1666               type->name);
1667#endif
1668
1669 out_unlock:
1670        return ret;
1671}
1672
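/*
 * Illustrative sketch (names example_* are hypothetical): roughly how a
 * built-in tracer plugin hooks into register_tracer() from its own
 * initcall.  Kept under #if 0 as a reference only, not a real tracer.
 */
#if 0	/* example only */
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int init_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif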
1673void tracing_reset(struct trace_buffer *buf, int cpu)
1674{
1675        struct ring_buffer *buffer = buf->buffer;
1676
1677        if (!buffer)
1678                return;
1679
1680        ring_buffer_record_disable(buffer);
1681
1682        /* Make sure all commits have finished */
1683        synchronize_sched();
1684        ring_buffer_reset_cpu(buffer, cpu);
1685
1686        ring_buffer_record_enable(buffer);
1687}
1688
1689void tracing_reset_online_cpus(struct trace_buffer *buf)
1690{
1691        struct ring_buffer *buffer = buf->buffer;
1692        int cpu;
1693
1694        if (!buffer)
1695                return;
1696
1697        ring_buffer_record_disable(buffer);
1698
1699        /* Make sure all commits have finished */
1700        synchronize_sched();
1701
1702        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1703
1704        for_each_online_cpu(cpu)
1705                ring_buffer_reset_cpu(buffer, cpu);
1706
1707        ring_buffer_record_enable(buffer);
1708}
1709
1710/* Must have trace_types_lock held */
1711void tracing_reset_all_online_cpus(void)
1712{
1713        struct trace_array *tr;
1714
1715        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1716                if (!tr->clear_trace)
1717                        continue;
1718                tr->clear_trace = false;
1719                tracing_reset_online_cpus(&tr->trace_buffer);
1720#ifdef CONFIG_TRACER_MAX_TRACE
1721                tracing_reset_online_cpus(&tr->max_buffer);
1722#endif
1723        }
1724}
1725
1726static int *tgid_map;
1727
1728#define SAVED_CMDLINES_DEFAULT 128
1729#define NO_CMDLINE_MAP UINT_MAX
1730static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1731struct saved_cmdlines_buffer {
1732        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1733        unsigned *map_cmdline_to_pid;
1734        unsigned cmdline_num;
1735        int cmdline_idx;
1736        char *saved_cmdlines;
1737};
1738static struct saved_cmdlines_buffer *savedcmd;
1739
1740/* temporarily disable recording */
1741static atomic_t trace_record_taskinfo_disabled __read_mostly;
1742
1743static inline char *get_saved_cmdlines(int idx)
1744{
1745        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1746}
1747
1748static inline void set_cmdline(int idx, const char *cmdline)
1749{
1750        memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1751}
1752
1753static int allocate_cmdlines_buffer(unsigned int val,
1754                                    struct saved_cmdlines_buffer *s)
1755{
1756        s->map_cmdline_to_pid = kmalloc_array(val,
1757                                              sizeof(*s->map_cmdline_to_pid),
1758                                              GFP_KERNEL);
1759        if (!s->map_cmdline_to_pid)
1760                return -ENOMEM;
1761
1762        s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1763        if (!s->saved_cmdlines) {
1764                kfree(s->map_cmdline_to_pid);
1765                return -ENOMEM;
1766        }
1767
1768        s->cmdline_idx = 0;
1769        s->cmdline_num = val;
1770        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1771               sizeof(s->map_pid_to_cmdline));
1772        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1773               val * sizeof(*s->map_cmdline_to_pid));
1774
1775        return 0;
1776}
1777
1778static int trace_create_savedcmd(void)
1779{
1780        int ret;
1781
1782        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1783        if (!savedcmd)
1784                return -ENOMEM;
1785
1786        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1787        if (ret < 0) {
1788                kfree(savedcmd);
1789                savedcmd = NULL;
1790                return -ENOMEM;
1791        }
1792
1793        return 0;
1794}
1795
1796int is_tracing_stopped(void)
1797{
1798        return global_trace.stop_count;
1799}
1800
1801/**
1802 * tracing_start - quick start of the tracer
1803 *
1804 * If tracing is enabled but was stopped by tracing_stop,
1805 * this will start the tracer back up.
1806 */
1807void tracing_start(void)
1808{
1809        struct ring_buffer *buffer;
1810        unsigned long flags;
1811
1812        if (tracing_disabled)
1813                return;
1814
1815        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1816        if (--global_trace.stop_count) {
1817                if (global_trace.stop_count < 0) {
1818                        /* Someone screwed up their debugging */
1819                        WARN_ON_ONCE(1);
1820                        global_trace.stop_count = 0;
1821                }
1822                goto out;
1823        }
1824
1825        /* Prevent the buffers from switching */
1826        arch_spin_lock(&global_trace.max_lock);
1827
1828        buffer = global_trace.trace_buffer.buffer;
1829        if (buffer)
1830                ring_buffer_record_enable(buffer);
1831
1832#ifdef CONFIG_TRACER_MAX_TRACE
1833        buffer = global_trace.max_buffer.buffer;
1834        if (buffer)
1835                ring_buffer_record_enable(buffer);
1836#endif
1837
1838        arch_spin_unlock(&global_trace.max_lock);
1839
1840 out:
1841        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1842}
1843
1844static void tracing_start_tr(struct trace_array *tr)
1845{
1846        struct ring_buffer *buffer;
1847        unsigned long flags;
1848
1849        if (tracing_disabled)
1850                return;
1851
1852        /* If global, we need to also start the max tracer */
1853        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1854                return tracing_start();
1855
1856        raw_spin_lock_irqsave(&tr->start_lock, flags);
1857
1858        if (--tr->stop_count) {
1859                if (tr->stop_count < 0) {
1860                        /* Someone screwed up their debugging */
1861                        WARN_ON_ONCE(1);
1862                        tr->stop_count = 0;
1863                }
1864                goto out;
1865        }
1866
1867        buffer = tr->trace_buffer.buffer;
1868        if (buffer)
1869                ring_buffer_record_enable(buffer);
1870
1871 out:
1872        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1873}
1874
1875/**
1876 * tracing_stop - quick stop of the tracer
1877 *
1878 * Lightweight way to stop tracing. Use in conjunction with
1879 * tracing_start.
1880 */
1881void tracing_stop(void)
1882{
1883        struct ring_buffer *buffer;
1884        unsigned long flags;
1885
1886        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1887        if (global_trace.stop_count++)
1888                goto out;
1889
1890        /* Prevent the buffers from switching */
1891        arch_spin_lock(&global_trace.max_lock);
1892
1893        buffer = global_trace.trace_buffer.buffer;
1894        if (buffer)
1895                ring_buffer_record_disable(buffer);
1896
1897#ifdef CONFIG_TRACER_MAX_TRACE
1898        buffer = global_trace.max_buffer.buffer;
1899        if (buffer)
1900                ring_buffer_record_disable(buffer);
1901#endif
1902
1903        arch_spin_unlock(&global_trace.max_lock);
1904
1905 out:
1906        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1907}
1908
1909static void tracing_stop_tr(struct trace_array *tr)
1910{
1911        struct ring_buffer *buffer;
1912        unsigned long flags;
1913
1914        /* If global, we need to also stop the max tracer */
1915        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1916                return tracing_stop();
1917
1918        raw_spin_lock_irqsave(&tr->start_lock, flags);
1919        if (tr->stop_count++)
1920                goto out;
1921
1922        buffer = tr->trace_buffer.buffer;
1923        if (buffer)
1924                ring_buffer_record_disable(buffer);
1925
1926 out:
1927        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1928}
1929
1930static int trace_save_cmdline(struct task_struct *tsk)
1931{
1932        unsigned pid, idx;
1933
1934        /* treat recording of idle task as a success */
1935        if (!tsk->pid)
1936                return 1;
1937
1938        if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1939                return 0;
1940
1941        /*
1942         * It's not the end of the world if we don't get
1943         * the lock, but we also don't want to spin
1944         * nor do we want to disable interrupts,
1945         * so if we miss here, then better luck next time.
1946         */
1947        if (!arch_spin_trylock(&trace_cmdline_lock))
1948                return 0;
1949
1950        idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1951        if (idx == NO_CMDLINE_MAP) {
1952                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1953
1954                /*
1955                 * Check whether the cmdline buffer at idx has a pid
1956                 * mapped. We are going to overwrite that entry so we
1957                 * need to clear the map_pid_to_cmdline. Otherwise we
1958                 * would read the new comm for the old pid.
1959                 */
1960                pid = savedcmd->map_cmdline_to_pid[idx];
1961                if (pid != NO_CMDLINE_MAP)
1962                        savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1963
1964                savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1965                savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1966
1967                savedcmd->cmdline_idx = idx;
1968        }
1969
1970        set_cmdline(idx, tsk->comm);
1971
1972        arch_spin_unlock(&trace_cmdline_lock);
1973
1974        return 1;
1975}
1976
1977static void __trace_find_cmdline(int pid, char comm[])
1978{
1979        unsigned map;
1980
1981        if (!pid) {
1982                strcpy(comm, "<idle>");
1983                return;
1984        }
1985
1986        if (WARN_ON_ONCE(pid < 0)) {
1987                strcpy(comm, "<XXX>");
1988                return;
1989        }
1990
1991        if (pid > PID_MAX_DEFAULT) {
1992                strcpy(comm, "<...>");
1993                return;
1994        }
1995
1996        map = savedcmd->map_pid_to_cmdline[pid];
1997        if (map != NO_CMDLINE_MAP)
1998                strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1999        else
2000                strcpy(comm, "<...>");
2001}
2002
2003void trace_find_cmdline(int pid, char comm[])
2004{
2005        preempt_disable();
2006        arch_spin_lock(&trace_cmdline_lock);
2007
2008        __trace_find_cmdline(pid, comm);
2009
2010        arch_spin_unlock(&trace_cmdline_lock);
2011        preempt_enable();
2012}
2013
2014int trace_find_tgid(int pid)
2015{
2016        if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2017                return 0;
2018
2019        return tgid_map[pid];
2020}
2021
2022static int trace_save_tgid(struct task_struct *tsk)
2023{
2024        /* treat recording of idle task as a success */
2025        if (!tsk->pid)
2026                return 1;
2027
2028        if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2029                return 0;
2030
2031        tgid_map[tsk->pid] = tsk->tgid;
2032        return 1;
2033}
2034
2035static bool tracing_record_taskinfo_skip(int flags)
2036{
2037        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2038                return true;
2039        if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2040                return true;
2041        if (!__this_cpu_read(trace_taskinfo_save))
2042                return true;
2043        return false;
2044}
2045
2046/**
2047 * tracing_record_taskinfo - record the task info of a task
2048 *
2049 * @task:  task to record
2050 * @flags: TRACE_RECORD_CMDLINE for recording comm
2051 *         TRACE_RECORD_TGID for recording tgid
2052 */
2053void tracing_record_taskinfo(struct task_struct *task, int flags)
2054{
2055        bool done;
2056
2057        if (tracing_record_taskinfo_skip(flags))
2058                return;
2059
2060        /*
2061         * Record as much task information as possible. If some fail, continue
2062         * to try to record the others.
2063         */
2064        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2065        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2066
2067        /* If recording any information failed, try again soon. */
2068        if (!done)
2069                return;
2070
2071        __this_cpu_write(trace_taskinfo_save, false);
2072}
2073
2074/**
2075 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2076 *
2077 * @prev:  previous task during sched_switch
2078 * @next:  next task during sched_switch
2079 * @flags: TRACE_RECORD_CMDLINE for recording comm
2080 *         TRACE_RECORD_TGID for recording tgid
2081 */
2082void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2083                                          struct task_struct *next, int flags)
2084{
2085        bool done;
2086
2087        if (tracing_record_taskinfo_skip(flags))
2088                return;
2089
2090        /*
2091         * Record as much task information as possible. If some fail, continue
2092         * to try to record the others.
2093         */
2094        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2095        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2096        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2097        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2098
2099        /* If recording any information failed, try again soon. */
2100        if (!done)
2101                return;
2102
2103        __this_cpu_write(trace_taskinfo_save, false);
2104}
2105
2106/* Helpers to record specific task information */
2107void tracing_record_cmdline(struct task_struct *task)
2108{
2109        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2110}
2111
2112void tracing_record_tgid(struct task_struct *task)
2113{
2114        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2115}
2116
2117/*
2118 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2119 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2120 * simplifies those functions and keeps them in sync.
2121 */
2122enum print_line_t trace_handle_return(struct trace_seq *s)
2123{
2124        return trace_seq_has_overflowed(s) ?
2125                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2126}
2127EXPORT_SYMBOL_GPL(trace_handle_return);
2128
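/*
 * Illustrative sketch (the example_event_trace() callback and its output
 * text are hypothetical): an event's output callback typically builds its
 * text in iter->seq and ends with trace_handle_return(), which maps a
 * trace_seq overflow to TRACE_TYPE_PARTIAL_LINE.
 */
#if 0	/* example only */
static enum print_line_t example_event_trace(struct trace_iterator *iter,
                                             int flags,
                                             struct trace_event *event)
{
        struct trace_seq *s = &iter->seq;

        trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);
        return trace_handle_return(s);
}
#endif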
2129void
2130tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2131                             int pc)
2132{
2133        struct task_struct *tsk = current;
2134
2135        entry->preempt_count            = pc & 0xff;
2136        entry->pid                      = (tsk) ? tsk->pid : 0;
2137        entry->flags =
2138#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2139                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2140#else
2141                TRACE_FLAG_IRQS_NOSUPPORT |
2142#endif
2143                ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2144                ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2145                ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2146                (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2147                (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2148}
2149EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2150
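/*
 * Note: the TRACE_FLAG_* bits and the preempt count recorded above are what
 * the latency-format headers further down in this file
 * (print_lat_help_header(), print_func_help_header_irq()) describe as the
 * irqs-off / need-resched / hardirq-softirq / preempt-depth columns.
 */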
2151struct ring_buffer_event *
2152trace_buffer_lock_reserve(struct ring_buffer *buffer,
2153                          int type,
2154                          unsigned long len,
2155                          unsigned long flags, int pc)
2156{
2157        return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2158}
2159
2160DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2161DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2162static int trace_buffered_event_ref;
2163
2164/**
2165 * trace_buffered_event_enable - enable buffering events
2166 *
2167 * When events are being filtered, it is quicker to use a temporary
2168 * buffer to write the event data into if there's a likely chance
2169 * that it will not be committed. The discard of the ring buffer
2170 * is not as fast as committing, and is much slower than copying
2171 * a commit.
2172 *
2173 * When an event is to be filtered, per-CPU buffers are allocated to
2174 * write the event data into; if the event is filtered and discarded
2175 * it is simply dropped, otherwise the entire data is committed
2176 * in one shot.
2177 */
2178void trace_buffered_event_enable(void)
2179{
2180        struct ring_buffer_event *event;
2181        struct page *page;
2182        int cpu;
2183
2184        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2185
2186        if (trace_buffered_event_ref++)
2187                return;
2188
2189        for_each_tracing_cpu(cpu) {
2190                page = alloc_pages_node(cpu_to_node(cpu),
2191                                        GFP_KERNEL | __GFP_NORETRY, 0);
2192                if (!page)
2193                        goto failed;
2194
2195                event = page_address(page);
2196                memset(event, 0, sizeof(*event));
2197
2198                per_cpu(trace_buffered_event, cpu) = event;
2199
2200                preempt_disable();
2201                if (cpu == smp_processor_id() &&
2202                    this_cpu_read(trace_buffered_event) !=
2203                    per_cpu(trace_buffered_event, cpu))
2204                        WARN_ON_ONCE(1);
2205                preempt_enable();
2206        }
2207
2208        return;
2209 failed:
2210        trace_buffered_event_disable();
2211}
2212
2213static void enable_trace_buffered_event(void *data)
2214{
2215        /* Probably not needed, but do it anyway */
2216        smp_rmb();
2217        this_cpu_dec(trace_buffered_event_cnt);
2218}
2219
2220static void disable_trace_buffered_event(void *data)
2221{
2222        this_cpu_inc(trace_buffered_event_cnt);
2223}
2224
2225/**
2226 * trace_buffered_event_disable - disable buffering events
2227 *
2228 * When a filter is removed, it is faster to not use the buffered
2229 * events, and to commit directly into the ring buffer. Free up
2230 * the temp buffers when there are no more users. This requires
2231 * special synchronization with current events.
2232 */
2233void trace_buffered_event_disable(void)
2234{
2235        int cpu;
2236
2237        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2238
2239        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2240                return;
2241
2242        if (--trace_buffered_event_ref)
2243                return;
2244
2245        preempt_disable();
2246        /* For each CPU, set the buffer as used. */
2247        smp_call_function_many(tracing_buffer_mask,
2248                               disable_trace_buffered_event, NULL, 1);
2249        preempt_enable();
2250
2251        /* Wait for all current users to finish */
2252        synchronize_sched();
2253
2254        for_each_tracing_cpu(cpu) {
2255                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2256                per_cpu(trace_buffered_event, cpu) = NULL;
2257        }
2258        /*
2259         * Make sure trace_buffered_event is NULL before clearing
2260         * trace_buffered_event_cnt.
2261         */
2262        smp_wmb();
2263
2264        preempt_disable();
2265        /* Do the work on each cpu */
2266        smp_call_function_many(tracing_buffer_mask,
2267                               enable_trace_buffered_event, NULL, 1);
2268        preempt_enable();
2269}
2270
2271static struct ring_buffer *temp_buffer;
2272
2273struct ring_buffer_event *
2274trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2275                          struct trace_event_file *trace_file,
2276                          int type, unsigned long len,
2277                          unsigned long flags, int pc)
2278{
2279        struct ring_buffer_event *entry;
2280        int val;
2281
2282        *current_rb = trace_file->tr->trace_buffer.buffer;
2283
2284        if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2285             (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2286            (entry = this_cpu_read(trace_buffered_event))) {
2287                /* Try to use the per cpu buffer first */
2288                val = this_cpu_inc_return(trace_buffered_event_cnt);
2289                if (val == 1) {
2290                        trace_event_setup(entry, type, flags, pc);
2291                        entry->array[0] = len;
2292                        return entry;
2293                }
2294                this_cpu_dec(trace_buffered_event_cnt);
2295        }
2296
2297        entry = __trace_buffer_lock_reserve(*current_rb,
2298                                            type, len, flags, pc);
2299        /*
2300         * If tracing is off, but we have triggers enabled
2301         * we still need to look at the event data. Use the temp_buffer
2302         * to store the trace event for the trigger to use. It's recursion
2303         * safe and will not be recorded anywhere.
2304         */
2305        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2306                *current_rb = temp_buffer;
2307                entry = __trace_buffer_lock_reserve(*current_rb,
2308                                                    type, len, flags, pc);
2309        }
2310        return entry;
2311}
2312EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2313
2314static DEFINE_SPINLOCK(tracepoint_iter_lock);
2315static DEFINE_MUTEX(tracepoint_printk_mutex);
2316
2317static void output_printk(struct trace_event_buffer *fbuffer)
2318{
2319        struct trace_event_call *event_call;
2320        struct trace_event *event;
2321        unsigned long flags;
2322        struct trace_iterator *iter = tracepoint_print_iter;
2323
2324        /* We should never get here if iter is NULL */
2325        if (WARN_ON_ONCE(!iter))
2326                return;
2327
2328        event_call = fbuffer->trace_file->event_call;
2329        if (!event_call || !event_call->event.funcs ||
2330            !event_call->event.funcs->trace)
2331                return;
2332
2333        event = &fbuffer->trace_file->event_call->event;
2334
2335        spin_lock_irqsave(&tracepoint_iter_lock, flags);
2336        trace_seq_init(&iter->seq);
2337        iter->ent = fbuffer->entry;
2338        event_call->event.funcs->trace(iter, 0, event);
2339        trace_seq_putc(&iter->seq, 0);
2340        printk("%s", iter->seq.buffer);
2341
2342        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2343}
2344
2345int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2346                             void __user *buffer, size_t *lenp,
2347                             loff_t *ppos)
2348{
2349        int save_tracepoint_printk;
2350        int ret;
2351
2352        mutex_lock(&tracepoint_printk_mutex);
2353        save_tracepoint_printk = tracepoint_printk;
2354
2355        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2356
2357        /*
2358         * This will force exiting early, as tracepoint_printk
2359         * is always zero when tracepoint_print_iter is not allocated.
2360         */
2361        if (!tracepoint_print_iter)
2362                tracepoint_printk = 0;
2363
2364        if (save_tracepoint_printk == tracepoint_printk)
2365                goto out;
2366
2367        if (tracepoint_printk)
2368                static_key_enable(&tracepoint_printk_key.key);
2369        else
2370                static_key_disable(&tracepoint_printk_key.key);
2371
2372 out:
2373        mutex_unlock(&tracepoint_printk_mutex);
2374
2375        return ret;
2376}
2377
2378void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2379{
2380        if (static_key_false(&tracepoint_printk_key.key))
2381                output_printk(fbuffer);
2382
2383        event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2384                                    fbuffer->event, fbuffer->entry,
2385                                    fbuffer->flags, fbuffer->pc);
2386}
2387EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2388
2389/*
2390 * Skip 3:
2391 *
2392 *   trace_buffer_unlock_commit_regs()
2393 *   trace_event_buffer_commit()
2394 *   trace_event_raw_event_xxx()
2395 */
2396# define STACK_SKIP 3
2397
2398void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2399                                     struct ring_buffer *buffer,
2400                                     struct ring_buffer_event *event,
2401                                     unsigned long flags, int pc,
2402                                     struct pt_regs *regs)
2403{
2404        __buffer_unlock_commit(buffer, event);
2405
2406        /*
2407         * If regs is not set, then skip the necessary functions.
2408         * Note, we can still get here via blktrace, wakeup tracer
2409         * and mmiotrace, but that's ok if they lose a function or
2410         * two. They are not that meaningful.
2411         */
2412        ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2413        ftrace_trace_userstack(buffer, flags, pc);
2414}
2415
2416/*
2417 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2418 */
2419void
2420trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2421                                   struct ring_buffer_event *event)
2422{
2423        __buffer_unlock_commit(buffer, event);
2424}
2425
2426static void
2427trace_process_export(struct trace_export *export,
2428               struct ring_buffer_event *event)
2429{
2430        struct trace_entry *entry;
2431        unsigned int size = 0;
2432
2433        entry = ring_buffer_event_data(event);
2434        size = ring_buffer_event_length(event);
2435        export->write(export, entry, size);
2436}
2437
2438static DEFINE_MUTEX(ftrace_export_lock);
2439
2440static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2441
2442static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2443
2444static inline void ftrace_exports_enable(void)
2445{
2446        static_branch_enable(&ftrace_exports_enabled);
2447}
2448
2449static inline void ftrace_exports_disable(void)
2450{
2451        static_branch_disable(&ftrace_exports_enabled);
2452}
2453
2454void ftrace_exports(struct ring_buffer_event *event)
2455{
2456        struct trace_export *export;
2457
2458        preempt_disable_notrace();
2459
2460        export = rcu_dereference_raw_notrace(ftrace_exports_list);
2461        while (export) {
2462                trace_process_export(export, event);
2463                export = rcu_dereference_raw_notrace(export->next);
2464        }
2465
2466        preempt_enable_notrace();
2467}
2468
2469static inline void
2470add_trace_export(struct trace_export **list, struct trace_export *export)
2471{
2472        rcu_assign_pointer(export->next, *list);
2473        /*
2474         * We are entering export into the list but another
2475         * CPU might be walking that list. We need to make sure
2476         * the export->next pointer is valid before another CPU sees
2477         * the export pointer included in the list.
2478         */
2479        rcu_assign_pointer(*list, export);
2480}
2481
2482static inline int
2483rm_trace_export(struct trace_export **list, struct trace_export *export)
2484{
2485        struct trace_export **p;
2486
2487        for (p = list; *p != NULL; p = &(*p)->next)
2488                if (*p == export)
2489                        break;
2490
2491        if (*p != export)
2492                return -1;
2493
2494        rcu_assign_pointer(*p, (*p)->next);
2495
2496        return 0;
2497}
2498
2499static inline void
2500add_ftrace_export(struct trace_export **list, struct trace_export *export)
2501{
2502        if (*list == NULL)
2503                ftrace_exports_enable();
2504
2505        add_trace_export(list, export);
2506}
2507
2508static inline int
2509rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2510{
2511        int ret;
2512
2513        ret = rm_trace_export(list, export);
2514        if (*list == NULL)
2515                ftrace_exports_disable();
2516
2517        return ret;
2518}
2519
2520int register_ftrace_export(struct trace_export *export)
2521{
2522        if (WARN_ON_ONCE(!export->write))
2523                return -1;
2524
2525        mutex_lock(&ftrace_export_lock);
2526
2527        add_ftrace_export(&ftrace_exports_list, export);
2528
2529        mutex_unlock(&ftrace_export_lock);
2530
2531        return 0;
2532}
2533EXPORT_SYMBOL_GPL(register_ftrace_export);
2534
2535int unregister_ftrace_export(struct trace_export *export)
2536{
2537        int ret;
2538
2539        mutex_lock(&ftrace_export_lock);
2540
2541        ret = rm_ftrace_export(&ftrace_exports_list, export);
2542
2543        mutex_unlock(&ftrace_export_lock);
2544
2545        return ret;
2546}
2547EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2548
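/*
 * Illustrative sketch (the example_export_* names and the pr_debug() sink
 * are hypothetical): a minimal ftrace export.  The ->write() arguments
 * mirror the call made by trace_process_export() above.
 */
#if 0	/* example only */
static void example_export_write(struct trace_export *export,
                                 const void *data, unsigned int len)
{
        pr_debug("exporting %u bytes of trace data\n", len);
}

static struct trace_export example_export = {
        .write  = example_export_write,
};

static int __init example_export_init(void)
{
        return register_ftrace_export(&example_export);
}
late_initcall(example_export_init);
#endif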
2549void
2550trace_function(struct trace_array *tr,
2551               unsigned long ip, unsigned long parent_ip, unsigned long flags,
2552               int pc)
2553{
2554        struct trace_event_call *call = &event_function;
2555        struct ring_buffer *buffer = tr->trace_buffer.buffer;
2556        struct ring_buffer_event *event;
2557        struct ftrace_entry *entry;
2558
2559        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2560                                            flags, pc);
2561        if (!event)
2562                return;
2563        entry   = ring_buffer_event_data(event);
2564        entry->ip                       = ip;
2565        entry->parent_ip                = parent_ip;
2566
2567        if (!call_filter_check_discard(call, entry, buffer, event)) {
2568                if (static_branch_unlikely(&ftrace_exports_enabled))
2569                        ftrace_exports(event);
2570                __buffer_unlock_commit(buffer, event);
2571        }
2572}
2573
2574#ifdef CONFIG_STACKTRACE
2575
2576#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2577struct ftrace_stack {
2578        unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2579};
2580
2581static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2582static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2583
2584static void __ftrace_trace_stack(struct ring_buffer *buffer,
2585                                 unsigned long flags,
2586                                 int skip, int pc, struct pt_regs *regs)
2587{
2588        struct trace_event_call *call = &event_kernel_stack;
2589        struct ring_buffer_event *event;
2590        struct stack_entry *entry;
2591        struct stack_trace trace;
2592        int use_stack;
2593        int size = FTRACE_STACK_ENTRIES;
2594
2595        trace.nr_entries        = 0;
2596        trace.skip              = skip;
2597
2598        /*
2599         * Add one, for this function and the call to save_stack_trace().
2600         * If regs is set, then these functions will not be in the way.
2601         */
2602#ifndef CONFIG_UNWINDER_ORC
2603        if (!regs)
2604                trace.skip++;
2605#endif
2606
2607        /*
2608         * Since events can happen in NMIs there's no safe way to
2609         * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2610         * or NMI comes in, it will just have to use the default
2611         * FTRACE_STACK_ENTRIES.
2612         */
2613        preempt_disable_notrace();
2614
2615        use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2616        /*
2617         * We don't need any atomic variables, just a barrier.
2618         * If an interrupt comes in, we don't care, because it would
2619         * have exited and put the counter back to what we want.
2620         * We just need a barrier to keep gcc from moving things
2621         * around.
2622         */
2623        barrier();
2624        if (use_stack == 1) {
2625                trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2626                trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2627
2628                if (regs)
2629                        save_stack_trace_regs(regs, &trace);
2630                else
2631                        save_stack_trace(&trace);
2632
2633                if (trace.nr_entries > size)
2634                        size = trace.nr_entries;
2635        } else
2636                /* From now on, use_stack is a boolean */
2637                use_stack = 0;
2638
2639        size *= sizeof(unsigned long);
2640
2641        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2642                                            sizeof(*entry) + size, flags, pc);
2643        if (!event)
2644                goto out;
2645        entry = ring_buffer_event_data(event);
2646
2647        memset(&entry->caller, 0, size);
2648
2649        if (use_stack)
2650                memcpy(&entry->caller, trace.entries,
2651                       trace.nr_entries * sizeof(unsigned long));
2652        else {
2653                trace.max_entries       = FTRACE_STACK_ENTRIES;
2654                trace.entries           = entry->caller;
2655                if (regs)
2656                        save_stack_trace_regs(regs, &trace);
2657                else
2658                        save_stack_trace(&trace);
2659        }
2660
2661        entry->size = trace.nr_entries;
2662
2663        if (!call_filter_check_discard(call, entry, buffer, event))
2664                __buffer_unlock_commit(buffer, event);
2665
2666 out:
2667        /* Again, don't let gcc optimize things here */
2668        barrier();
2669        __this_cpu_dec(ftrace_stack_reserve);
2670        preempt_enable_notrace();
2671
2672}
2673
2674static inline void ftrace_trace_stack(struct trace_array *tr,
2675                                      struct ring_buffer *buffer,
2676                                      unsigned long flags,
2677                                      int skip, int pc, struct pt_regs *regs)
2678{
2679        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2680                return;
2681
2682        __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2683}
2684
2685void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2686                   int pc)
2687{
2688        struct ring_buffer *buffer = tr->trace_buffer.buffer;
2689
2690        if (rcu_is_watching()) {
2691                __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2692                return;
2693        }
2694
2695        /*
2696         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2697         * but if the above rcu_is_watching() failed, then the NMI
2698         * triggered someplace critical, and rcu_irq_enter() should
2699         * not be called from NMI.
2700         */
2701        if (unlikely(in_nmi()))
2702                return;
2703
2704        rcu_irq_enter_irqson();
2705        __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2706        rcu_irq_exit_irqson();
2707}
2708
2709/**
2710 * trace_dump_stack - record a stack back trace in the trace buffer
2711 * @skip: Number of functions to skip (helper handlers)
2712 */
2713void trace_dump_stack(int skip)
2714{
2715        unsigned long flags;
2716
2717        if (tracing_disabled || tracing_selftest_running)
2718                return;
2719
2720        local_save_flags(flags);
2721
2722#ifndef CONFIG_UNWINDER_ORC
2723        /* Skip 1 to skip this function. */
2724        skip++;
2725#endif
2726        __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2727                             flags, skip, preempt_count(), NULL);
2728}
2729
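/*
 * Example use (sketch): calling trace_dump_stack(0) at a point of interest
 * records the current kernel stack into the trace buffer alongside the
 * surrounding events, where it is printed as a "<stack trace>" entry.
 */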
2730static DEFINE_PER_CPU(int, user_stack_count);
2731
2732void
2733ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2734{
2735        struct trace_event_call *call = &event_user_stack;
2736        struct ring_buffer_event *event;
2737        struct userstack_entry *entry;
2738        struct stack_trace trace;
2739
2740        if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2741                return;
2742
2743        /*
2744         * NMIs cannot handle page faults, even with fixups.
2745         * Saving the user stack can (and often does) fault.
2746         */
2747        if (unlikely(in_nmi()))
2748                return;
2749
2750        /*
2751         * prevent recursion, since the user stack tracing may
2752         * trigger other kernel events.
2753         */
2754        preempt_disable();
2755        if (__this_cpu_read(user_stack_count))
2756                goto out;
2757
2758        __this_cpu_inc(user_stack_count);
2759
2760        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2761                                            sizeof(*entry), flags, pc);
2762        if (!event)
2763                goto out_drop_count;
2764        entry   = ring_buffer_event_data(event);
2765
2766        entry->tgid             = current->tgid;
2767        memset(&entry->caller, 0, sizeof(entry->caller));
2768
2769        trace.nr_entries        = 0;
2770        trace.max_entries       = FTRACE_STACK_ENTRIES;
2771        trace.skip              = 0;
2772        trace.entries           = entry->caller;
2773
2774        save_stack_trace_user(&trace);
2775        if (!call_filter_check_discard(call, entry, buffer, event))
2776                __buffer_unlock_commit(buffer, event);
2777
2778 out_drop_count:
2779        __this_cpu_dec(user_stack_count);
2780 out:
2781        preempt_enable();
2782}
2783
2784#ifdef UNUSED
2785static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2786{
2787        ftrace_trace_userstack(tr, flags, preempt_count());
2788}
2789#endif /* UNUSED */
2790
2791#endif /* CONFIG_STACKTRACE */
2792
2793/* created for use with alloc_percpu */
2794struct trace_buffer_struct {
2795        int nesting;
2796        char buffer[4][TRACE_BUF_SIZE];
2797};
2798
2799static struct trace_buffer_struct *trace_percpu_buffer;
2800
2801/*
2802 * This allows for lockless recording.  If we're nested too deeply, then
2803 * this returns NULL.
2804 */
2805static char *get_trace_buf(void)
2806{
2807        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2808
2809        if (!buffer || buffer->nesting >= 4)
2810                return NULL;
2811
2812        buffer->nesting++;
2813
2814        /* Interrupts must see nesting incremented before we use the buffer */
2815        barrier();
2816        return &buffer->buffer[buffer->nesting][0];
2817}
2818
2819static void put_trace_buf(void)
2820{
2821        /* Don't let the decrement of nesting leak before this */
2822        barrier();
2823        this_cpu_dec(trace_percpu_buffer->nesting);
2824}
2825
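/*
 * Typical pairing, as used by trace_vbprintk() and __trace_array_vprintk()
 * below: with preemption disabled, call get_trace_buf(), bail out if it
 * returns NULL (the per-CPU buffer is already nested four deep), and
 * balance it with put_trace_buf() before re-enabling preemption.
 */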
2826static int alloc_percpu_trace_buffer(void)
2827{
2828        struct trace_buffer_struct *buffers;
2829
2830        buffers = alloc_percpu(struct trace_buffer_struct);
2831        if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2832                return -ENOMEM;
2833
2834        trace_percpu_buffer = buffers;
2835        return 0;
2836}
2837
2838static int buffers_allocated;
2839
2840void trace_printk_init_buffers(void)
2841{
2842        if (buffers_allocated)
2843                return;
2844
2845        if (alloc_percpu_trace_buffer())
2846                return;
2847
2848        /* trace_printk() is for debug use only. Don't use it in production. */
2849
2850        pr_warn("\n");
2851        pr_warn("**********************************************************\n");
2852        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2853        pr_warn("**                                                      **\n");
2854        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2855        pr_warn("**                                                      **\n");
2856        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2857        pr_warn("** unsafe for production use.                           **\n");
2858        pr_warn("**                                                      **\n");
2859        pr_warn("** If you see this message and you are not debugging    **\n");
2860        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2861        pr_warn("**                                                      **\n");
2862        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2863        pr_warn("**********************************************************\n");
2864
2865        /* Expand the buffers to set size */
2866        tracing_update_buffers();
2867
2868        buffers_allocated = 1;
2869
2870        /*
2871         * trace_printk_init_buffers() can be called by modules.
2872         * If that happens, then we need to start cmdline recording
2873         * directly here. If global_trace.trace_buffer.buffer is already
2874         * allocated, then this was called by module code.
2875         */
2876        if (global_trace.trace_buffer.buffer)
2877                tracing_start_cmdline_record();
2878}
2879
2880void trace_printk_start_comm(void)
2881{
2882        /* Start tracing comms if trace printk is set */
2883        if (!buffers_allocated)
2884                return;
2885        tracing_start_cmdline_record();
2886}
2887
2888static void trace_printk_start_stop_comm(int enabled)
2889{
2890        if (!buffers_allocated)
2891                return;
2892
2893        if (enabled)
2894                tracing_start_cmdline_record();
2895        else
2896                tracing_stop_cmdline_record();
2897}
2898
2899/**
2900 * trace_vbprintk - write binary msg to tracing buffer
2901 *
2902 */
2903int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2904{
2905        struct trace_event_call *call = &event_bprint;
2906        struct ring_buffer_event *event;
2907        struct ring_buffer *buffer;
2908        struct trace_array *tr = &global_trace;
2909        struct bprint_entry *entry;
2910        unsigned long flags;
2911        char *tbuffer;
2912        int len = 0, size, pc;
2913
2914        if (unlikely(tracing_selftest_running || tracing_disabled))
2915                return 0;
2916
2917        /* Don't pollute graph traces with trace_vprintk internals */
2918        pause_graph_tracing();
2919
2920        pc = preempt_count();
2921        preempt_disable_notrace();
2922
2923        tbuffer = get_trace_buf();
2924        if (!tbuffer) {
2925                len = 0;
2926                goto out_nobuffer;
2927        }
2928
2929        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2930
2931        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2932                goto out;
2933
2934        local_save_flags(flags);
2935        size = sizeof(*entry) + sizeof(u32) * len;
2936        buffer = tr->trace_buffer.buffer;
2937        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2938                                            flags, pc);
2939        if (!event)
2940                goto out;
2941        entry = ring_buffer_event_data(event);
2942        entry->ip                       = ip;
2943        entry->fmt                      = fmt;
2944
2945        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2946        if (!call_filter_check_discard(call, entry, buffer, event)) {
2947                __buffer_unlock_commit(buffer, event);
2948                ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2949        }
2950
2951out:
2952        put_trace_buf();
2953
2954out_nobuffer:
2955        preempt_enable_notrace();
2956        unpause_graph_tracing();
2957
2958        return len;
2959}
2960EXPORT_SYMBOL_GPL(trace_vbprintk);
2961
2962__printf(3, 0)
2963static int
2964__trace_array_vprintk(struct ring_buffer *buffer,
2965                      unsigned long ip, const char *fmt, va_list args)
2966{
2967        struct trace_event_call *call = &event_print;
2968        struct ring_buffer_event *event;
2969        int len = 0, size, pc;
2970        struct print_entry *entry;
2971        unsigned long flags;
2972        char *tbuffer;
2973
2974        if (tracing_disabled || tracing_selftest_running)
2975                return 0;
2976
2977        /* Don't pollute graph traces with trace_vprintk internals */
2978        pause_graph_tracing();
2979
2980        pc = preempt_count();
2981        preempt_disable_notrace();
2982
2983
2984        tbuffer = get_trace_buf();
2985        if (!tbuffer) {
2986                len = 0;
2987                goto out_nobuffer;
2988        }
2989
2990        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2991
2992        local_save_flags(flags);
2993        size = sizeof(*entry) + len + 1;
2994        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2995                                            flags, pc);
2996        if (!event)
2997                goto out;
2998        entry = ring_buffer_event_data(event);
2999        entry->ip = ip;
3000
3001        memcpy(&entry->buf, tbuffer, len + 1);
3002        if (!call_filter_check_discard(call, entry, buffer, event)) {
3003                __buffer_unlock_commit(buffer, event);
3004                ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3005        }
3006
3007out:
3008        put_trace_buf();
3009
3010out_nobuffer:
3011        preempt_enable_notrace();
3012        unpause_graph_tracing();
3013
3014        return len;
3015}
3016
3017__printf(3, 0)
3018int trace_array_vprintk(struct trace_array *tr,
3019                        unsigned long ip, const char *fmt, va_list args)
3020{
3021        return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3022}
3023
3024__printf(3, 0)
3025int trace_array_printk(struct trace_array *tr,
3026                       unsigned long ip, const char *fmt, ...)
3027{
3028        int ret;
3029        va_list ap;
3030
3031        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3032                return 0;
3033
3034        va_start(ap, fmt);
3035        ret = trace_array_vprintk(tr, ip, fmt, ap);
3036        va_end(ap);
3037        return ret;
3038}
3039
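/*
 * Illustrative sketch (the example_report_state() caller is hypothetical):
 * writing into a specific trace instance's buffer with trace_array_printk().
 * _THIS_IP_ supplies the caller's instruction pointer for the event.
 */
#if 0	/* example only */
static void example_report_state(struct trace_array *tr, int state)
{
        trace_array_printk(tr, _THIS_IP_, "state changed to %d\n", state);
}
#endif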
3040__printf(3, 4)
3041int trace_array_printk_buf(struct ring_buffer *buffer,
3042                           unsigned long ip, const char *fmt, ...)
3043{
3044        int ret;
3045        va_list ap;
3046
3047        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3048                return 0;
3049
3050        va_start(ap, fmt);
3051        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3052        va_end(ap);
3053        return ret;
3054}
3055
3056__printf(2, 0)
3057int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3058{
3059        return trace_array_vprintk(&global_trace, ip, fmt, args);
3060}
3061EXPORT_SYMBOL_GPL(trace_vprintk);
3062
3063static void trace_iterator_increment(struct trace_iterator *iter)
3064{
3065        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3066
3067        iter->idx++;
3068        if (buf_iter)
3069                ring_buffer_read(buf_iter, NULL);
3070}
3071
3072static struct trace_entry *
3073peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3074                unsigned long *lost_events)
3075{
3076        struct ring_buffer_event *event;
3077        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3078
3079        if (buf_iter)
3080                event = ring_buffer_iter_peek(buf_iter, ts);
3081        else
3082                event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3083                                         lost_events);
3084
3085        if (event) {
3086                iter->ent_size = ring_buffer_event_length(event);
3087                return ring_buffer_event_data(event);
3088        }
3089        iter->ent_size = 0;
3090        return NULL;
3091}
3092
3093static struct trace_entry *
3094__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3095                  unsigned long *missing_events, u64 *ent_ts)
3096{
3097        struct ring_buffer *buffer = iter->trace_buffer->buffer;
3098        struct trace_entry *ent, *next = NULL;
3099        unsigned long lost_events = 0, next_lost = 0;
3100        int cpu_file = iter->cpu_file;
3101        u64 next_ts = 0, ts;
3102        int next_cpu = -1;
3103        int next_size = 0;
3104        int cpu;
3105
3106        /*
3107         * If we are in a per_cpu trace file, don't bother iterating
3108         * over all CPUs; just peek at that one CPU directly.
3109         */
3110        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3111                if (ring_buffer_empty_cpu(buffer, cpu_file))
3112                        return NULL;
3113                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3114                if (ent_cpu)
3115                        *ent_cpu = cpu_file;
3116
3117                return ent;
3118        }
3119
3120        for_each_tracing_cpu(cpu) {
3121
3122                if (ring_buffer_empty_cpu(buffer, cpu))
3123                        continue;
3124
3125                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3126
3127                /*
3128                 * Pick the entry with the smallest timestamp:
3129                 */
3130                if (ent && (!next || ts < next_ts)) {
3131                        next = ent;
3132                        next_cpu = cpu;
3133                        next_ts = ts;
3134                        next_lost = lost_events;
3135                        next_size = iter->ent_size;
3136                }
3137        }
3138
3139        iter->ent_size = next_size;
3140
3141        if (ent_cpu)
3142                *ent_cpu = next_cpu;
3143
3144        if (ent_ts)
3145                *ent_ts = next_ts;
3146
3147        if (missing_events)
3148                *missing_events = next_lost;
3149
3150        return next;
3151}
3152
3153/* Find the next real entry, without updating the iterator itself */
3154struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3155                                          int *ent_cpu, u64 *ent_ts)
3156{
3157        return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3158}
3159
3160/* Find the next real entry, and increment the iterator to the next entry */
3161void *trace_find_next_entry_inc(struct trace_iterator *iter)
3162{
3163        iter->ent = __find_next_entry(iter, &iter->cpu,
3164                                      &iter->lost_events, &iter->ts);
3165
3166        if (iter->ent)
3167                trace_iterator_increment(iter);
3168
3169        return iter->ent ? iter : NULL;
3170}
3171
3172static void trace_consume(struct trace_iterator *iter)
3173{
3174        ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3175                            &iter->lost_events);
3176}
3177
3178static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3179{
3180        struct trace_iterator *iter = m->private;
3181        int i = (int)*pos;
3182        void *ent;
3183
3184        WARN_ON_ONCE(iter->leftover);
3185
3186        (*pos)++;
3187
3188        /* can't go backwards */
3189        if (iter->idx > i)
3190                return NULL;
3191
3192        if (iter->idx < 0)
3193                ent = trace_find_next_entry_inc(iter);
3194        else
3195                ent = iter;
3196
3197        while (ent && iter->idx < i)
3198                ent = trace_find_next_entry_inc(iter);
3199
3200        iter->pos = *pos;
3201
3202        return ent;
3203}
3204
3205void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3206{
3207        struct ring_buffer_event *event;
3208        struct ring_buffer_iter *buf_iter;
3209        unsigned long entries = 0;
3210        u64 ts;
3211
3212        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3213
3214        buf_iter = trace_buffer_iter(iter, cpu);
3215        if (!buf_iter)
3216                return;
3217
3218        ring_buffer_iter_reset(buf_iter);
3219
3220        /*
3221         * We could have the case with the max latency tracers
3222         * that a reset never took place on a cpu. This is evidenced
3223         * by the timestamp being before the start of the buffer.
3224         */
3225        while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3226                if (ts >= iter->trace_buffer->time_start)
3227                        break;
3228                entries++;
3229                ring_buffer_read(buf_iter, NULL);
3230        }
3231
3232        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3233}
3234
3235/*
3236 * The current tracer is copied into the iterator to avoid holding
3237 * a global lock all around.
3238 */
3239static void *s_start(struct seq_file *m, loff_t *pos)
3240{
3241        struct trace_iterator *iter = m->private;
3242        struct trace_array *tr = iter->tr;
3243        int cpu_file = iter->cpu_file;
3244        void *p = NULL;
3245        loff_t l = 0;
3246        int cpu;
3247
3248        /*
3249         * copy the tracer to avoid using a global lock all around.
3250         * iter->trace is a copy of current_trace; the pointer to the
3251         * name may be used instead of a strcmp(), as iter->trace->name
3252         * will point to the same string as current_trace->name.
3253         */
3254        mutex_lock(&trace_types_lock);
3255        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3256                *iter->trace = *tr->current_trace;
3257        mutex_unlock(&trace_types_lock);
3258
3259#ifdef CONFIG_TRACER_MAX_TRACE
3260        if (iter->snapshot && iter->trace->use_max_tr)
3261                return ERR_PTR(-EBUSY);
3262#endif
3263
3264        if (!iter->snapshot)
3265                atomic_inc(&trace_record_taskinfo_disabled);
3266
3267        if (*pos != iter->pos) {
3268                iter->ent = NULL;
3269                iter->cpu = 0;
3270                iter->idx = -1;
3271
3272                if (cpu_file == RING_BUFFER_ALL_CPUS) {
3273                        for_each_tracing_cpu(cpu)
3274                                tracing_iter_reset(iter, cpu);
3275                } else
3276                        tracing_iter_reset(iter, cpu_file);
3277
3278                iter->leftover = 0;
3279                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3280                        ;
3281
3282        } else {
3283                /*
3284                 * If we overflowed the seq_file before, then we want
3285                 * to just reuse the trace_seq buffer again.
3286                 */
3287                if (iter->leftover)
3288                        p = iter;
3289                else {
3290                        l = *pos - 1;
3291                        p = s_next(m, p, &l);
3292                }
3293        }
3294
3295        trace_event_read_lock();
3296        trace_access_lock(cpu_file);
3297        return p;
3298}
3299
3300static void s_stop(struct seq_file *m, void *p)
3301{
3302        struct trace_iterator *iter = m->private;
3303
3304#ifdef CONFIG_TRACER_MAX_TRACE
3305        if (iter->snapshot && iter->trace->use_max_tr)
3306                return;
3307#endif
3308
3309        if (!iter->snapshot)
3310                atomic_dec(&trace_record_taskinfo_disabled);
3311
3312        trace_access_unlock(iter->cpu_file);
3313        trace_event_read_unlock();
3314}
3315
3316static void
3317get_total_entries(struct trace_buffer *buf,
3318                  unsigned long *total, unsigned long *entries)
3319{
3320        unsigned long count;
3321        int cpu;
3322
3323        *total = 0;
3324        *entries = 0;
3325
3326        for_each_tracing_cpu(cpu) {
3327                count = ring_buffer_entries_cpu(buf->buffer, cpu);
3328                /*
3329                 * If this buffer has skipped entries, then we hold all
3330                 * entries for the trace and we need to ignore the
3331                 * ones before the time stamp.
3332                 */
3333                if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3334                        count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3335                        /* total is the same as the entries */
3336                        *total += count;
3337                } else
3338                        *total += count +
3339                                ring_buffer_overrun_cpu(buf->buffer, cpu);
3340                *entries += count;
3341        }
3342}
3343
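    /*
     * Print the column legend used by the latency trace format:
     * CPU number, irqs-off, need-resched, hardirq/softirq,
     * preempt-depth and the delay/caller columns.
     */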
3344static void print_lat_help_header(struct seq_file *m)
3345{
3346        seq_puts(m, "#                  _------=> CPU#            \n"
3347                    "#                 / _-----=> irqs-off        \n"
3348                    "#                | / _----=> need-resched    \n"
3349                    "#                || / _---=> hardirq/softirq \n"
3350                    "#                ||| / _--=> preempt-depth   \n"
3351                    "#                |||| /     delay            \n"
3352                    "#  cmd     pid   ||||| time  |   caller      \n"
3353                    "#     \\   /      |||||  \\    |   /         \n");
3354}
3355
3356static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3357{
3358        unsigned long total;
3359        unsigned long entries;
3360
3361        get_total_entries(buf, &total, &entries);
3362        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3363                   entries, total, num_online_cpus());
3364        seq_puts(m, "#\n");
3365}
3366
3367static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3368                                   unsigned int flags)
3369{
3370        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3371
3372        print_event_info(buf, m);
3373
3374        seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3375        seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3376}
3377
3378static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3379                                       unsigned int flags)
3380{
3381        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3382        const char tgid_space[] = "          ";
3383        const char space[] = "  ";
3384
3385        seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3386                   tgid ? tgid_space : space);
3387        seq_printf(m, "#                          %s / _----=> need-resched\n",
3388                   tgid ? tgid_space : space);
3389        seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3390                   tgid ? tgid_space : space);
3391        seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3392                   tgid ? tgid_space : space);
3393        seq_printf(m, "#                          %s||| /     delay\n",
3394                   tgid ? tgid_space : space);
3395        seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3396                   tgid ? "   TGID   " : space);
3397        seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3398                   tgid ? "     |    " : space);
3399}
3400
3401void
3402print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3403{
3404        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3405        struct trace_buffer *buf = iter->trace_buffer;
3406        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3407        struct tracer *type = iter->trace;
3408        unsigned long entries;
3409        unsigned long total;
3410        const char *name = type->name;
3411
3414        get_total_entries(buf, &total, &entries);
3415
3416        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3417                   name, UTS_RELEASE);
3418        seq_puts(m, "# -----------------------------------"
3419                 "---------------------------------\n");
3420        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3421                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3422                   nsecs_to_usecs(data->saved_latency),
3423                   entries,
3424                   total,
3425                   buf->cpu,
3426#if defined(CONFIG_PREEMPT_NONE)
3427                   "server",
3428#elif defined(CONFIG_PREEMPT_VOLUNTARY)
3429                   "desktop",
3430#elif defined(CONFIG_PREEMPT)
3431                   "preempt",
3432#else
3433                   "unknown",
3434#endif
3435                   /* These are reserved for later use */
3436                   0, 0, 0, 0);
3437#ifdef CONFIG_SMP
3438        seq_printf(m, " #P:%d)\n", num_online_cpus());
3439#else
3440        seq_puts(m, ")\n");
3441#endif
3442        seq_puts(m, "#    -----------------\n");
3443        seq_printf(m, "#    | task: %.16s-%d "
3444                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3445                   data->comm, data->pid,
3446                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3447                   data->policy, data->rt_priority);
3448        seq_puts(m, "#    -----------------\n");
3449
3450        if (data->critical_start) {
3451                seq_puts(m, "#  => started at: ");
3452                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3453                trace_print_seq(m, &iter->seq);
3454                seq_puts(m, "\n#  => ended at:   ");
3455                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3456                trace_print_seq(m, &iter->seq);
3457                seq_puts(m, "\n#\n");
3458        }
3459
3460        seq_puts(m, "#\n");
3461}
3462
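    /*
     * Emit a "CPU N buffer started" annotation the first time an
     * entry from a given CPU is printed.  Only done when both the
     * trace and the iterator have their ANNOTATE flags set (the
     * iterator flag is set on overruns in __tracing_open()) and the
     * CPU did not have its early entries skipped.
     */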
3463static void test_cpu_buff_start(struct trace_iterator *iter)
3464{
3465        struct trace_seq *s = &iter->seq;
3466        struct trace_array *tr = iter->tr;
3467
3468        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3469                return;
3470
3471        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3472                return;
3473
3474        if (cpumask_available(iter->started) &&
3475            cpumask_test_cpu(iter->cpu, iter->started))
3476                return;
3477
3478        if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3479                return;
3480
3481        if (cpumask_available(iter->started))
3482                cpumask_set_cpu(iter->cpu, iter->started);
3483
3484        /* Don't print started cpu buffer for the first entry of the trace */
3485        if (iter->idx > 1)
3486                trace_seq_printf(s, "##### CPU %u buffer started #####\n",
3487                                iter->cpu);
3488}
3489
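    /*
     * The print_*_fmt() helpers below format a single entry for the
     * output modes selectable through trace_options: the default
     * human-readable format, raw, hex and binary.  Each looks up the
     * trace_event registered for the entry type and dispatches to the
     * matching callback in its funcs.
     */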
3490static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3491{
3492        struct trace_array *tr = iter->tr;
3493        struct trace_seq *s = &iter->seq;
3494        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3495        struct trace_entry *entry;
3496        struct trace_event *event;
3497
3498        entry = iter->ent;
3499
3500        test_cpu_buff_start(iter);
3501
3502        event = ftrace_find_event(entry->type);
3503
3504        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3505                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3506                        trace_print_lat_context(iter);
3507                else
3508                        trace_print_context(iter);
3509        }
3510
3511        if (trace_seq_has_overflowed(s))
3512                return TRACE_TYPE_PARTIAL_LINE;
3513
3514        if (event)
3515                return event->funcs->trace(iter, sym_flags, event);
3516
3517        trace_seq_printf(s, "Unknown type %d\n", entry->type);
3518
3519        return trace_handle_return(s);
3520}
3521
3522static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3523{
3524        struct trace_array *tr = iter->tr;
3525        struct trace_seq *s = &iter->seq;
3526        struct trace_entry *entry;
3527        struct trace_event *event;
3528
3529        entry = iter->ent;
3530
3531        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3532                trace_seq_printf(s, "%d %d %llu ",
3533                                 entry->pid, iter->cpu, iter->ts);
3534
3535        if (trace_seq_has_overflowed(s))
3536                return TRACE_TYPE_PARTIAL_LINE;
3537
3538        event = ftrace_find_event(entry->type);
3539        if (event)
3540                return event->funcs->raw(iter, 0, event);
3541
3542        trace_seq_printf(s, "%d ?\n", entry->type);
3543
3544        return trace_handle_return(s);
3545}
3546
3547static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3548{
3549        struct trace_array *tr = iter->tr;
3550        struct trace_seq *s = &iter->seq;
3551        unsigned char newline = '\n';
3552        struct trace_entry *entry;
3553        struct trace_event *event;
3554
3555        entry = iter->ent;
3556
3557        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3558                SEQ_PUT_HEX_FIELD(s, entry->pid);
3559                SEQ_PUT_HEX_FIELD(s, iter->cpu);
3560                SEQ_PUT_HEX_FIELD(s, iter->ts);
3561                if (trace_seq_has_overflowed(s))
3562                        return TRACE_TYPE_PARTIAL_LINE;
3563        }
3564
3565        event = ftrace_find_event(entry->type);
3566        if (event) {
3567                enum print_line_t ret = event->funcs->hex(iter, 0, event);
3568                if (ret != TRACE_TYPE_HANDLED)
3569                        return ret;
3570        }
3571
3572        SEQ_PUT_FIELD(s, newline);
3573
3574        return trace_handle_return(s);
3575}
3576
3577static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3578{
3579        struct trace_array *tr = iter->tr;
3580        struct trace_seq *s = &iter->seq;
3581        struct trace_entry *entry;
3582        struct trace_event *event;
3583
3584        entry = iter->ent;
3585
3586        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3587                SEQ_PUT_FIELD(s, entry->pid);
3588                SEQ_PUT_FIELD(s, iter->cpu);
3589                SEQ_PUT_FIELD(s, iter->ts);
3590                if (trace_seq_has_overflowed(s))
3591                        return TRACE_TYPE_PARTIAL_LINE;
3592        }
3593
3594        event = ftrace_find_event(entry->type);
3595        return event ? event->funcs->binary(iter, 0, event) :
3596                TRACE_TYPE_HANDLED;
3597}
3598
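    /*
     * trace_empty - return 1 if there is nothing left to read in the
     * buffer(s) covered by the iterator.  Prefers the prepared ring
     * buffer iterators and falls back to asking the ring buffer
     * directly when none exist.
     */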
3599int trace_empty(struct trace_iterator *iter)
3600{
3601        struct ring_buffer_iter *buf_iter;
3602        int cpu;
3603
3604        /* If we are looking at one CPU buffer, only check that one */
3605        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3606                cpu = iter->cpu_file;
3607                buf_iter = trace_buffer_iter(iter, cpu);
3608                if (buf_iter) {
3609                        if (!ring_buffer_iter_empty(buf_iter))
3610                                return 0;
3611                } else {
3612                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3613                                return 0;
3614                }
3615                return 1;
3616        }
3617
3618        for_each_tracing_cpu(cpu) {
3619                buf_iter = trace_buffer_iter(iter, cpu);
3620                if (buf_iter) {
3621                        if (!ring_buffer_iter_empty(buf_iter))
3622                                return 0;
3623                } else {
3624                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3625                                return 0;
3626                }
3627        }
3628
3629        return 1;
3630}
3631
3632/*  Called with trace_event_read_lock() held. */
3633enum print_line_t print_trace_line(struct trace_iterator *iter)
3634{
3635        struct trace_array *tr = iter->tr;
3636        unsigned long trace_flags = tr->trace_flags;
3637        enum print_line_t ret;
3638
3639        if (iter->lost_events) {
3640                trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3641                                 iter->cpu, iter->lost_events);
3642                if (trace_seq_has_overflowed(&iter->seq))
3643                        return TRACE_TYPE_PARTIAL_LINE;
3644        }
3645
3646        if (iter->trace && iter->trace->print_line) {
3647                ret = iter->trace->print_line(iter);
3648                if (ret != TRACE_TYPE_UNHANDLED)
3649                        return ret;
3650        }
3651
3652        if (iter->ent->type == TRACE_BPUTS &&
3653                        trace_flags & TRACE_ITER_PRINTK &&
3654                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3655                return trace_print_bputs_msg_only(iter);
3656
3657        if (iter->ent->type == TRACE_BPRINT &&
3658                        trace_flags & TRACE_ITER_PRINTK &&
3659                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3660                return trace_print_bprintk_msg_only(iter);
3661
3662        if (iter->ent->type == TRACE_PRINT &&
3663                        trace_flags & TRACE_ITER_PRINTK &&
3664                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3665                return trace_print_printk_msg_only(iter);
3666
3667        if (trace_flags & TRACE_ITER_BIN)
3668                return print_bin_fmt(iter);
3669
3670        if (trace_flags & TRACE_ITER_HEX)
3671                return print_hex_fmt(iter);
3672
3673        if (trace_flags & TRACE_ITER_RAW)
3674                return print_raw_fmt(iter);
3675
3676        return print_trace_fmt(iter);
3677}
3678
3679void trace_latency_header(struct seq_file *m)
3680{
3681        struct trace_iterator *iter = m->private;
3682        struct trace_array *tr = iter->tr;
3683
3684        /* print nothing if the buffers are empty */
3685        if (trace_empty(iter))
3686                return;
3687
3688        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3689                print_trace_header(m, iter);
3690
3691        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3692                print_lat_help_header(m);
3693}
3694
3695void trace_default_header(struct seq_file *m)
3696{
3697        struct trace_iterator *iter = m->private;
3698        struct trace_array *tr = iter->tr;
3699        unsigned long trace_flags = tr->trace_flags;
3700
3701        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3702                return;
3703
3704        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3705                /* print nothing if the buffers are empty */
3706                if (trace_empty(iter))
3707                        return;
3708                print_trace_header(m, iter);
3709                if (!(trace_flags & TRACE_ITER_VERBOSE))
3710                        print_lat_help_header(m);
3711        } else {
3712                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3713                        if (trace_flags & TRACE_ITER_IRQ_INFO)
3714                                print_func_help_header_irq(iter->trace_buffer,
3715                                                           m, trace_flags);
3716                        else
3717                                print_func_help_header(iter->trace_buffer, m,
3718                                                       trace_flags);
3719                }
3720        }
3721}
3722
3723static void test_ftrace_alive(struct seq_file *m)
3724{
3725        if (!ftrace_is_dead())
3726                return;
3727        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3728                    "#          MAY BE MISSING FUNCTION EVENTS\n");
3729}
3730
3731#ifdef CONFIG_TRACER_MAX_TRACE
3732static void show_snapshot_main_help(struct seq_file *m)
3733{
3734        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3735                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3736                    "#                      Takes a snapshot of the main buffer.\n"
3737                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3738                    "#                      (Doesn't have to be '2'; works with any number that\n"
3739                    "#                       is not a '0' or '1')\n");
3740}
3741
3742static void show_snapshot_percpu_help(struct seq_file *m)
3743{
3744        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3745#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3746        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3747                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3748#else
3749        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3750                    "#                     Must use main snapshot file to allocate.\n");
3751#endif
3752        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3753                    "#                      (Doesn't have to be '2'; works with any number that\n"
3754                    "#                       is not a '0' or '1')\n");
3755}
3756
3757static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3758{
3759        if (iter->tr->allocated_snapshot)
3760                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3761        else
3762                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3763
3764        seq_puts(m, "# Snapshot commands:\n");
3765        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3766                show_snapshot_main_help(m);
3767        else
3768                show_snapshot_percpu_help(m);
3769}
3770#else
3771/* Should never be called */
3772static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3773#endif
3774
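    /*
     * s_show - seq_file .show callback for the "trace" file.
     *
     * With no current entry, print the headers (or the snapshot help
     * text).  If a previous entry overflowed the seq_file buffer,
     * flush the leftover data first; otherwise format the current
     * entry and remember whether it fit.
     */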
3775static int s_show(struct seq_file *m, void *v)
3776{
3777        struct trace_iterator *iter = v;
3778        int ret;
3779
3780        if (iter->ent == NULL) {
3781                if (iter->tr) {
3782                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
3783                        seq_puts(m, "#\n");
3784                        test_ftrace_alive(m);
3785                }
3786                if (iter->snapshot && trace_empty(iter))
3787                        print_snapshot_help(m, iter);
3788                else if (iter->trace && iter->trace->print_header)
3789                        iter->trace->print_header(m);
3790                else
3791                        trace_default_header(m);
3792
3793        } else if (iter->leftover) {
3794                /*
3795                 * If we filled the seq_file buffer earlier, we
3796                 * want to just show it now.
3797                 */
3798                ret = trace_print_seq(m, &iter->seq);
3799
3800                /* ret should this time be zero, but you never know */
3801                iter->leftover = ret;
3802
3803        } else {
3804                print_trace_line(iter);
3805                ret = trace_print_seq(m, &iter->seq);
3806                /*
3807                 * If we overflow the seq_file buffer, then it will
3808                 * ask us for this data again at start up.
3809                 * Use that instead.
3810                 *  ret is 0 if seq_file write succeeded.
3811                 *        -1 otherwise.
3812                 */
3813                iter->leftover = ret;
3814        }
3815
3816        return 0;
3817}
3818
3819/*
3820 * Should be used after trace_array_get(); trace_types_lock
3821 * ensures that i_cdev was already initialized.
3822 */
3823static inline int tracing_get_cpu(struct inode *inode)
3824{
3825        if (inode->i_cdev) /* See trace_create_cpu_file() */
3826                return (long)inode->i_cdev - 1;
3827        return RING_BUFFER_ALL_CPUS;
3828}
3829
3830static const struct seq_operations tracer_seq_ops = {
3831        .start          = s_start,
3832        .next           = s_next,
3833        .stop           = s_stop,
3834        .show           = s_show,
3835};
3836
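    /*
     * Set up a trace_iterator for reading the "trace" file: copy the
     * current tracer, pick the main or max/snapshot buffer, stop
     * tracing while dumping (unless a snapshot is being read) and
     * prepare ring buffer iterators for the requested CPU(s).
     */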
3837static struct trace_iterator *
3838__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3839{
3840        struct trace_array *tr = inode->i_private;
3841        struct trace_iterator *iter;
3842        int cpu;
3843
3844        if (tracing_disabled)
3845                return ERR_PTR(-ENODEV);
3846
3847        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3848        if (!iter)
3849                return ERR_PTR(-ENOMEM);
3850
3851        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3852                                    GFP_KERNEL);
3853        if (!iter->buffer_iter)
3854                goto release;
3855
3856        /*
3857         * We make a copy of the current tracer to avoid concurrent
3858         * changes on it while we are reading.
3859         */
3860        mutex_lock(&trace_types_lock);
3861        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3862        if (!iter->trace)
3863                goto fail;
3864
3865        *iter->trace = *tr->current_trace;
3866
3867        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3868                goto fail;
3869
3870        iter->tr = tr;
3871
3872#ifdef CONFIG_TRACER_MAX_TRACE
3873        /* Currently only the top directory has a snapshot */
3874        if (tr->current_trace->print_max || snapshot)
3875                iter->trace_buffer = &tr->max_buffer;
3876        else
3877#endif
3878                iter->trace_buffer = &tr->trace_buffer;
3879        iter->snapshot = snapshot;
3880        iter->pos = -1;
3881        iter->cpu_file = tracing_get_cpu(inode);
3882        mutex_init(&iter->mutex);
3883
3884        /* Notify the tracer early; before we stop tracing. */
3885        if (iter->trace && iter->trace->open)
3886                iter->trace->open(iter);
3887
3888        /* Annotate start of buffers if we had overruns */
3889        if (ring_buffer_overruns(iter->trace_buffer->buffer))
3890                iter->iter_flags |= TRACE_FILE_ANNOTATE;
3891
3892        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3893        if (trace_clocks[tr->clock_id].in_ns)
3894                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3895
3896        /* stop the trace while dumping if we are not opening "snapshot" */
3897        if (!iter->snapshot)
3898                tracing_stop_tr(tr);
3899
3900        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3901                for_each_tracing_cpu(cpu) {
3902                        iter->buffer_iter[cpu] =
3903                                ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3904                }
3905                ring_buffer_read_prepare_sync();
3906                for_each_tracing_cpu(cpu) {
3907                        ring_buffer_read_start(iter->buffer_iter[cpu]);
3908                        tracing_iter_reset(iter, cpu);
3909                }
3910        } else {
3911                cpu = iter->cpu_file;
3912                iter->buffer_iter[cpu] =
3913                        ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3914                ring_buffer_read_prepare_sync();
3915                ring_buffer_read_start(iter->buffer_iter[cpu]);
3916                tracing_iter_reset(iter, cpu);
3917        }
3918
3919        mutex_unlock(&trace_types_lock);
3920
3921        return iter;
3922
3923 fail:
3924        mutex_unlock(&trace_types_lock);
3925        kfree(iter->trace);
3926        kfree(iter->buffer_iter);
3927release:
3928        seq_release_private(inode, file);
3929        return ERR_PTR(-ENOMEM);
3930}
3931
3932int tracing_open_generic(struct inode *inode, struct file *filp)
3933{
3934        if (tracing_disabled)
3935                return -ENODEV;
3936
3937        filp->private_data = inode->i_private;
3938        return 0;
3939}
3940
3941bool tracing_is_disabled(void)
3942{
3943        return tracing_disabled ? true : false;
3944}
3945
3946/*
3947 * Open and update trace_array ref count.
3948 * Must have the current trace_array passed to it.
3949 */
3950static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3951{
3952        struct trace_array *tr = inode->i_private;
3953
3954        if (tracing_disabled)
3955                return -ENODEV;
3956
3957        if (trace_array_get(tr) < 0)
3958                return -ENODEV;
3959
3960        filp->private_data = inode->i_private;
3961
3962        return 0;
3963}
3964
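    /*
     * Release callback for the "trace" file.  For read opens, tear
     * down the ring buffer iterators, let the tracer clean up via
     * ->close() and restart tracing if it was stopped on open; in all
     * cases the trace_array reference taken on open is dropped.
     */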
3965static int tracing_release(struct inode *inode, struct file *file)
3966{
3967        struct trace_array *tr = inode->i_private;
3968        struct seq_file *m = file->private_data;
3969        struct trace_iterator *iter;
3970        int cpu;
3971
3972        if (!(file->f_mode & FMODE_READ)) {
3973                trace_array_put(tr);
3974                return 0;
3975        }
3976
3977        /* Writes do not use seq_file */
3978        iter = m->private;
3979        mutex_lock(&trace_types_lock);
3980
3981        for_each_tracing_cpu(cpu) {
3982                if (iter->buffer_iter[cpu])
3983                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
3984        }
3985
3986        if (iter->trace && iter->trace->close)
3987                iter->trace->close(iter);
3988
3989        if (!iter->snapshot)
3990                /* reenable tracing if it was previously enabled */
3991                tracing_start_tr(tr);
3992
3993        __trace_array_put(tr);
3994
3995        mutex_unlock(&trace_types_lock);
3996
3997        mutex_destroy(&iter->mutex);
3998        free_cpumask_var(iter->started);
3999        kfree(iter->trace);
4000        kfree(iter->buffer_iter);
4001        seq_release_private(inode, file);
4002
4003        return 0;
4004}
4005
4006static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4007{
4008        struct trace_array *tr = inode->i_private;
4009
4010        trace_array_put(tr);
4011        return 0;
4012}
4013
4014static int tracing_single_release_tr(struct inode *inode, struct file *file)
4015{
4016        struct trace_array *tr = inode->i_private;
4017
4018        trace_array_put(tr);
4019
4020        return single_release(inode, file);
4021}
4022
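    /*
     * Open callback for the "trace" file.  Opening for write with
     * O_TRUNC clears the buffer(s); opening for read builds a full
     * iterator through __tracing_open().
     */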
4023static int tracing_open(struct inode *inode, struct file *file)
4024{
4025        struct trace_array *tr = inode->i_private;
4026        struct trace_iterator *iter;
4027        int ret = 0;
4028
4029        if (trace_array_get(tr) < 0)
4030                return -ENODEV;
4031
4032        /* If this file was opened for write, then erase the contents */
4033        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4034                int cpu = tracing_get_cpu(inode);
4035                struct trace_buffer *trace_buf = &tr->trace_buffer;
4036
4037#ifdef CONFIG_TRACER_MAX_TRACE
4038                if (tr->current_trace->print_max)
4039                        trace_buf = &tr->max_buffer;
4040#endif
4041
4042                if (cpu == RING_BUFFER_ALL_CPUS)
4043                        tracing_reset_online_cpus(trace_buf);
4044                else
4045                        tracing_reset(trace_buf, cpu);
4046        }
4047
4048        if (file->f_mode & FMODE_READ) {
4049                iter = __tracing_open(inode, file, false);
4050                if (IS_ERR(iter))
4051                        ret = PTR_ERR(iter);
4052                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4053                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
4054        }
4055
4056        if (ret < 0)
4057                trace_array_put(tr);
4058
4059        return ret;
4060}
4061
4062/*
4063 * Some tracers are not suitable for instance buffers.
4064 * A tracer is always available for the global array (toplevel)
4065 * or if it explicitly states that it is.
4066 */
4067static bool
4068trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4069{
4070        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4071}
4072
4073/* Find the next tracer that this trace array may use */
4074static struct tracer *
4075get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4076{
4077        while (t && !trace_ok_for_array(t, tr))
4078                t = t->next;
4079
4080        return t;
4081}
4082
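    /*
     * seq_file iteration over the registered tracers for the
     * "available_tracers" file.  Only tracers usable by this trace
     * array are reported (see trace_ok_for_array()).
     */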
4083static void *
4084t_next(struct seq_file *m, void *v, loff_t *pos)
4085{
4086        struct trace_array *tr = m->private;
4087        struct tracer *t = v;
4088
4089        (*pos)++;
4090
4091        if (t)
4092                t = get_tracer_for_array(tr, t->next);
4093
4094        return t;
4095}
4096
4097static void *t_start(struct seq_file *m, loff_t *pos)
4098{
4099        struct trace_array *tr = m->private;
4100        struct tracer *t;
4101        loff_t l = 0;
4102
4103        mutex_lock(&trace_types_lock);
4104
4105        t = get_tracer_for_array(tr, trace_types);
4106        for (; t && l < *pos; t = t_next(m, t, &l))
4107                        ;
4108
4109        return t;
4110}
4111
4112static void t_stop(struct seq_file *m, void *p)
4113{
4114        mutex_unlock(&trace_types_lock);
4115}
4116
4117static int t_show(struct seq_file *m, void *v)
4118{
4119        struct tracer *t = v;
4120
4121        if (!t)
4122                return 0;
4123
4124        seq_puts(m, t->name);
4125        if (t->next)
4126                seq_putc(m, ' ');
4127        else
4128                seq_putc(m, '\n');
4129
4130        return 0;
4131}
4132
4133static const struct seq_operations show_traces_seq_ops = {
4134        .start          = t_start,
4135        .next           = t_next,
4136        .stop           = t_stop,
4137        .show           = t_show,
4138};
4139
4140static int show_traces_open(struct inode *inode, struct file *file)
4141{
4142        struct trace_array *tr = inode->i_private;
4143        struct seq_file *m;
4144        int ret;
4145
4146        if (tracing_disabled)
4147                return -ENODEV;
4148
4149        ret = seq_open(file, &show_traces_seq_ops);
4150        if (ret)
4151                return ret;
4152
4153        m = file->private_data;
4154        m->private = tr;
4155
4156        return 0;
4157}
4158
4159static ssize_t
4160tracing_write_stub(struct file *filp, const char __user *ubuf,
4161                   size_t count, loff_t *ppos)
4162{
4163        return count;
4164}
4165
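    /*
     * lseek for tracing files: a real seek is only meaningful when
     * the file was opened for read (and thus uses seq_file); for
     * write-only opens the position is simply reset to zero.
     */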
4166loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4167{
4168        int ret;
4169
4170        if (file->f_mode & FMODE_READ)
4171                ret = seq_lseek(file, offset, whence);
4172        else
4173                file->f_pos = ret = 0;
4174
4175        return ret;
4176}
4177
4178static const struct file_operations tracing_fops = {
4179        .open           = tracing_open,
4180        .read           = seq_read,
4181        .write          = tracing_write_stub,
4182        .llseek         = tracing_lseek,
4183        .release        = tracing_release,
4184};
4185
4186static const struct file_operations show_traces_fops = {
4187        .open           = show_traces_open,
4188        .read           = seq_read,
4189        .release        = seq_release,
4190        .llseek         = seq_lseek,
4191};
4192
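    /*
     * "tracing_cpumask" limits which CPUs are traced.  Reads return
     * the current mask in the standard cpumask hex format and writes
     * accept the same format, enabling/disabling recording on the
     * CPUs whose bits changed.  For example, on a 4-CPU system
     *
     *     echo 3 > tracing_cpumask
     *
     * restricts tracing to CPUs 0 and 1.
     */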
4193static ssize_t
4194tracing_cpumask_read(struct file *filp, char __user *ubuf,
4195                     size_t count, loff_t *ppos)
4196{
4197        struct trace_array *tr = file_inode(filp)->i_private;
4198        char *mask_str;
4199        int len;
4200
4201        len = snprintf(NULL, 0, "%*pb\n",
4202                       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4203        mask_str = kmalloc(len, GFP_KERNEL);
4204        if (!mask_str)
4205                return -ENOMEM;
4206
4207        len = snprintf(mask_str, len, "%*pb\n",
4208                       cpumask_pr_args(tr->tracing_cpumask));
4209        if (len >= count) {
4210                count = -EINVAL;
4211                goto out_err;
4212        }
4213        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4214
4215out_err:
4216        kfree(mask_str);
4217
4218        return count;
4219}
4220
4221static ssize_t
4222tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4223                      size_t count, loff_t *ppos)
4224{
4225        struct trace_array *tr = file_inode(filp)->i_private;
4226        cpumask_var_t tracing_cpumask_new;
4227        int err, cpu;
4228
4229        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4230                return -ENOMEM;
4231
4232        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4233        if (err)
4234                goto err_unlock;
4235
4236        local_irq_disable();
4237        arch_spin_lock(&tr->max_lock);
4238        for_each_tracing_cpu(cpu) {
4239                /*
4240                 * Increase/decrease the disabled counter if we are
4241                 * about to flip a bit in the cpumask:
4242                 */
4243                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4244                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4245                        atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4246                        ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4247                }
4248                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4249                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4250                        atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4251                        ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4252                }
4253        }
4254        arch_spin_unlock(&tr->max_lock);
4255        local_irq_enable();
4256
4257        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4258        free_cpumask_var(tracing_cpumask_new);
4259
4260        return count;
4261
4262err_unlock:
4263        free_cpumask_var(tracing_cpumask_new);
4264
4265        return err;
4266}
4267
4268static const struct file_operations tracing_cpumask_fops = {
4269        .open           = tracing_open_generic_tr,
4270        .read           = tracing_cpumask_read,
4271        .write          = tracing_cpumask_write,
4272        .release        = tracing_release_generic_tr,
4273        .llseek         = generic_file_llseek,
4274};
4275
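    /*
     * Show the state of the core trace flags followed by the current
     * tracer's private options for the "trace_options" file.  Options
     * that are disabled are listed with a "no" prefix.
     */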
4276static int tracing_trace_options_show(struct seq_file *m, void *v)
4277{
4278        struct tracer_opt *trace_opts;
4279        struct trace_array *tr = m->private;
4280        u32 tracer_flags;
4281        int i;
4282
4283        mutex_lock(&trace_types_lock);
4284        tracer_flags = tr->current_trace->flags->val;
4285        trace_opts = tr->current_trace->flags->opts;
4286
4287        for (i = 0; trace_options[i]; i++) {
4288                if (tr->trace_flags & (1 << i))
4289                        seq_printf(m, "%s\n", trace_options[i]);
4290                else
4291                        seq_printf(m, "no%s\n", trace_options[i]);
4292        }
4293
4294        for (i = 0; trace_opts[i].name; i++) {
4295                if (tracer_flags & trace_opts[i].bit)
4296                        seq_printf(m, "%s\n", trace_opts[i].name);
4297                else
4298                        seq_printf(m, "no%s\n", trace_opts[i].name);
4299        }
4300        mutex_unlock(&trace_types_lock);
4301
4302        return 0;
4303}
4304
4305static int __set_tracer_option(struct trace_array *tr,
4306                               struct tracer_flags *tracer_flags,
4307                               struct tracer_opt *opts, int neg)
4308{
4309        struct tracer *trace = tracer_flags->trace;
4310        int ret;
4311
4312        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4313        if (ret)
4314                return ret;
4315
4316        if (neg)
4317                tracer_flags->val &= ~opts->bit;
4318        else
4319                tracer_flags->val |= opts->bit;
4320        return 0;
4321}
4322
4323/* Try to assign a tracer specific option */
4324static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4325{
4326        struct tracer *trace = tr->current_trace;
4327        struct tracer_flags *tracer_flags = trace->flags;
4328        struct tracer_opt *opts = NULL;
4329        int i;
4330
4331        for (i = 0; tracer_flags->opts[i].name; i++) {
4332                opts = &tracer_flags->opts[i];
4333
4334                if (strcmp(cmp, opts->name) == 0)
4335                        return __set_tracer_option(tr, trace->flags, opts, neg);
4336        }
4337
4338        return -EINVAL;
4339}
4340
4341/* Some tracers require overwrite to stay enabled */
4342int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4343{
4344        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4345                return -1;
4346
4347        return 0;
4348}
4349
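    /*
     * set_tracer_flag - set or clear one TRACE_ITER_* flag on @tr.
     *
     * The current tracer gets a chance to veto the change, then any
     * side effects tied to the flag are applied: cmdline/tgid
     * recording, fork following, ring buffer overwrite mode and
     * trace_printk enablement.
     */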
4350int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4351{
4352        /* do nothing if flag is already set */
4353        if (!!(tr->trace_flags & mask) == !!enabled)
4354                return 0;
4355
4356        /* Give the tracer a chance to approve the change */
4357        if (tr->current_trace->flag_changed)
4358                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4359                        return -EINVAL;
4360
4361        if (enabled)
4362                tr->trace_flags |= mask;
4363        else
4364                tr->trace_flags &= ~mask;
4365
4366        if (mask == TRACE_ITER_RECORD_CMD)
4367                trace_event_enable_cmd_record(enabled);
4368
4369        if (mask == TRACE_ITER_RECORD_TGID) {
4370                if (!tgid_map)
4371                        tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4372                                           sizeof(*tgid_map),
4373                                           GFP_KERNEL);
4374                if (!tgid_map) {
4375                        tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4376                        return -ENOMEM;
4377                }
4378
4379                trace_event_enable_tgid_record(enabled);
4380        }
4381
4382        if (mask == TRACE_ITER_EVENT_FORK)
4383                trace_event_follow_fork(tr, enabled);
4384
4385        if (mask == TRACE_ITER_FUNC_FORK)
4386                ftrace_pid_follow_fork(tr, enabled);
4387
4388        if (mask == TRACE_ITER_OVERWRITE) {
4389                ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4390#ifdef CONFIG_TRACER_MAX_TRACE
4391                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4392#endif
4393        }
4394
4395        if (mask == TRACE_ITER_PRINTK) {
4396                trace_printk_start_stop_comm(enabled);
4397                trace_printk_control(enabled);
4398        }
4399
4400        return 0;
4401}
4402
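    /*
     * Apply a single option string coming from "trace_options" or the
     * boot command line.  A leading "no" clears the option; the core
     * flags are tried first, then the current tracer's private
     * options.
     */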
4403static int trace_set_options(struct trace_array *tr, char *option)
4404{
4405        char *cmp;
4406        int neg = 0;
4407        int ret;
4408        size_t orig_len = strlen(option);
4409
4410        cmp = strstrip(option);
4411
4412        if (strncmp(cmp, "no", 2) == 0) {
4413                neg = 1;
4414                cmp += 2;
4415        }
4416
4417        mutex_lock(&trace_types_lock);
4418
4419        ret = match_string(trace_options, -1, cmp);
4420        /* If no option could be set, test the specific tracer options */
4421        if (ret < 0)
4422                ret = set_tracer_option(tr, cmp, neg);
4423        else
4424                ret = set_tracer_flag(tr, 1 << ret, !neg);
4425
4426        mutex_unlock(&trace_types_lock);
4427
4428        /*
4429         * If the first trailing whitespace is replaced with '\0' by strstrip,
4430         * turn it back into a space.
4431         */
4432        if (orig_len > strlen(option))
4433                option[strlen(option)] = ' ';
4434
4435        return ret;
4436}
4437
4438static void __init apply_trace_boot_options(void)
4439{
4440        char *buf = trace_boot_options_buf;
4441        char *option;
4442
4443        while (true) {
4444                option = strsep(&buf, ",");
4445
4446                if (!option)
4447                        break;
4448
4449                if (*option)
4450                        trace_set_options(&global_trace, option);
4451
4452                /* Put back the comma to allow this to be called again */
4453                if (buf)
4454                        *(buf - 1) = ',';
4455        }
4456}
4457
4458static ssize_t
4459tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4460                        size_t cnt, loff_t *ppos)
4461{
4462        struct seq_file *m = filp->private_data;
4463        struct trace_array *tr = m->private;
4464        char buf[64];
4465        int ret;
4466
4467        if (cnt >= sizeof(buf))
4468                return -EINVAL;
4469
4470        if (copy_from_user(buf, ubuf, cnt))
4471                return -EFAULT;
4472
4473        buf[cnt] = 0;
4474
4475        ret = trace_set_options(tr, buf);
4476        if (ret < 0)
4477                return ret;
4478
4479        *ppos += cnt;
4480
4481        return cnt;
4482}
4483
4484static int tracing_trace_options_open(struct inode *inode, struct file *file)
4485{
4486        struct trace_array *tr = inode->i_private;
4487        int ret;
4488
4489        if (tracing_disabled)
4490                return -ENODEV;
4491
4492        if (trace_array_get(tr) < 0)
4493                return -ENODEV;
4494
4495        ret = single_open(file, tracing_trace_options_show, inode->i_private);
4496        if (ret < 0)
4497                trace_array_put(tr);
4498
4499        return ret;
4500}
4501
4502static const struct file_operations tracing_iter_fops = {
4503        .open           = tracing_trace_options_open,
4504        .read           = seq_read,
4505        .llseek         = seq_lseek,
4506        .release        = tracing_single_release_tr,
4507        .write          = tracing_trace_options_write,
4508};
4509
4510static const char readme_msg[] =
4511        "tracing mini-HOWTO:\n\n"
4512        "# echo 0 > tracing_on : quick way to disable tracing\n"
4513        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4514        " Important files:\n"
4515        "  trace\t\t\t- The static contents of the buffer\n"
4516        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4517        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4518        "  current_tracer\t- function and latency tracers\n"
4519        "  available_tracers\t- list of configured tracers for current_tracer\n"
4520        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4521        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4522        "  trace_clock\t\t- change the clock used to order events\n"
4523        "       local:   Per cpu clock but may not be synced across CPUs\n"
4524        "      global:   Synced across CPUs but slows tracing down.\n"
4525        "     counter:   Not a clock, but just an increment\n"
4526        "      uptime:   Jiffy counter from time of boot\n"
4527        "        perf:   Same clock that perf events use\n"
4528#ifdef CONFIG_X86_64
4529        "     x86-tsc:   TSC cycle counter\n"
4530#endif
4531        "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4532        "       delta:   Delta difference against a buffer-wide timestamp\n"
4533        "    absolute:   Absolute (standalone) timestamp\n"
4534        "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4535        "\n  trace_marker_raw\t\t- Writes into this file are written into the kernel buffer as binary data\n"
4536        "  tracing_cpumask\t- Limit which CPUs to trace\n"
4537        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4538        "\t\t\t  Remove sub-buffer with rmdir\n"
4539        "  trace_options\t\t- Set format or modify how tracing happens\n"
4540        "\t\t\t  Disable an option by prefixing 'no' to the\n"
4541        "\t\t\t  option name\n"
4542        "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4543#ifdef CONFIG_DYNAMIC_FTRACE
4544        "\n  available_filter_functions - list of functions that can be filtered on\n"
4545        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4546        "\t\t\t  functions\n"
4547        "\t     accepts: func_full_name or glob-matching-pattern\n"
4548        "\t     modules: Can select a group via module\n"
4549        "\t      Format: :mod:<module-name>\n"
4550        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4551        "\t    triggers: a command to perform when function is hit\n"
4552        "\t      Format: <function>:<trigger>[:count]\n"
4553        "\t     trigger: traceon, traceoff\n"
4554        "\t\t      enable_event:<system>:<event>\n"
4555        "\t\t      disable_event:<system>:<event>\n"
4556#ifdef CONFIG_STACKTRACE
4557        "\t\t      stacktrace\n"
4558#endif
4559#ifdef CONFIG_TRACER_SNAPSHOT
4560        "\t\t      snapshot\n"
4561#endif
4562        "\t\t      dump\n"
4563        "\t\t      cpudump\n"
4564        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4565        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4566        "\t     The first one will disable tracing every time do_fault is hit\n"
4567        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4568        "\t       The first time do_trap is hit and it disables tracing, the\n"
4569        "\t       counter will decrement to 2. If tracing is already disabled,\n"
4570        "\t       the counter will not decrement. It only decrements when the\n"
4571        "\t       trigger did work\n"
4572        "\t     To remove a trigger without a count:\n"
4573        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4574        "\t     To remove a trigger with a count:\n"
4575        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4576        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4577        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4578        "\t    modules: Can select a group via module command :mod:\n"
4579        "\t    Does not accept triggers\n"
4580#endif /* CONFIG_DYNAMIC_FTRACE */
4581#ifdef CONFIG_FUNCTION_TRACER
4582        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4583        "\t\t    (function)\n"
4584#endif
4585#ifdef CONFIG_FUNCTION_GRAPH_TRACER
4586        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4587        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4588        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4589#endif
4590#ifdef CONFIG_TRACER_SNAPSHOT
4591        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4592        "\t\t\t  snapshot buffer. Read the contents for more\n"
4593        "\t\t\t  information\n"
4594#endif
4595#ifdef CONFIG_STACK_TRACER
4596        "  stack_trace\t\t- Shows the max stack trace when active\n"
4597        "  stack_max_size\t- Shows current max stack size that was traced\n"
4598        "\t\t\t  Write into this file to reset the max size (trigger a\n"
4599        "\t\t\t  new trace)\n"
4600#ifdef CONFIG_DYNAMIC_FTRACE
4601        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4602        "\t\t\t  traces\n"
4603#endif
4604#endif /* CONFIG_STACK_TRACER */
4605#ifdef CONFIG_KPROBE_EVENTS
4606        "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4607        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4608#endif
4609#ifdef CONFIG_UPROBE_EVENTS
4610        "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4611        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4612#endif
4613#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4614        "\t  accepts: event-definitions (one definition per line)\n"
4615        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4616        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4617        "\t           -:[<group>/]<event>\n"
4618#ifdef CONFIG_KPROBE_EVENTS
4619        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4620  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4621#endif
4622#ifdef CONFIG_UPROBE_EVENTS
4623  "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4624#endif
4625        "\t     args: <name>=fetcharg[:type]\n"
4626        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4627        "\t           $stack<index>, $stack, $retval, $comm\n"
4628        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4629        "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4630#endif
4631        "  events/\t\t- Directory containing all trace event subsystems:\n"
4632        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4633        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4634        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4635        "\t\t\t  events\n"
4636        "      filter\t\t- If set, only events passing filter are traced\n"
4637        "  events/<system>/<event>/\t- Directory containing control files for\n"
4638        "\t\t\t  <event>:\n"
4639        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4640        "      filter\t\t- If set, only events passing filter are traced\n"
4641        "      trigger\t\t- If set, a command to perform when event is hit\n"
4642        "\t    Format: <trigger>[:count][if <filter>]\n"
4643        "\t   trigger: traceon, traceoff\n"
4644        "\t            enable_event:<system>:<event>\n"
4645        "\t            disable_event:<system>:<event>\n"
4646#ifdef CONFIG_HIST_TRIGGERS
4647        "\t            enable_hist:<system>:<event>\n"
4648        "\t            disable_hist:<system>:<event>\n"
4649#endif
4650#ifdef CONFIG_STACKTRACE
4651        "\t\t    stacktrace\n"
4652#endif
4653#ifdef CONFIG_TRACER_SNAPSHOT
4654        "\t\t    snapshot\n"
4655#endif
4656#ifdef CONFIG_HIST_TRIGGERS
4657        "\t\t    hist (see below)\n"
4658#endif
4659        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4660        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4661        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4662        "\t                  events/block/block_unplug/trigger\n"
4663        "\t   The first disables tracing every time block_unplug is hit.\n"
4664        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4665        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4666        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4667        "\t   Like function triggers, the counter is only decremented if it\n"
4668        "\t    enabled or disabled tracing.\n"
4669        "\t   To remove a trigger without a count:\n"
4670        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4671        "\t   To remove a trigger with a count:\n"
4672        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4673        "\t   Filters can be ignored when removing a trigger.\n"
4674#ifdef CONFIG_HIST_TRIGGERS
4675        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4676        "\t    Format: hist:keys=<field1[,field2,...]>\n"
4677        "\t            [:values=<field1[,field2,...]>]\n"
4678        "\t            [:sort=<field1[,field2,...]>]\n"
4679        "\t            [:size=#entries]\n"
4680        "\t            [:pause][:continue][:clear]\n"
4681        "\t            [:name=histname1]\n"
4682        "\t            [if <filter>]\n\n"
4683        "\t    When a matching event is hit, an entry is added to a hash\n"
4684        "\t    table using the key(s) and value(s) named, and the value of a\n"
4685        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4686        "\t    correspond to fields in the event's format description.  Keys\n"
4687        "\t    can be any field, or the special string 'stacktrace'.\n"
4688        "\t    Compound keys consisting of up to two fields can be specified\n"
4689        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4690        "\t    fields.  Sort keys consisting of up to two fields can be\n"
4691        "\t    specified using the 'sort' keyword.  The sort direction can\n"
4692        "\t    be modified by appending '.descending' or '.ascending' to a\n"
4693        "\t    sort field.  The 'size' parameter can be used to specify more\n"
4694        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4695        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4696        "\t    its histogram data will be shared with other triggers of the\n"
4697        "\t    same name, and trigger hits will update this common data.\n\n"
4698        "\t    Reading the 'hist' file for the event will dump the hash\n"
4699        "\t    table in its entirety to stdout.  If there are multiple hist\n"
4700        "\t    triggers attached to an event, there will be a table for each\n"
4701        "\t    trigger in the output.  The table displayed for a named\n"
4702        "\t    trigger will be the same as any other instance having the\n"
4703        "\t    same name.  The default format used to display a given field\n"
4704        "\t    can be modified by appending any of the following modifiers\n"
4705        "\t    to the field name, as applicable:\n\n"
4706        "\t            .hex        display a number as a hex value\n"
4707        "\t            .sym        display an address as a symbol\n"
4708        "\t            .sym-offset display an address as a symbol and offset\n"
4709        "\t            .execname   display a common_pid as a program name\n"
4710        "\t            .syscall    display a syscall id as a syscall name\n"
4711        "\t            .log2       display log2 value rather than raw number\n"
4712        "\t            .usecs      display a common_timestamp in microseconds\n\n"
4713        "\t    The 'pause' parameter can be used to pause an existing hist\n"
4714        "\t    trigger or to start a hist trigger but not log any events\n"
4715        "\t    until told to do so.  'continue' can be used to start or\n"
4716        "\t    restart a paused hist trigger.\n\n"
4717        "\t    The 'clear' parameter will clear the contents of a running\n"
4718        "\t    hist trigger and leave its current paused/active state\n"
4719        "\t    unchanged.\n\n"
4720        "\t    The enable_hist and disable_hist triggers can be used to\n"
4721        "\t    have one event conditionally start and stop another event's\n"
4722        "\t    already-attached hist trigger.  The syntax is analogous to\n"
4723        "\t    the enable_event and disable_event triggers.\n"
4724#endif
4725;
4726
4727static ssize_t
4728tracing_readme_read(struct file *filp, char __user *ubuf,
4729                       size_t cnt, loff_t *ppos)
4730{
4731        return simple_read_from_buffer(ubuf, cnt, ppos,
4732                                        readme_msg, strlen(readme_msg));
4733}
4734
4735static const struct file_operations tracing_readme_fops = {
4736        .open           = tracing_open_generic,
4737        .read           = tracing_readme_read,
4738        .llseek         = generic_file_llseek,
4739};
4740
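    /*
     * seq_file iteration over the pid -> tgid map backing the
     * "saved_tgids" file.  Only pids with a recorded tgid are shown.
     */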
4741static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4742{
4743        int *ptr = v;
4744
4745        if (*pos || m->count)
4746                ptr++;
4747
4748        (*pos)++;
4749
4750        for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4751                if (trace_find_tgid(*ptr))
4752                        return ptr;
4753        }
4754
4755        return NULL;
4756}
4757
4758static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4759{
4760        void *v;
4761        loff_t l = 0;
4762
4763        if (!tgid_map)
4764                return NULL;
4765
4766        v = &tgid_map[0];
4767        while (l <= *pos) {
4768                v = saved_tgids_next(m, v, &l);
4769                if (!v)
4770                        return NULL;
4771        }
4772
4773        return v;
4774}
4775
4776static void saved_tgids_stop(struct seq_file *m, void *v)
4777{
4778}
4779
4780static int saved_tgids_show(struct seq_file *m, void *v)
4781{
4782        int pid = (int *)v - tgid_map;
4783
4784        seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4785        return 0;
4786}
4787
4788static const struct seq_operations tracing_saved_tgids_seq_ops = {
4789        .start          = saved_tgids_start,
4790        .stop           = saved_tgids_stop,
4791        .next           = saved_tgids_next,
4792        .show           = saved_tgids_show,
4793};
4794
4795static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4796{
4797        if (tracing_disabled)
4798                return -ENODEV;
4799
4800        return seq_open(filp, &tracing_saved_tgids_seq_ops);
4801}
4802
4803
4804static const struct file_operations tracing_saved_tgids_fops = {
4805        .open           = tracing_saved_tgids_open,
4806        .read           = seq_read,
4807        .llseek         = seq_lseek,
4808        .release        = seq_release,
4809};
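
/*
 * Example: this seq_file typically backs the "saved_tgids" tracefs file
 * (created elsewhere in this file); saved_tgids_show() prints one
 * "<pid> <tgid>" pair per recorded task. An illustrative sketch,
 * assuming tracefs is mounted at /sys/kernel/tracing and that the
 * record-tgid trace option is enabled so that tgid_map is populated:
 *
 *   echo 1 > options/record-tgid
 *   cat saved_tgids
 */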
4810
4811static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4812{
4813        unsigned int *ptr = v;
4814
4815        if (*pos || m->count)
4816                ptr++;
4817
4818        (*pos)++;
4819
4820        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4821             ptr++) {
4822                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4823                        continue;
4824
4825                return ptr;
4826        }
4827
4828        return NULL;
4829}
4830
4831static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4832{
4833        void *v;
4834        loff_t l = 0;
4835
4836        preempt_disable();
4837        arch_spin_lock(&trace_cmdline_lock);
4838
4839        v = &savedcmd->map_cmdline_to_pid[0];
4840        while (l <= *pos) {
4841                v = saved_cmdlines_next(m, v, &l);
4842                if (!v)
4843                        return NULL;
4844        }
4845
4846        return v;
4847}
4848
4849static void saved_cmdlines_stop(struct seq_file *m, void *v)
4850{
4851        arch_spin_unlock(&trace_cmdline_lock);
4852        preempt_enable();
4853}
4854
4855static int saved_cmdlines_show(struct seq_file *m, void *v)
4856{
4857        char buf[TASK_COMM_LEN];
4858        unsigned int *pid = v;
4859
4860        __trace_find_cmdline(*pid, buf);
4861        seq_printf(m, "%d %s\n", *pid, buf);
4862        return 0;
4863}
4864
4865static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4866        .start          = saved_cmdlines_start,
4867        .next           = saved_cmdlines_next,
4868        .stop           = saved_cmdlines_stop,
4869        .show           = saved_cmdlines_show,
4870};
4871
4872static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4873{
4874        if (tracing_disabled)
4875                return -ENODEV;
4876
4877        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4878}
4879
4880static const struct file_operations tracing_saved_cmdlines_fops = {
4881        .open           = tracing_saved_cmdlines_open,
4882        .read           = seq_read,
4883        .llseek         = seq_lseek,
4884        .release        = seq_release,
4885};
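
/*
 * Example: the matching "saved_cmdlines" file walks the savedcmd map
 * and prints one "<pid> <comm>" line per entry, as produced by
 * saved_cmdlines_show() above. Illustrative use, assuming tracefs at
 * /sys/kernel/tracing:
 *
 *   cat saved_cmdlines
 */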
4886
4887static ssize_t
4888tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4889                                 size_t cnt, loff_t *ppos)
4890{
4891        char buf[64];
4892        int r;
4893
4894        arch_spin_lock(&trace_cmdline_lock);
4895        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4896        arch_spin_unlock(&trace_cmdline_lock);
4897
4898        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4899}
4900
4901static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4902{
4903        kfree(s->saved_cmdlines);
4904        kfree(s->map_cmdline_to_pid);
4905        kfree(s);
4906}
4907
4908static int tracing_resize_saved_cmdlines(unsigned int val)
4909{
4910        struct saved_cmdlines_buffer *s, *savedcmd_temp;
4911
4912        s = kmalloc(sizeof(*s), GFP_KERNEL);
4913        if (!s)
4914                return -ENOMEM;
4915
4916        if (allocate_cmdlines_buffer(val, s) < 0) {
4917                kfree(s);
4918                return -ENOMEM;
4919        }
4920
4921        arch_spin_lock(&trace_cmdline_lock);
4922        savedcmd_temp = savedcmd;
4923        savedcmd = s;
4924        arch_spin_unlock(&trace_cmdline_lock);
4925        free_saved_cmdlines_buffer(savedcmd_temp);
4926
4927        return 0;
4928}
4929
4930static ssize_t
4931tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4932                                  size_t cnt, loff_t *ppos)
4933{
4934        unsigned long val;
4935        int ret;
4936
4937        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4938        if (ret)
4939                return ret;
4940
4941        /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4942        if (!val || val > PID_MAX_DEFAULT)
4943                return -EINVAL;
4944
4945        ret = tracing_resize_saved_cmdlines((unsigned int)val);
4946        if (ret < 0)
4947                return ret;
4948
4949        *ppos += cnt;
4950
4951        return cnt;
4952}
4953
4954static const struct file_operations tracing_saved_cmdlines_size_fops = {
4955        .open           = tracing_open_generic,
4956        .read           = tracing_saved_cmdlines_size_read,
4957        .write          = tracing_saved_cmdlines_size_write,
4958};
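
/*
 * Example: illustrative use of the "saved_cmdlines_size" file served by
 * the handlers above. A read reports the current number of cached comm
 * entries; a write resizes the cache, bounded to 1..PID_MAX_DEFAULT as
 * checked in the write handler. Assuming tracefs at /sys/kernel/tracing:
 *
 *   cat saved_cmdlines_size        # e.g. 128
 *   echo 1024 > saved_cmdlines_size
 */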
4959
4960#ifdef CONFIG_TRACE_EVAL_MAP_FILE
4961static union trace_eval_map_item *
4962update_eval_map(union trace_eval_map_item *ptr)
4963{
4964        if (!ptr->map.eval_string) {
4965                if (ptr->tail.next) {
4966                        ptr = ptr->tail.next;
4967                        /* Set ptr to the next real item (skip head) */
4968                        ptr++;
4969                } else
4970                        return NULL;
4971        }
4972        return ptr;
4973}
4974
4975static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4976{
4977        union trace_eval_map_item *ptr = v;
4978
4979        /*
4980         * Paranoid! If ptr points to end, we don't want to increment past it.
4981         * This really should never happen.
4982         */
4983        ptr = update_eval_map(ptr);
4984        if (WARN_ON_ONCE(!ptr))
4985                return NULL;
4986
4987        ptr++;
4988
4989        (*pos)++;
4990
4991        ptr = update_eval_map(ptr);
4992
4993        return ptr;
4994}
4995
4996static void *eval_map_start(struct seq_file *m, loff_t *pos)
4997{
4998        union trace_eval_map_item *v;
4999        loff_t l = 0;
5000
5001        mutex_lock(&trace_eval_mutex);
5002
5003        v = trace_eval_maps;
5004        if (v)
5005                v++;
5006
5007        while (v && l < *pos) {
5008                v = eval_map_next(m, v, &l);
5009        }
5010
5011        return v;
5012}
5013
5014static void eval_map_stop(struct seq_file *m, void *v)
5015{
5016        mutex_unlock(&trace_eval_mutex);
5017}
5018
5019static int eval_map_show(struct seq_file *m, void *v)
5020{
5021        union trace_eval_map_item *ptr = v;
5022
5023        seq_printf(m, "%s %ld (%s)\n",
5024                   ptr->map.eval_string, ptr->map.eval_value,
5025                   ptr->map.system);
5026
5027        return 0;
5028}
5029
5030static const struct seq_operations tracing_eval_map_seq_ops = {
5031        .start          = eval_map_start,
5032        .next           = eval_map_next,
5033        .stop           = eval_map_stop,
5034        .show           = eval_map_show,
5035};
5036
5037static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5038{
5039        if (tracing_disabled)
5040                return -ENODEV;
5041
5042        return seq_open(filp, &tracing_eval_map_seq_ops);
5043}
5044
5045static const struct file_operations tracing_eval_map_fops = {
5046        .open           = tracing_eval_map_open,
5047        .read           = seq_read,
5048        .llseek         = seq_lseek,
5049        .release        = seq_release,
5050};
5051
5052static inline union trace_eval_map_item *
5053trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5054{
5055        /* Return tail of array given the head */
5056        return ptr + ptr->head.length + 1;
5057}
5058
5059static void
5060trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5061                           int len)
5062{
5063        struct trace_eval_map **stop;
5064        struct trace_eval_map **map;
5065        union trace_eval_map_item *map_array;
5066        union trace_eval_map_item *ptr;
5067
5068        stop = start + len;
5069
5070        /*
5071         * The trace_eval_maps list contains the maps plus a head and a tail
5072         * item, where the head holds the module and the length of the array,
5073         * and the tail holds a pointer to the next list.
5074         */
5075        map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5076        if (!map_array) {
5077                pr_warn("Unable to allocate trace eval mapping\n");
5078                return;
5079        }
5080
5081        mutex_lock(&trace_eval_mutex);
5082
5083        if (!trace_eval_maps)
5084                trace_eval_maps = map_array;
5085        else {
5086                ptr = trace_eval_maps;
5087                for (;;) {
5088                        ptr = trace_eval_jmp_to_tail(ptr);
5089                        if (!ptr->tail.next)
5090                                break;
5091                        ptr = ptr->tail.next;
5092
5093                }
5094                ptr->tail.next = map_array;
5095        }
5096        map_array->head.mod = mod;
5097        map_array->head.length = len;
5098        map_array++;
5099
5100        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5101                map_array->map = **map;
5102                map_array++;
5103        }
5104        memset(map_array, 0, sizeof(*map_array));
5105
5106        mutex_unlock(&trace_eval_mutex);
5107}
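
/*
 * A worked example of the layout built above: for a module exporting
 * three eval maps, kmalloc_array() allocates five
 * union trace_eval_map_item slots:
 *
 *   map_array[0]    head: .mod = mod, .length = 3
 *   map_array[1..3] the three trace_eval_map entries, copied by value
 *   map_array[4]    tail: zeroed; .tail.next later chains the next array
 */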
5108
5109static void trace_create_eval_file(struct dentry *d_tracer)
5110{
5111        trace_create_file("eval_map", 0444, d_tracer,
5112                          NULL, &tracing_eval_map_fops);
5113}
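
/*
 * Example: reading the "eval_map" file created above walks the chained
 * arrays and prints one line per map in the eval_map_show() format
 * "<eval string> <value> (<system>)", for instance (hypothetical names):
 *
 *   cat /sys/kernel/tracing/eval_map
 *   MY_ENUM_VAL 1 (my_subsystem)
 */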
5114
5115#else /* CONFIG_TRACE_EVAL_MAP_FILE */
5116static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5117static inline void trace_insert_eval_map_file(struct module *mod,
5118                              struct trace_eval_map **start, int len) { }
5119#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5120
5121static void trace_insert_eval_map(struct module *mod,
5122                                  struct trace_eval_map **start, int len)
5123{
5124        struct trace_eval_map **map;
5125
5126        if (len <= 0)
5127                return;
5128
5129        map = start;
5130
5131        trace_event_eval_update(map, len);
5132
5133        trace_insert_eval_map_file(mod, start, len);
5134}
5135
5136static ssize_t
5137tracing_set_trace_read(struct file *filp, char __user *ubuf,
5138                       size_t cnt, loff_t *ppos)
5139{
5140        struct trace_array *tr = filp->private_data;
5141        char buf[MAX_TRACER_SIZE+2];
5142        int r;
5143
5144        mutex_lock(&trace_types_lock);
5145        r = sprintf(buf, "%s\n", tr->current_trace->name);
5146        mutex_unlock(&trace_types_lock);
5147
5148        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5149}
5150
5151int tracer_init(struct tracer *t, struct trace_array *tr)
5152{
5153        tracing_reset_online_cpus(&tr->trace_buffer);
5154        return t->init(tr);
5155}
5156
5157static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5158{
5159        int cpu;
5160
5161        for_each_tracing_cpu(cpu)
5162                per_cpu_ptr(buf->data, cpu)->entries = val;
5163}
5164
5165#ifdef CONFIG_TRACER_MAX_TRACE
5166/* resize @trace_buf's buffer to the size of @size_buf's entries */
5167static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5168                                        struct trace_buffer *size_buf, int cpu_id)
5169{
5170        int cpu, ret = 0;
5171
5172        if (cpu_id == RING_BUFFER_ALL_CPUS) {
5173                for_each_tracing_cpu(cpu) {
5174                        ret = ring_buffer_resize(trace_buf->buffer,
5175                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5176                        if (ret < 0)
5177                                break;
5178                        per_cpu_ptr(trace_buf->data, cpu)->entries =
5179                                per_cpu_ptr(size_buf->data, cpu)->entries;
5180                }
5181        } else {
5182                ret = ring_buffer_resize(trace_buf->buffer,
5183                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5184                if (ret == 0)
5185                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5186                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
5187        }
5188
5189        return ret;
5190}
5191#endif /* CONFIG_TRACER_MAX_TRACE */
5192
5193static int __tracing_resize_ring_buffer(struct trace_array *tr,
5194                                        unsigned long size, int cpu)
5195{
5196        int ret;
5197
5198        /*
5199         * If the kernel or user changes the size of the ring buffer,
5200         * we use the size that was given and can forget about expanding
5201         * it later.
5202         */
5203        ring_buffer_expanded = true;
5204
5205        /* May be called before buffers are initialized */
5206        if (!tr->trace_buffer.buffer)
5207                return 0;
5208
5209        ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5210        if (ret < 0)
5211                return ret;
5212
5213#ifdef CONFIG_TRACER_MAX_TRACE
5214        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5215            !tr->current_trace->use_max_tr)
5216                goto out;
5217
5218        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5219        if (ret < 0) {
5220                int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5221                                                     &tr->trace_buffer, cpu);
5222                if (r < 0) {
5223                        /*
5224                         * AARGH! We are left with different
5225                         * size max buffer!!!!
5226                         * The max buffer is our "snapshot" buffer.
5227                         * When a tracer needs a snapshot (one of the
5228                         * latency tracers), it swaps the max buffer
5229                         * with the saved snapshot. We succeeded in
5230                         * updating the size of the main buffer, but failed to
5231                         * update the size of the max buffer. But when we tried
5232                         * to reset the main buffer to the original size, we
5233                         * failed there too. This is very unlikely to
5234                         * happen, but if it does, warn and kill all
5235                         * tracing.
5236                         */
5237                        WARN_ON(1);
5238                        tracing_disabled = 1;
5239                }
5240                return ret;
5241        }
5242
5243        if (cpu == RING_BUFFER_ALL_CPUS)
5244                set_buffer_entries(&tr->max_buffer, size);
5245        else
5246                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5247
5248 out:
5249#endif /* CONFIG_TRACER_MAX_TRACE */
5250
5251        if (cpu == RING_BUFFER_ALL_CPUS)
5252                set_buffer_entries(&tr->trace_buffer, size);
5253        else
5254                per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5255
5256        return ret;
5257}
5258
5259static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5260                                          unsigned long size, int cpu_id)
5261{
5262        int ret = size;
5263
5264        mutex_lock(&trace_types_lock);
5265
5266        if (cpu_id != RING_BUFFER_ALL_CPUS) {
5267                /* make sure this cpu is enabled in the mask */
5268                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5269                        ret = -EINVAL;
5270                        goto out;
5271                }
5272        }
5273
5274        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5275        if (ret < 0)
5276                ret = -ENOMEM;
5277
5278out:
5279        mutex_unlock(&trace_types_lock);
5280
5281        return ret;
5282}
5283
5284
5285/**
5286 * tracing_update_buffers - used by tracing facility to expand ring buffers
5287 *
5288 * To save memory when tracing is configured in but never used, the
5289 * ring buffers start out at a minimum size. Once a user starts to use
5290 * the tracing facility, the buffers need to grow to their default
5291 * size.
5292 *
5293 * This function is to be called when a tracer is about to be used.
5294 */
5295int tracing_update_buffers(void)
5296{
5297        int ret = 0;
5298
5299        mutex_lock(&trace_types_lock);
5300        if (!ring_buffer_expanded)
5301                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5302                                                RING_BUFFER_ALL_CPUS);
5303        mutex_unlock(&trace_types_lock);
5304
5305        return ret;
5306}
5307
5308struct trace_option_dentry;
5309
5310static void
5311create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5312
5313/*
5314 * Used to clear out the tracer before deletion of an instance.
5315 * Must have trace_types_lock held.
5316 */
5317static void tracing_set_nop(struct trace_array *tr)
5318{
5319        if (tr->current_trace == &nop_trace)
5320                return;
5321
5322        tr->current_trace->enabled--;
5323
5324        if (tr->current_trace->reset)
5325                tr->current_trace->reset(tr);
5326
5327        tr->current_trace = &nop_trace;
5328}
5329
5330static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5331{
5332        /* Only enable if the directory has been created already. */
5333        if (!tr->dir)
5334                return;
5335
5336        create_trace_option_files(tr, t);
5337}
5338
5339static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5340{
5341        struct tracer *t;
5342#ifdef CONFIG_TRACER_MAX_TRACE
5343        bool had_max_tr;
5344#endif
5345        int ret = 0;
5346
5347        mutex_lock(&trace_types_lock);
5348
5349        if (!ring_buffer_expanded) {
5350                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5351                                                RING_BUFFER_ALL_CPUS);
5352                if (ret < 0)
5353                        goto out;
5354                ret = 0;
5355        }
5356
5357        for (t = trace_types; t; t = t->next) {
5358                if (strcmp(t->name, buf) == 0)
5359                        break;
5360        }
5361        if (!t) {
5362                ret = -EINVAL;
5363                goto out;
5364        }
5365        if (t == tr->current_trace)
5366                goto out;
5367
5368        /* Some tracers won't work if enabled from the kernel command line */
5369        if (system_state < SYSTEM_RUNNING && t->noboot) {
5370                pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5371                        t->name);
5372                goto out;
5373        }
5374
5375        /* Some tracers are only allowed for the top level buffer */
5376        if (!trace_ok_for_array(t, tr)) {
5377                ret = -EINVAL;
5378                goto out;
5379        }
5380
5381        /* If trace pipe files are being read, we can't change the tracer */
5382        if (tr->current_trace->ref) {
5383                ret = -EBUSY;
5384                goto out;
5385        }
5386
5387        trace_branch_disable();
5388
5389        tr->current_trace->enabled--;
5390
5391        if (tr->current_trace->reset)
5392                tr->current_trace->reset(tr);
5393
5394        /* Current trace needs to be nop_trace before synchronize_sched */
5395        tr->current_trace = &nop_trace;
5396
5397#ifdef CONFIG_TRACER_MAX_TRACE
5398        had_max_tr = tr->allocated_snapshot;
5399
5400        if (had_max_tr && !t->use_max_tr) {
5401                /*
5402                 * We need to make sure that the update_max_tr sees that
5403                 * current_trace changed to nop_trace to keep it from
5404                 * swapping the buffers after we resize it.
5405                 * The update_max_tr is called with interrupts disabled,
5406                 * so a synchronize_sched() is sufficient.
5407                 */
5408                synchronize_sched();
5409                free_snapshot(tr);
5410        }
5411#endif
5412
5413#ifdef CONFIG_TRACER_MAX_TRACE
5414        if (t->use_max_tr && !had_max_tr) {
5415                ret = tracing_alloc_snapshot_instance(tr);
5416                if (ret < 0)
5417                        goto out;
5418        }
5419#endif
5420
5421        if (t->init) {
5422                ret = tracer_init(t, tr);
5423                if (ret)
5424                        goto out;
5425        }
5426
5427        tr->current_trace = t;
5428        tr->current_trace->enabled++;
5429        trace_branch_enable(tr);
5430 out:
5431        mutex_unlock(&trace_types_lock);
5432
5433        return ret;
5434}
5435
5436static ssize_t
5437tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5438                        size_t cnt, loff_t *ppos)
5439{
5440        struct trace_array *tr = filp->private_data;
5441        char buf[MAX_TRACER_SIZE+1];
5442        int i;
5443        size_t ret;
5444        int err;
5445
5446        ret = cnt;
5447
5448        if (cnt > MAX_TRACER_SIZE)
5449                cnt = MAX_TRACER_SIZE;
5450
5451        if (copy_from_user(buf, ubuf, cnt))
5452                return -EFAULT;
5453
5454        buf[cnt] = 0;
5455
5456        /* strip trailing whitespace. */
5457        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5458                buf[i] = 0;
5459
5460        err = tracing_set_tracer(tr, buf);
5461        if (err)
5462                return err;
5463
5464        *ppos += ret;
5465
5466        return ret;
5467}
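
/*
 * Example: these two handlers back the "current_tracer" file.
 * Illustrative use, assuming tracefs is mounted at /sys/kernel/tracing
 * and the function tracer is built in:
 *
 *   cat current_tracer             # e.g. "nop"
 *   echo function > current_tracer
 *   echo nop > current_tracer      # switch back, tearing the tracer down
 */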
5468
5469static ssize_t
5470tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5471                   size_t cnt, loff_t *ppos)
5472{
5473        char buf[64];
5474        int r;
5475
5476        r = snprintf(buf, sizeof(buf), "%ld\n",
5477                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5478        if (r > sizeof(buf))
5479                r = sizeof(buf);
5480        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5481}
5482
5483static ssize_t
5484tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5485                    size_t cnt, loff_t *ppos)
5486{
5487        unsigned long val;
5488        int ret;
5489
5490        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5491        if (ret)
5492                return ret;
5493
5494        *ptr = val * 1000;
5495
5496        return cnt;
5497}
5498
5499static ssize_t
5500tracing_thresh_read(struct file *filp, char __user *ubuf,
5501                    size_t cnt, loff_t *ppos)
5502{
5503        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5504}
5505
5506static ssize_t
5507tracing_thresh_write(struct file *filp, const char __user *ubuf,
5508                     size_t cnt, loff_t *ppos)
5509{
5510        struct trace_array *tr = filp->private_data;
5511        int ret;
5512
5513        mutex_lock(&trace_types_lock);
5514        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5515        if (ret < 0)
5516                goto out;
5517
5518        if (tr->current_trace->update_thresh) {
5519                ret = tr->current_trace->update_thresh(tr);
5520                if (ret < 0)
5521                        goto out;
5522        }
5523
5524        ret = cnt;
5525out:
5526        mutex_unlock(&trace_types_lock);
5527
5528        return ret;
5529}
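
/*
 * Example: tracing_thresh is exposed as the "tracing_thresh" file.
 * Values are written and read in microseconds while the variable itself
 * holds nanoseconds (note the * 1000 and nsecs_to_usecs() conversions
 * above). Illustrative use:
 *
 *   echo 100 > tracing_thresh      # latency tracers record only > 100 usecs
 *   echo 0 > tracing_thresh        # 0 typically means use the max-latency behaviour
 */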
5530
5531#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5532
5533static ssize_t
5534tracing_max_lat_read(struct file *filp, char __user *ubuf,
5535                     size_t cnt, loff_t *ppos)
5536{
5537        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5538}
5539
5540static ssize_t
5541tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5542                      size_t cnt, loff_t *ppos)
5543{
5544        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5545}
5546
5547#endif
5548
5549static int tracing_open_pipe(struct inode *inode, struct file *filp)
5550{
5551        struct trace_array *tr = inode->i_private;
5552        struct trace_iterator *iter;
5553        int ret = 0;
5554
5555        if (tracing_disabled)
5556                return -ENODEV;
5557
5558        if (trace_array_get(tr) < 0)
5559                return -ENODEV;
5560
5561        mutex_lock(&trace_types_lock);
5562
5563        /* create a buffer to store the information to pass to userspace */
5564        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5565        if (!iter) {
5566                ret = -ENOMEM;
5567                __trace_array_put(tr);
5568                goto out;
5569        }
5570
5571        trace_seq_init(&iter->seq);
5572        iter->trace = tr->current_trace;
5573
5574        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5575                ret = -ENOMEM;
5576                goto fail;
5577        }
5578
5579        /* trace pipe does not show start of buffer */
5580        cpumask_setall(iter->started);
5581
5582        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5583                iter->iter_flags |= TRACE_FILE_LAT_FMT;
5584
5585        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5586        if (trace_clocks[tr->clock_id].in_ns)
5587                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5588
5589        iter->tr = tr;
5590        iter->trace_buffer = &tr->trace_buffer;
5591        iter->cpu_file = tracing_get_cpu(inode);
5592        mutex_init(&iter->mutex);
5593        filp->private_data = iter;
5594
5595        if (iter->trace->pipe_open)
5596                iter->trace->pipe_open(iter);
5597
5598        nonseekable_open(inode, filp);
5599
5600        tr->current_trace->ref++;
5601out:
5602        mutex_unlock(&trace_types_lock);
5603        return ret;
5604
5605fail:
5606        kfree(iter->trace);
5607        kfree(iter);
5608        __trace_array_put(tr);
5609        mutex_unlock(&trace_types_lock);
5610        return ret;
5611}
5612
5613static int tracing_release_pipe(struct inode *inode, struct file *file)
5614{
5615        struct trace_iterator *iter = file->private_data;
5616        struct trace_array *tr = inode->i_private;
5617
5618        mutex_lock(&trace_types_lock);
5619
5620        tr->current_trace->ref--;
5621
5622        if (iter->trace->pipe_close)
5623                iter->trace->pipe_close(iter);
5624
5625        mutex_unlock(&trace_types_lock);
5626
5627        free_cpumask_var(iter->started);
5628        mutex_destroy(&iter->mutex);
5629        kfree(iter);
5630
5631        trace_array_put(tr);
5632
5633        return 0;
5634}
5635
5636static __poll_t
5637trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5638{
5639        struct trace_array *tr = iter->tr;
5640
5641        /* Iterators are static, they should be filled or empty */
5642        if (trace_buffer_iter(iter, iter->cpu_file))
5643                return EPOLLIN | EPOLLRDNORM;
5644
5645        if (tr->trace_flags & TRACE_ITER_BLOCK)
5646                /*
5647                 * Always select as readable when in blocking mode
5648                 */
5649                return EPOLLIN | EPOLLRDNORM;
5650        else
5651                return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5652                                             filp, poll_table);
5653}
5654
5655static __poll_t
5656tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5657{
5658        struct trace_iterator *iter = filp->private_data;
5659
5660        return trace_poll(iter, filp, poll_table);
5661}
5662
5663/* Must be called with iter->mutex held. */
5664static int tracing_wait_pipe(struct file *filp)
5665{
5666        struct trace_iterator *iter = filp->private_data;
5667        int ret;
5668
5669        while (trace_empty(iter)) {
5670
5671                if ((filp->f_flags & O_NONBLOCK)) {
5672                        return -EAGAIN;
5673                }
5674
5675                /*
5676                 * We stop blocking only once we have read something and
5677                 * tracing has been disabled; we keep blocking if tracing is
5678                 * disabled but nothing has been read yet. This allows a user
5679                 * to cat this file and then enable tracing. But after we have
5680                 * read something, we give an EOF when tracing is again disabled.
5681                 *
5682                 * iter->pos will be 0 if we haven't read anything.
5683                 */
5684                if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5685                        break;
5686
5687                mutex_unlock(&iter->mutex);
5688
5689                ret = wait_on_pipe(iter, false);
5690
5691                mutex_lock(&iter->mutex);
5692
5693                if (ret)
5694                        return ret;
5695        }
5696
5697        return 1;
5698}
5699
5700/*
5701 * Consumer reader.
5702 */
5703static ssize_t
5704tracing_read_pipe(struct file *filp, char __user *ubuf,
5705                  size_t cnt, loff_t *ppos)
5706{
5707        struct trace_iterator *iter = filp->private_data;
5708        ssize_t sret;
5709
5710        /*
5711         * Avoid more than one consumer on a single file descriptor.
5712         * This is just a matter of trace coherency; the ring buffer itself
5713         * is protected.
5714         */
5715        mutex_lock(&iter->mutex);
5716
5717        /* return any leftover data */
5718        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5719        if (sret != -EBUSY)
5720                goto out;
5721
5722        trace_seq_init(&iter->seq);
5723
5724        if (iter->trace->read) {
5725                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5726                if (sret)
5727                        goto out;
5728        }
5729
5730waitagain:
5731        sret = tracing_wait_pipe(filp);
5732        if (sret <= 0)
5733                goto out;
5734
5735        /* stop when tracing is finished */
5736        if (trace_empty(iter)) {
5737                sret = 0;
5738                goto out;
5739        }
5740
5741        if (cnt >= PAGE_SIZE)
5742                cnt = PAGE_SIZE - 1;
5743
5744        /* reset all but tr, trace, and overruns */
5745        memset(&iter->seq, 0,
5746               sizeof(struct trace_iterator) -
5747               offsetof(struct trace_iterator, seq));
5748        cpumask_clear(iter->started);
5749        iter->pos = -1;
5750
5751        trace_event_read_lock();
5752        trace_access_lock(iter->cpu_file);
5753        while (trace_find_next_entry_inc(iter) != NULL) {
5754                enum print_line_t ret;
5755                int save_len = iter->seq.seq.len;
5756
5757                ret = print_trace_line(iter);
5758                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5759                        /* don't print partial lines */
5760                        iter->seq.seq.len = save_len;
5761                        break;
5762                }
5763                if (ret != TRACE_TYPE_NO_CONSUME)
5764                        trace_consume(iter);
5765
5766                if (trace_seq_used(&iter->seq) >= cnt)
5767                        break;
5768
5769                /*
5770                 * Setting the full flag means we reached the trace_seq buffer
5771                 * size and should have left via the partial-output condition
5772                 * above; one of the trace_seq_* functions is not being used properly.
5773                 */
5774                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5775                          iter->ent->type);
5776        }
5777        trace_access_unlock(iter->cpu_file);
5778        trace_event_read_unlock();
5779
5780        /* Now copy what we have to the user */
5781        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5782        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5783                trace_seq_init(&iter->seq);
5784
5785        /*
5786         * If there was nothing to send to user, in spite of consuming trace
5787         * entries, go back to wait for more entries.
5788         */
5789        if (sret == -EBUSY)
5790                goto waitagain;
5791
5792out:
5793        mutex_unlock(&iter->mutex);
5794
5795        return sret;
5796}
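
/*
 * Example: tracing_read_pipe() is the consuming "trace_pipe" reader:
 * entries are removed from the ring buffer as they are read, and the
 * read blocks while the buffer is empty unless the file was opened
 * O_NONBLOCK (see tracing_wait_pipe() above). Illustrative use,
 * assuming tracefs at /sys/kernel/tracing:
 *
 *   cat trace_pipe                 # streams and consumes events
 */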
5797
5798static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5799                                     unsigned int idx)
5800{
5801        __free_page(spd->pages[idx]);
5802}
5803
5804static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5805        .can_merge              = 0,
5806        .confirm                = generic_pipe_buf_confirm,
5807        .release                = generic_pipe_buf_release,
5808        .steal                  = generic_pipe_buf_steal,
5809        .get                    = generic_pipe_buf_get,
5810};
5811
5812static size_t
5813tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5814{
5815        size_t count;
5816        int save_len;
5817        int ret;
5818
5819        /* Seq buffer is page-sized, exactly what we need. */
5820        for (;;) {
5821                save_len = iter->seq.seq.len;
5822                ret = print_trace_line(iter);
5823
5824                if (trace_seq_has_overflowed(&iter->seq)) {
5825                        iter->seq.seq.len = save_len;
5826                        break;
5827                }
5828
5829                /*
5830                 * This should not be hit, because it should only
5831                 * be set if the iter->seq overflowed. But check it
5832                 * anyway to be safe.
5833                 */
5834                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5835                        iter->seq.seq.len = save_len;
5836                        break;
5837                }
5838
5839                count = trace_seq_used(&iter->seq) - save_len;
5840                if (rem < count) {
5841                        rem = 0;
5842                        iter->seq.seq.len = save_len;
5843                        break;
5844                }
5845
5846                if (ret != TRACE_TYPE_NO_CONSUME)
5847                        trace_consume(iter);
5848                rem -= count;
5849                if (!trace_find_next_entry_inc(iter))   {
5850                        rem = 0;
5851                        iter->ent = NULL;
5852                        break;
5853                }
5854        }
5855
5856        return rem;
5857}
5858
5859static ssize_t tracing_splice_read_pipe(struct file *filp,
5860                                        loff_t *ppos,
5861                                        struct pipe_inode_info *pipe,
5862                                        size_t len,
5863                                        unsigned int flags)
5864{
5865        struct page *pages_def[PIPE_DEF_BUFFERS];
5866        struct partial_page partial_def[PIPE_DEF_BUFFERS];
5867        struct trace_iterator *iter = filp->private_data;
5868        struct splice_pipe_desc spd = {
5869                .pages          = pages_def,
5870                .partial        = partial_def,
5871                .nr_pages       = 0, /* This gets updated below. */
5872                .nr_pages_max   = PIPE_DEF_BUFFERS,
5873                .ops            = &tracing_pipe_buf_ops,
5874                .spd_release    = tracing_spd_release_pipe,
5875        };
5876        ssize_t ret;
5877        size_t rem;
5878        unsigned int i;
5879
5880        if (splice_grow_spd(pipe, &spd))
5881                return -ENOMEM;
5882
5883        mutex_lock(&iter->mutex);
5884
5885        if (iter->trace->splice_read) {
5886                ret = iter->trace->splice_read(iter, filp,
5887                                               ppos, pipe, len, flags);
5888                if (ret)
5889                        goto out_err;
5890        }
5891
5892        ret = tracing_wait_pipe(filp);
5893        if (ret <= 0)
5894                goto out_err;
5895
5896        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5897                ret = -EFAULT;
5898                goto out_err;
5899        }
5900
5901        trace_event_read_lock();
5902        trace_access_lock(iter->cpu_file);
5903
5904        /* Fill as many pages as possible. */
5905        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5906                spd.pages[i] = alloc_page(GFP_KERNEL);
5907                if (!spd.pages[i])
5908                        break;
5909
5910                rem = tracing_fill_pipe_page(rem, iter);
5911
5912                /* Copy the data into the page, so we can start over. */
5913                ret = trace_seq_to_buffer(&iter->seq,
5914                                          page_address(spd.pages[i]),
5915                                          trace_seq_used(&iter->seq));
5916                if (ret < 0) {
5917                        __free_page(spd.pages[i]);
5918                        break;
5919                }
5920                spd.partial[i].offset = 0;
5921                spd.partial[i].len = trace_seq_used(&iter->seq);
5922
5923                trace_seq_init(&iter->seq);
5924        }
5925
5926        trace_access_unlock(iter->cpu_file);
5927        trace_event_read_unlock();
5928        mutex_unlock(&iter->mutex);
5929
5930        spd.nr_pages = i;
5931
5932        if (i)
5933                ret = splice_to_pipe(pipe, &spd);
5934        else
5935                ret = 0;
5936out:
5937        splice_shrink_spd(&spd);
5938        return ret;
5939
5940out_err:
5941        mutex_unlock(&iter->mutex);
5942        goto out;
5943}
5944
5945static ssize_t
5946tracing_entries_read(struct file *filp, char __user *ubuf,
5947                     size_t cnt, loff_t *ppos)
5948{
5949        struct inode *inode = file_inode(filp);
5950        struct trace_array *tr = inode->i_private;
5951        int cpu = tracing_get_cpu(inode);
5952        char buf[64];
5953        int r = 0;
5954        ssize_t ret;
5955
5956        mutex_lock(&trace_types_lock);
5957
5958        if (cpu == RING_BUFFER_ALL_CPUS) {
5959                int cpu, buf_size_same;
5960                unsigned long size;
5961
5962                size = 0;
5963                buf_size_same = 1;
5964                /* check if all cpu sizes are same */
5965                for_each_tracing_cpu(cpu) {
5966                        /* fill in the size from first enabled cpu */
5967                        if (size == 0)
5968                                size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5969                        if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5970                                buf_size_same = 0;
5971                                break;
5972                        }
5973                }
5974
5975                if (buf_size_same) {
5976                        if (!ring_buffer_expanded)
5977                                r = sprintf(buf, "%lu (expanded: %lu)\n",
5978                                            size >> 10,
5979                                            trace_buf_size >> 10);
5980                        else
5981                                r = sprintf(buf, "%lu\n", size >> 10);
5982                } else
5983                        r = sprintf(buf, "X\n");
5984        } else
5985                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5986
5987        mutex_unlock(&trace_types_lock);
5988
5989        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5990        return ret;
5991}
5992
5993static ssize_t
5994tracing_entries_write(struct file *filp, const char __user *ubuf,
5995                      size_t cnt, loff_t *ppos)
5996{
5997        struct inode *inode = file_inode(filp);
5998        struct trace_array *tr = inode->i_private;
5999        unsigned long val;
6000        int ret;
6001
6002        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6003        if (ret)
6004                return ret;
6005
6006        /* must have at least 1 entry */
6007        if (!val)
6008                return -EINVAL;
6009
6010        /* value is in KB */
6011        val <<= 10;
6012        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6013        if (ret < 0)
6014                return ret;
6015
6016        *ppos += cnt;
6017
6018        return cnt;
6019}
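
/*
 * Example: these handlers back "buffer_size_kb" (and its per-cpu
 * counterparts); values are in KiB, and a read prints "X" when the
 * per-cpu sizes differ, or "size (expanded: n)" before the buffers have
 * been expanded. Illustrative use:
 *
 *   cat buffer_size_kb             # e.g. "7 (expanded: 1408)"
 *   echo 4096 > buffer_size_kb     # resize each CPU's buffer to 4 MiB
 */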
6020
6021static ssize_t
6022tracing_total_entries_read(struct file *filp, char __user *ubuf,
6023                                size_t cnt, loff_t *ppos)
6024{
6025        struct trace_array *tr = filp->private_data;
6026        char buf[64];
6027        int r, cpu;
6028        unsigned long size = 0, expanded_size = 0;
6029
6030        mutex_lock(&trace_types_lock);
6031        for_each_tracing_cpu(cpu) {
6032                size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6033                if (!ring_buffer_expanded)
6034                        expanded_size += trace_buf_size >> 10;
6035        }
6036        if (ring_buffer_expanded)
6037                r = sprintf(buf, "%lu\n", size);
6038        else
6039                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6040        mutex_unlock(&trace_types_lock);
6041
6042        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6043}
6044
6045static ssize_t
6046tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6047                          size_t cnt, loff_t *ppos)
6048{
6049        /*
6050         * There is no need to read what the user has written; this function
6051         * only exists so that "echo" into the file does not return an error.
6052         */
6053
6054        *ppos += cnt;
6055
6056        return cnt;
6057}
6058
6059static int
6060tracing_free_buffer_release(struct inode *inode, struct file *filp)
6061{
6062        struct trace_array *tr = inode->i_private;
6063
6064        /* disable tracing ? */
6065        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6066                tracer_tracing_off(tr);
6067        /* resize the ring buffer to 0 */
6068        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6069
6070        trace_array_put(tr);
6071
6072        return 0;
6073}
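
/*
 * Example: these two handlers back the "free_buffer" file. The write is
 * deliberately a no-op so "echo" does not fail; the work happens on the
 * final close, which shrinks the ring buffer to zero (and turns tracing
 * off first when TRACE_ITER_STOP_ON_FREE is set). Illustrative use:
 *
 *   echo > free_buffer             # buffer is freed when the fd closes
 */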
6074
6075static ssize_t
6076tracing_mark_write(struct file *filp, const char __user *ubuf,
6077                                        size_t cnt, loff_t *fpos)
6078{
6079        struct trace_array *tr = filp->private_data;
6080        struct ring_buffer_event *event;
6081        enum event_trigger_type tt = ETT_NONE;
6082        struct ring_buffer *buffer;
6083        struct print_entry *entry;
6084        unsigned long irq_flags;
6085        const char faulted[] = "<faulted>";
6086        ssize_t written;
6087        int size;
6088        int len;
6089
6090/* Used in tracing_mark_raw_write() as well */
6091#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6092
6093        if (tracing_disabled)
6094                return -EINVAL;
6095
6096        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6097                return -EINVAL;
6098
6099        if (cnt > TRACE_BUF_SIZE)
6100                cnt = TRACE_BUF_SIZE;
6101
6102        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6103
6104        local_save_flags(irq_flags);
6105        size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6106
6107        /* If less than "<faulted>", then make sure we can still add that */
6108        if (cnt < FAULTED_SIZE)
6109                size += FAULTED_SIZE - cnt;
6110
6111        buffer = tr->trace_buffer.buffer;
6112        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6113                                            irq_flags, preempt_count());
6114        if (unlikely(!event))
6115                /* Ring buffer disabled, return as if not open for write */
6116                return -EBADF;
6117
6118        entry = ring_buffer_event_data(event);
6119        entry->ip = _THIS_IP_;
6120
6121        len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6122        if (len) {
6123                memcpy(&entry->buf, faulted, FAULTED_SIZE);
6124                cnt = FAULTED_SIZE;
6125                written = -EFAULT;
6126        } else
6127                written = cnt;
6128        len = cnt;
6129
6130        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6131                /* do not add \n before testing triggers, but add \0 */
6132                entry->buf[cnt] = '\0';
6133                tt = event_triggers_call(tr->trace_marker_file, entry, event);
6134        }
6135
6136        if (entry->buf[cnt - 1] != '\n') {
6137                entry->buf[cnt] = '\n';
6138                entry->buf[cnt + 1] = '\0';
6139        } else
6140                entry->buf[cnt] = '\0';
6141
6142        __buffer_unlock_commit(buffer, event);
6143
6144        if (tt)
6145                event_triggers_post_call(tr->trace_marker_file, tt);
6146
6147        if (written > 0)
6148                *fpos += written;
6149
6150        return written;
6151}
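
/*
 * Example: a minimal userspace sketch of writing through the
 * "trace_marker" file handled by tracing_mark_write() above. This is
 * illustrative only and assumes tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "hello from userspace\n", 21);
 *                close(fd);
 *                return 0;
 *        }
 *
 * The message then appears in the trace as a TRACE_PRINT entry.
 */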
6152
6153/* Limit it for now to 3K (including tag) */
6154#define RAW_DATA_MAX_SIZE (1024*3)
6155
6156static ssize_t
6157tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6158                                        size_t cnt, loff_t *fpos)
6159{
6160        struct trace_array *tr = filp->private_data;
6161        struct ring_buffer_event *event;
6162        struct ring_buffer *buffer;
6163        struct raw_data_entry *entry;
6164        const char faulted[] = "<faulted>";
6165        unsigned long irq_flags;
6166        ssize_t written;
6167        int size;
6168        int len;
6169
6170#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6171
6172        if (tracing_disabled)
6173                return -EINVAL;
6174
6175        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6176                return -EINVAL;
6177
6178        /* The marker must at least have a tag id */
6179        if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6180                return -EINVAL;
6181
6182        if (cnt > TRACE_BUF_SIZE)
6183                cnt = TRACE_BUF_SIZE;
6184
6185        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6186
6187        local_save_flags(irq_flags);
6188        size = sizeof(*entry) + cnt;
6189        if (cnt < FAULT_SIZE_ID)
6190                size += FAULT_SIZE_ID - cnt;
6191
6192        buffer = tr->trace_buffer.buffer;
6193        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6194                                            irq_flags, preempt_count());
6195        if (!event)
6196                /* Ring buffer disabled, return as if not open for write */
6197                return -EBADF;
6198
6199        entry = ring_buffer_event_data(event);
6200
6201        len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6202        if (len) {
6203                entry->id = -1;
6204                memcpy(&entry->buf, faulted, FAULTED_SIZE);
6205                written = -EFAULT;
6206        } else
6207                written = cnt;
6208
6209        __buffer_unlock_commit(buffer, event);
6210
6211        if (written > 0)
6212                *fpos += written;
6213
6214        return written;
6215}
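
/*
 * Example: a "trace_marker_raw" payload must begin with an unsigned int
 * tag id followed by raw bytes (see the copy into entry->id above). A
 * minimal userspace sketch with a made-up tag value, where fd is an
 * O_WRONLY descriptor on trace_marker_raw:
 *
 *        struct {
 *                unsigned int id;
 *                char data[8];
 *        } rec = { .id = 42, .data = "payload" };
 *
 *        write(fd, &rec, sizeof(rec));
 */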
6216
6217static int tracing_clock_show(struct seq_file *m, void *v)
6218{
6219        struct trace_array *tr = m->private;
6220        int i;
6221
6222        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6223                seq_printf(m,
6224                        "%s%s%s%s", i ? " " : "",
6225                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6226                        i == tr->clock_id ? "]" : "");
6227        seq_putc(m, '\n');
6228
6229        return 0;
6230}
6231
6232int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6233{
6234        int i;
6235
6236        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6237                if (strcmp(trace_clocks[i].name, clockstr) == 0)
6238                        break;
6239        }
6240        if (i == ARRAY_SIZE(trace_clocks))
6241                return -EINVAL;
6242
6243        mutex_lock(&trace_types_lock);
6244
6245        tr->clock_id = i;
6246
6247        ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6248
6249        /*
6250         * New clock may not be consistent with the previous clock.
6251         * Reset the buffer so that it doesn't have incomparable timestamps.
6252         */
6253        tracing_reset_online_cpus(&tr->trace_buffer);
6254
6255#ifdef CONFIG_TRACER_MAX_TRACE
6256        if (tr->max_buffer.buffer)
6257                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6258        tracing_reset_online_cpus(&tr->max_buffer);
6259#endif
6260
6261        mutex_unlock(&trace_types_lock);
6262
6263        return 0;
6264}
6265
6266static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6267                                   size_t cnt, loff_t *fpos)
6268{
6269        struct seq_file *m = filp->private_data;
6270        struct trace_array *tr = m->private;
6271        char buf[64];
6272        const char *clockstr;
6273        int ret;
6274
6275        if (cnt >= sizeof(buf))
6276                return -EINVAL;
6277
6278        if (copy_from_user(buf, ubuf, cnt))
6279                return -EFAULT;
6280
6281        buf[cnt] = 0;
6282
6283        clockstr = strstrip(buf);
6284
6285        ret = tracing_set_clock(tr, clockstr);
6286        if (ret)
6287                return ret;
6288
6289        *fpos += cnt;
6290
6291        return cnt;
6292}
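
/*
 * Example: illustrative use of the "trace_clock" file served by the
 * handlers above. A read lists the available clocks with the current
 * one in brackets; writing one of the listed names switches clocks and
 * resets the buffers (see tracing_set_clock()):
 *
 *   cat trace_clock                # e.g. "[local] global ... mono ..."
 *   echo mono > trace_clock
 */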
6293
6294static int tracing_clock_open(struct inode *inode, struct file *file)
6295{
6296        struct trace_array *tr = inode->i_private;
6297        int ret;
6298
6299        if (tracing_disabled)
6300                return -ENODEV;
6301
6302        if (trace_array_get(tr))
6303                return -ENODEV;
6304
6305        ret = single_open(file, tracing_clock_show, inode->i_private);
6306        if (ret < 0)
6307                trace_array_put(tr);
6308
6309        return ret;
6310}
6311
6312static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6313{
6314        struct trace_array *tr = m->private;
6315
6316        mutex_lock(&trace_types_lock);
6317
6318        if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6319                seq_puts(m, "delta [absolute]\n");
6320        else
6321                seq_puts(m, "[delta] absolute\n");
6322
6323        mutex_unlock(&trace_types_lock);
6324
6325        return 0;
6326}
6327
6328static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6329{
6330        struct trace_array *tr = inode->i_private;
6331        int ret;
6332
6333        if (tracing_disabled)
6334                return -ENODEV;
6335
6336        if (trace_array_get(tr))
6337                return -ENODEV;
6338
6339        ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6340        if (ret < 0)
6341                trace_array_put(tr);
6342
6343        return ret;
6344}
6345
6346int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6347{
6348        int ret = 0;
6349
6350        mutex_lock(&trace_types_lock);
6351
6352        if (abs && tr->time_stamp_abs_ref++)
6353                goto out;
6354
6355        if (!abs) {
6356                if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6357                        ret = -EINVAL;
6358                        goto out;
6359                }
6360
6361                if (--tr->time_stamp_abs_ref)
6362                        goto out;
6363        }
6364
6365        ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6366
6367#ifdef CONFIG_TRACER_MAX_TRACE
6368        if (tr->max_buffer.buffer)
6369                ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6370#endif
6371 out:
6372        mutex_unlock(&trace_types_lock);
6373
6374        return ret;
6375}
6376
6377struct ftrace_buffer_info {
6378        struct trace_iterator   iter;
6379        void                    *spare;
6380        unsigned int            spare_cpu;
6381        unsigned int            read;
6382};
6383
6384#ifdef CONFIG_TRACER_SNAPSHOT
6385static int tracing_snapshot_open(struct inode *inode, struct file *file)
6386{
6387        struct trace_array *tr = inode->i_private;
6388        struct trace_iterator *iter;
6389        struct seq_file *m;
6390        int ret = 0;
6391
6392        if (trace_array_get(tr) < 0)
6393                return -ENODEV;
6394
6395        if (file->f_mode & FMODE_READ) {
6396                iter = __tracing_open(inode, file, true);
6397                if (IS_ERR(iter))
6398                        ret = PTR_ERR(iter);
6399        } else {
6400                /* Writes still need the seq_file to hold the private data */
6401                ret = -ENOMEM;
6402                m = kzalloc(sizeof(*m), GFP_KERNEL);
6403                if (!m)
6404                        goto out;
6405                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6406                if (!iter) {
6407                        kfree(m);
6408                        goto out;
6409                }
6410                ret = 0;
6411
6412                iter->tr = tr;
6413                iter->trace_buffer = &tr->max_buffer;
6414                iter->cpu_file = tracing_get_cpu(inode);
6415                m->private = iter;
6416                file->private_data = m;
6417        }
6418out:
6419        if (ret < 0)
6420                trace_array_put(tr);
6421
6422        return ret;
6423}
6424
6425static ssize_t
6426tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6427                       loff_t *ppos)
6428{
6429        struct seq_file *m = filp->private_data;
6430        struct trace_iterator *iter = m->private;
6431        struct trace_array *tr = iter->tr;
6432        unsigned long val;
6433        int ret;
6434
6435        ret = tracing_update_buffers();
6436        if (ret < 0)
6437                return ret;
6438
6439        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6440        if (ret)
6441                return ret;
6442
6443        mutex_lock(&trace_types_lock);
6444
6445        if (tr->current_trace->use_max_tr) {
6446                ret = -EBUSY;
6447                goto out;
6448        }
6449
6450        switch (val) {
6451        case 0:
6452                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6453                        ret = -EINVAL;
6454                        break;
6455                }
6456                if (tr->allocated_snapshot)
6457                        free_snapshot(tr);
6458                break;
6459        case 1:
6460/* Only allow per-cpu swap if the ring buffer supports it */
6461#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6462                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6463                        ret = -EINVAL;
6464                        break;
6465                }
6466#endif
6467                if (!tr->allocated_snapshot) {
6468                        ret = tracing_alloc_snapshot_instance(tr);
6469                        if (ret < 0)
6470                                break;
6471                }
6472                local_irq_disable();
6473                /* Now, we're going to swap */
6474                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6475                        update_max_tr(tr, current, smp_processor_id());
6476                else
6477                        update_max_tr_single(tr, current, iter->cpu_file);
6478                local_irq_enable();
6479                break;
6480        default:
6481                if (tr->allocated_snapshot) {
6482                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6483                                tracing_reset_online_cpus(&tr->max_buffer);
6484                        else
6485                                tracing_reset(&tr->max_buffer, iter->cpu_file);
6486                }
6487                break;
6488        }
6489
6490        if (ret >= 0) {
6491                *ppos += cnt;
6492                ret = cnt;
6493        }
6494out:
6495        mutex_unlock(&trace_types_lock);
6496        return ret;
6497}
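
/*
 * Illustrative user-space sketch (not part of this file, not compiled):
 * it exercises the write semantics implemented above -- "1" allocates the
 * max buffer if needed and swaps it with the live buffer, "0" frees the
 * snapshot, any other value just clears it.  The /sys/kernel/tracing
 * mount point is an assumption.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static int take_snapshot(void)
{
        int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
        int ret = -1;

        if (fd < 0)
                return -1;
        /* "1\n" -> allocate (if needed) and swap, as in case 1 above */
        if (write(fd, "1\n", 2) == 2)
                ret = 0;
        close(fd);
        return ret;
}
#endif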
6498
6499static int tracing_snapshot_release(struct inode *inode, struct file *file)
6500{
6501        struct seq_file *m = file->private_data;
6502        int ret;
6503
6504        ret = tracing_release(inode, file);
6505
6506        if (file->f_mode & FMODE_READ)
6507                return ret;
6508
6509        /* If write only, the seq_file is just a stub */
6510        if (m)
6511                kfree(m->private);
6512        kfree(m);
6513
6514        return 0;
6515}
6516
6517static int tracing_buffers_open(struct inode *inode, struct file *filp);
6518static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6519                                    size_t count, loff_t *ppos);
6520static int tracing_buffers_release(struct inode *inode, struct file *file);
6521static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6522                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6523
6524static int snapshot_raw_open(struct inode *inode, struct file *filp)
6525{
6526        struct ftrace_buffer_info *info;
6527        int ret;
6528
6529        ret = tracing_buffers_open(inode, filp);
6530        if (ret < 0)
6531                return ret;
6532
6533        info = filp->private_data;
6534
6535        if (info->iter.trace->use_max_tr) {
6536                tracing_buffers_release(inode, filp);
6537                return -EBUSY;
6538        }
6539
6540        info->iter.snapshot = true;
6541        info->iter.trace_buffer = &info->iter.tr->max_buffer;
6542
6543        return ret;
6544}
6545
6546#endif /* CONFIG_TRACER_SNAPSHOT */
6547
6548
6549static const struct file_operations tracing_thresh_fops = {
6550        .open           = tracing_open_generic,
6551        .read           = tracing_thresh_read,
6552        .write          = tracing_thresh_write,
6553        .llseek         = generic_file_llseek,
6554};
6555
6556#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6557static const struct file_operations tracing_max_lat_fops = {
6558        .open           = tracing_open_generic,
6559        .read           = tracing_max_lat_read,
6560        .write          = tracing_max_lat_write,
6561        .llseek         = generic_file_llseek,
6562};
6563#endif
6564
6565static const struct file_operations set_tracer_fops = {
6566        .open           = tracing_open_generic,
6567        .read           = tracing_set_trace_read,
6568        .write          = tracing_set_trace_write,
6569        .llseek         = generic_file_llseek,
6570};
6571
6572static const struct file_operations tracing_pipe_fops = {
6573        .open           = tracing_open_pipe,
6574        .poll           = tracing_poll_pipe,
6575        .read           = tracing_read_pipe,
6576        .splice_read    = tracing_splice_read_pipe,
6577        .release        = tracing_release_pipe,
6578        .llseek         = no_llseek,
6579};
6580
6581static const struct file_operations tracing_entries_fops = {
6582        .open           = tracing_open_generic_tr,
6583        .read           = tracing_entries_read,
6584        .write          = tracing_entries_write,
6585        .llseek         = generic_file_llseek,
6586        .release        = tracing_release_generic_tr,
6587};
6588
6589static const struct file_operations tracing_total_entries_fops = {
6590        .open           = tracing_open_generic_tr,
6591        .read           = tracing_total_entries_read,
6592        .llseek         = generic_file_llseek,
6593        .release        = tracing_release_generic_tr,
6594};
6595
6596static const struct file_operations tracing_free_buffer_fops = {
6597        .open           = tracing_open_generic_tr,
6598        .write          = tracing_free_buffer_write,
6599        .release        = tracing_free_buffer_release,
6600};
6601
6602static const struct file_operations tracing_mark_fops = {
6603        .open           = tracing_open_generic_tr,
6604        .write          = tracing_mark_write,
6605        .llseek         = generic_file_llseek,
6606        .release        = tracing_release_generic_tr,
6607};
6608
6609static const struct file_operations tracing_mark_raw_fops = {
6610        .open           = tracing_open_generic_tr,
6611        .write          = tracing_mark_raw_write,
6612        .llseek         = generic_file_llseek,
6613        .release        = tracing_release_generic_tr,
6614};
6615
6616static const struct file_operations trace_clock_fops = {
6617        .open           = tracing_clock_open,
6618        .read           = seq_read,
6619        .llseek         = seq_lseek,
6620        .release        = tracing_single_release_tr,
6621        .write          = tracing_clock_write,
6622};
6623
6624static const struct file_operations trace_time_stamp_mode_fops = {
6625        .open           = tracing_time_stamp_mode_open,
6626        .read           = seq_read,
6627        .llseek         = seq_lseek,
6628        .release        = tracing_single_release_tr,
6629};
6630
6631#ifdef CONFIG_TRACER_SNAPSHOT
6632static const struct file_operations snapshot_fops = {
6633        .open           = tracing_snapshot_open,
6634        .read           = seq_read,
6635        .write          = tracing_snapshot_write,
6636        .llseek         = tracing_lseek,
6637        .release        = tracing_snapshot_release,
6638};
6639
6640static const struct file_operations snapshot_raw_fops = {
6641        .open           = snapshot_raw_open,
6642        .read           = tracing_buffers_read,
6643        .release        = tracing_buffers_release,
6644        .splice_read    = tracing_buffers_splice_read,
6645        .llseek         = no_llseek,
6646};
6647
6648#endif /* CONFIG_TRACER_SNAPSHOT */
6649
6650static int tracing_buffers_open(struct inode *inode, struct file *filp)
6651{
6652        struct trace_array *tr = inode->i_private;
6653        struct ftrace_buffer_info *info;
6654        int ret;
6655
6656        if (tracing_disabled)
6657                return -ENODEV;
6658
6659        if (trace_array_get(tr) < 0)
6660                return -ENODEV;
6661
6662        info = kzalloc(sizeof(*info), GFP_KERNEL);
6663        if (!info) {
6664                trace_array_put(tr);
6665                return -ENOMEM;
6666        }
6667
6668        mutex_lock(&trace_types_lock);
6669
6670        info->iter.tr           = tr;
6671        info->iter.cpu_file     = tracing_get_cpu(inode);
6672        info->iter.trace        = tr->current_trace;
6673        info->iter.trace_buffer = &tr->trace_buffer;
6674        info->spare             = NULL;
6675        /* Force reading ring buffer for first read */
6676        info->read              = (unsigned int)-1;
6677
6678        filp->private_data = info;
6679
6680        tr->current_trace->ref++;
6681
6682        mutex_unlock(&trace_types_lock);
6683
6684        ret = nonseekable_open(inode, filp);
6685        if (ret < 0)
6686                trace_array_put(tr);
6687
6688        return ret;
6689}
6690
6691static __poll_t
6692tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6693{
6694        struct ftrace_buffer_info *info = filp->private_data;
6695        struct trace_iterator *iter = &info->iter;
6696
6697        return trace_poll(iter, filp, poll_table);
6698}
6699
6700static ssize_t
6701tracing_buffers_read(struct file *filp, char __user *ubuf,
6702                     size_t count, loff_t *ppos)
6703{
6704        struct ftrace_buffer_info *info = filp->private_data;
6705        struct trace_iterator *iter = &info->iter;
6706        ssize_t ret = 0;
6707        ssize_t size;
6708
6709        if (!count)
6710                return 0;
6711
6712#ifdef CONFIG_TRACER_MAX_TRACE
6713        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6714                return -EBUSY;
6715#endif
6716
6717        if (!info->spare) {
6718                info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6719                                                          iter->cpu_file);
6720                if (IS_ERR(info->spare)) {
6721                        ret = PTR_ERR(info->spare);
6722                        info->spare = NULL;
6723                } else {
6724                        info->spare_cpu = iter->cpu_file;
6725                }
6726        }
6727        if (!info->spare)
6728                return ret;
6729
6730        /* Do we have previous read data to read? */
6731        if (info->read < PAGE_SIZE)
6732                goto read;
6733
6734 again:
6735        trace_access_lock(iter->cpu_file);
6736        ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6737                                    &info->spare,
6738                                    count,
6739                                    iter->cpu_file, 0);
6740        trace_access_unlock(iter->cpu_file);
6741
6742        if (ret < 0) {
6743                if (trace_empty(iter)) {
6744                        if ((filp->f_flags & O_NONBLOCK))
6745                                return -EAGAIN;
6746
6747                        ret = wait_on_pipe(iter, false);
6748                        if (ret)
6749                                return ret;
6750
6751                        goto again;
6752                }
6753                return 0;
6754        }
6755
6756        info->read = 0;
6757 read:
6758        size = PAGE_SIZE - info->read;
6759        if (size > count)
6760                size = count;
6761
6762        ret = copy_to_user(ubuf, info->spare + info->read, size);
6763        if (ret == size)
6764                return -EFAULT;
6765
6766        size -= ret;
6767
6768        *ppos += size;
6769        info->read += size;
6770
6771        return size;
6772}
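
/*
 * Illustrative user-space sketch (not part of this file, not compiled):
 * trace_pipe_raw hands out raw ring-buffer pages, so a consumer reads in
 * page-sized chunks and parses the binary page format.  The per-cpu path
 * and the 4096-byte page size are assumptions.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void drain_cpu0_raw(void)
{
        char page[4096];        /* one ring-buffer page */
        ssize_t r;
        int fd;

        fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
                  O_RDONLY | O_NONBLOCK);
        if (fd < 0)
                return;
        /* Each successful read() returns data from one buffer page */
        while ((r = read(fd, page, sizeof(page))) > 0)
                ;       /* hand "page" to a binary consumer */
        close(fd);
}
#endif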
6773
6774static int tracing_buffers_release(struct inode *inode, struct file *file)
6775{
6776        struct ftrace_buffer_info *info = file->private_data;
6777        struct trace_iterator *iter = &info->iter;
6778
6779        mutex_lock(&trace_types_lock);
6780
6781        iter->tr->current_trace->ref--;
6782
6783        __trace_array_put(iter->tr);
6784
6785        if (info->spare)
6786                ring_buffer_free_read_page(iter->trace_buffer->buffer,
6787                                           info->spare_cpu, info->spare);
6788        kfree(info);
6789
6790        mutex_unlock(&trace_types_lock);
6791
6792        return 0;
6793}
6794
6795struct buffer_ref {
6796        struct ring_buffer      *buffer;
6797        void                    *page;
6798        int                     cpu;
6799        int                     ref;
6800};
6801
6802static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6803                                    struct pipe_buffer *buf)
6804{
6805        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6806
6807        if (--ref->ref)
6808                return;
6809
6810        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6811        kfree(ref);
6812        buf->private = 0;
6813}
6814
6815static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6816                                struct pipe_buffer *buf)
6817{
6818        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6819
6820        if (ref->ref > INT_MAX/2)
6821                return false;
6822
6823        ref->ref++;
6824        return true;
6825}
6826
6827/* Pipe buffer operations for a buffer. */
6828static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6829        .can_merge              = 0,
6830        .confirm                = generic_pipe_buf_confirm,
6831        .release                = buffer_pipe_buf_release,
6832        .steal                  = generic_pipe_buf_steal,
6833        .get                    = buffer_pipe_buf_get,
6834};
6835
6836/*
6837 * Callback from splice_to_pipe(), if we need to release some pages
6838 * at the end of the spd in case we errored out while filling the pipe.
6839 */
6840static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6841{
6842        struct buffer_ref *ref =
6843                (struct buffer_ref *)spd->partial[i].private;
6844
6845        if (--ref->ref)
6846                return;
6847
6848        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6849        kfree(ref);
6850        spd->partial[i].private = 0;
6851}
6852
6853static ssize_t
6854tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6855                            struct pipe_inode_info *pipe, size_t len,
6856                            unsigned int flags)
6857{
6858        struct ftrace_buffer_info *info = file->private_data;
6859        struct trace_iterator *iter = &info->iter;
6860        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6861        struct page *pages_def[PIPE_DEF_BUFFERS];
6862        struct splice_pipe_desc spd = {
6863                .pages          = pages_def,
6864                .partial        = partial_def,
6865                .nr_pages_max   = PIPE_DEF_BUFFERS,
6866                .ops            = &buffer_pipe_buf_ops,
6867                .spd_release    = buffer_spd_release,
6868        };
6869        struct buffer_ref *ref;
6870        int entries, i;
6871        ssize_t ret = 0;
6872
6873#ifdef CONFIG_TRACER_MAX_TRACE
6874        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6875                return -EBUSY;
6876#endif
6877
6878        if (*ppos & (PAGE_SIZE - 1))
6879                return -EINVAL;
6880
6881        if (len & (PAGE_SIZE - 1)) {
6882                if (len < PAGE_SIZE)
6883                        return -EINVAL;
6884                len &= PAGE_MASK;
6885        }
6886
6887        if (splice_grow_spd(pipe, &spd))
6888                return -ENOMEM;
6889
6890 again:
6891        trace_access_lock(iter->cpu_file);
6892        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6893
6894        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6895                struct page *page;
6896                int r;
6897
6898                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6899                if (!ref) {
6900                        ret = -ENOMEM;
6901                        break;
6902                }
6903
6904                ref->ref = 1;
6905                ref->buffer = iter->trace_buffer->buffer;
6906                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6907                if (IS_ERR(ref->page)) {
6908                        ret = PTR_ERR(ref->page);
6909                        ref->page = NULL;
6910                        kfree(ref);
6911                        break;
6912                }
6913                ref->cpu = iter->cpu_file;
6914
6915                r = ring_buffer_read_page(ref->buffer, &ref->page,
6916                                          len, iter->cpu_file, 1);
6917                if (r < 0) {
6918                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
6919                                                   ref->page);
6920                        kfree(ref);
6921                        break;
6922                }
6923
6924                page = virt_to_page(ref->page);
6925
6926                spd.pages[i] = page;
6927                spd.partial[i].len = PAGE_SIZE;
6928                spd.partial[i].offset = 0;
6929                spd.partial[i].private = (unsigned long)ref;
6930                spd.nr_pages++;
6931                *ppos += PAGE_SIZE;
6932
6933                entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6934        }
6935
6936        trace_access_unlock(iter->cpu_file);
6937        spd.nr_pages = i;
6938
6939        /* did we read anything? */
6940        if (!spd.nr_pages) {
6941                if (ret)
6942                        goto out;
6943
6944                ret = -EAGAIN;
6945                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6946                        goto out;
6947
6948                ret = wait_on_pipe(iter, true);
6949                if (ret)
6950                        goto out;
6951
6952                goto again;
6953        }
6954
6955        ret = splice_to_pipe(pipe, &spd);
6956out:
6957        splice_shrink_spd(&spd);
6958
6959        return ret;
6960}
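
/*
 * Illustrative user-space sketch (not part of this file, not compiled) of
 * the zero-copy path above: splice() moves whole ring-buffer pages from
 * trace_pipe_raw into a pipe.  As checked above, the offset must be page
 * aligned and the length at least PAGE_SIZE; the 4096-byte page size is
 * an assumption.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int splice_one_page(int raw_fd, int out_fd)
{
        ssize_t n;
        int pfd[2];

        if (pipe(pfd) < 0)
                return -1;
        /* Pull one page of trace data into the pipe without copying */
        n = splice(raw_fd, NULL, pfd[1], NULL, 4096, SPLICE_F_MOVE);
        if (n > 0)
                /* ...and push it on to a file, socket, etc. */
                n = splice(pfd[0], NULL, out_fd, NULL, n, SPLICE_F_MOVE);
        close(pfd[0]);
        close(pfd[1]);
        return n > 0 ? 0 : -1;
}
#endif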
6961
6962static const struct file_operations tracing_buffers_fops = {
6963        .open           = tracing_buffers_open,
6964        .read           = tracing_buffers_read,
6965        .poll           = tracing_buffers_poll,
6966        .release        = tracing_buffers_release,
6967        .splice_read    = tracing_buffers_splice_read,
6968        .llseek         = no_llseek,
6969};
6970
6971static ssize_t
6972tracing_stats_read(struct file *filp, char __user *ubuf,
6973                   size_t count, loff_t *ppos)
6974{
6975        struct inode *inode = file_inode(filp);
6976        struct trace_array *tr = inode->i_private;
6977        struct trace_buffer *trace_buf = &tr->trace_buffer;
6978        int cpu = tracing_get_cpu(inode);
6979        struct trace_seq *s;
6980        unsigned long cnt;
6981        unsigned long long t;
6982        unsigned long usec_rem;
6983
6984        s = kmalloc(sizeof(*s), GFP_KERNEL);
6985        if (!s)
6986                return -ENOMEM;
6987
6988        trace_seq_init(s);
6989
6990        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6991        trace_seq_printf(s, "entries: %ld\n", cnt);
6992
6993        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6994        trace_seq_printf(s, "overrun: %ld\n", cnt);
6995
6996        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6997        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6998
6999        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7000        trace_seq_printf(s, "bytes: %ld\n", cnt);
7001
7002        if (trace_clocks[tr->clock_id].in_ns) {
7003                /* local or global for trace_clock */
7004                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7005                usec_rem = do_div(t, USEC_PER_SEC);
7006                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7007                                                                t, usec_rem);
7008
7009                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7010                usec_rem = do_div(t, USEC_PER_SEC);
7011                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7012        } else {
7013                /* counter or tsc mode for trace_clock */
7014                trace_seq_printf(s, "oldest event ts: %llu\n",
7015                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7016
7017                trace_seq_printf(s, "now ts: %llu\n",
7018                                ring_buffer_time_stamp(trace_buf->buffer, cpu));
7019        }
7020
7021        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7022        trace_seq_printf(s, "dropped events: %ld\n", cnt);
7023
7024        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7025        trace_seq_printf(s, "read events: %ld\n", cnt);
7026
7027        count = simple_read_from_buffer(ubuf, count, ppos,
7028                                        s->buffer, trace_seq_used(s));
7029
7030        kfree(s);
7031
7032        return count;
7033}
7034
7035static const struct file_operations tracing_stats_fops = {
7036        .open           = tracing_open_generic_tr,
7037        .read           = tracing_stats_read,
7038        .llseek         = generic_file_llseek,
7039        .release        = tracing_release_generic_tr,
7040};
7041
7042#ifdef CONFIG_DYNAMIC_FTRACE
7043
7044static ssize_t
7045tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7046                  size_t cnt, loff_t *ppos)
7047{
7048        unsigned long *p = filp->private_data;
7049        char buf[64]; /* Not too big for a shallow stack */
7050        int r;
7051
7052        r = scnprintf(buf, 63, "%ld", *p);
7053        buf[r++] = '\n';
7054
7055        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7056}
7057
7058static const struct file_operations tracing_dyn_info_fops = {
7059        .open           = tracing_open_generic,
7060        .read           = tracing_read_dyn_info,
7061        .llseek         = generic_file_llseek,
7062};
7063#endif /* CONFIG_DYNAMIC_FTRACE */
7064
7065#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7066static void
7067ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7068                struct trace_array *tr, struct ftrace_probe_ops *ops,
7069                void *data)
7070{
7071        tracing_snapshot_instance(tr);
7072}
7073
7074static void
7075ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7076                      struct trace_array *tr, struct ftrace_probe_ops *ops,
7077                      void *data)
7078{
7079        struct ftrace_func_mapper *mapper = data;
7080        long *count = NULL;
7081
7082        if (mapper)
7083                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7084
7085        if (count) {
7086
7087                if (*count <= 0)
7088                        return;
7089
7090                (*count)--;
7091        }
7092
7093        tracing_snapshot_instance(tr);
7094}
7095
7096static int
7097ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7098                      struct ftrace_probe_ops *ops, void *data)
7099{
7100        struct ftrace_func_mapper *mapper = data;
7101        long *count = NULL;
7102
7103        seq_printf(m, "%ps:", (void *)ip);
7104
7105        seq_puts(m, "snapshot");
7106
7107        if (mapper)
7108                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7109
7110        if (count)
7111                seq_printf(m, ":count=%ld\n", *count);
7112        else
7113                seq_puts(m, ":unlimited\n");
7114
7115        return 0;
7116}
7117
7118static int
7119ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7120                     unsigned long ip, void *init_data, void **data)
7121{
7122        struct ftrace_func_mapper *mapper = *data;
7123
7124        if (!mapper) {
7125                mapper = allocate_ftrace_func_mapper();
7126                if (!mapper)
7127                        return -ENOMEM;
7128                *data = mapper;
7129        }
7130
7131        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7132}
7133
7134static void
7135ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7136                     unsigned long ip, void *data)
7137{
7138        struct ftrace_func_mapper *mapper = data;
7139
7140        if (!ip) {
7141                if (!mapper)
7142                        return;
7143                free_ftrace_func_mapper(mapper, NULL);
7144                return;
7145        }
7146
7147        ftrace_func_mapper_remove_ip(mapper, ip);
7148}
7149
7150static struct ftrace_probe_ops snapshot_probe_ops = {
7151        .func                   = ftrace_snapshot,
7152        .print                  = ftrace_snapshot_print,
7153};
7154
7155static struct ftrace_probe_ops snapshot_count_probe_ops = {
7156        .func                   = ftrace_count_snapshot,
7157        .print                  = ftrace_snapshot_print,
7158        .init                   = ftrace_snapshot_init,
7159        .free                   = ftrace_snapshot_free,
7160};
7161
7162static int
7163ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7164                               char *glob, char *cmd, char *param, int enable)
7165{
7166        struct ftrace_probe_ops *ops;
7167        void *count = (void *)-1;
7168        char *number;
7169        int ret;
7170
7171        if (!tr)
7172                return -ENODEV;
7173
7174        /* hash funcs only work with set_ftrace_filter */
7175        if (!enable)
7176                return -EINVAL;
7177
7178        ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7179
7180        if (glob[0] == '!')
7181                return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7182
7183        if (!param)
7184                goto out_reg;
7185
7186        number = strsep(&param, ":");
7187
7188        if (!strlen(number))
7189                goto out_reg;
7190
7191        /*
7192         * We use the callback data field (which is a pointer)
7193         * as our counter.
7194         */
7195        ret = kstrtoul(number, 0, (unsigned long *)&count);
7196        if (ret)
7197                return ret;
7198
7199 out_reg:
7200        ret = tracing_alloc_snapshot_instance(tr);
7201        if (ret < 0)
7202                goto out;
7203
7204        ret = register_ftrace_function_probe(glob, tr, ops, count);
7205
7206 out:
7207        return ret < 0 ? ret : 0;
7208}
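
/*
 * Illustrative user-space sketch (not part of this file, not compiled):
 * the callback above implements the "<func>:snapshot[:count]" syntax of
 * set_ftrace_filter, and a '!' prefix removes the probe again.  The
 * example arms a one-shot snapshot on schedule(); the tracefs path is an
 * assumption.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int arm_snapshot_on_schedule(void)
{
        const char *cmd = "schedule:snapshot:1";
        int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
        int ret = -1;

        if (fd < 0)
                return -1;
        if (write(fd, cmd, strlen(cmd)) == (ssize_t)strlen(cmd))
                ret = 0;
        close(fd);
        return ret;
}
#endif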
7209
7210static struct ftrace_func_command ftrace_snapshot_cmd = {
7211        .name                   = "snapshot",
7212        .func                   = ftrace_trace_snapshot_callback,
7213};
7214
7215static __init int register_snapshot_cmd(void)
7216{
7217        return register_ftrace_command(&ftrace_snapshot_cmd);
7218}
7219#else
7220static inline __init int register_snapshot_cmd(void) { return 0; }
7221#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7222
7223static struct dentry *tracing_get_dentry(struct trace_array *tr)
7224{
7225        if (WARN_ON(!tr->dir))
7226                return ERR_PTR(-ENODEV);
7227
7228        /* Top directory uses NULL as the parent */
7229        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7230                return NULL;
7231
7232        /* All sub buffers have a descriptor */
7233        return tr->dir;
7234}
7235
7236static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7237{
7238        struct dentry *d_tracer;
7239
7240        if (tr->percpu_dir)
7241                return tr->percpu_dir;
7242
7243        d_tracer = tracing_get_dentry(tr);
7244        if (IS_ERR(d_tracer))
7245                return NULL;
7246
7247        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7248
7249        WARN_ONCE(!tr->percpu_dir,
7250                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7251
7252        return tr->percpu_dir;
7253}
7254
7255static struct dentry *
7256trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7257                      void *data, long cpu, const struct file_operations *fops)
7258{
7259        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7260
7261        if (ret) /* See tracing_get_cpu() */
7262                d_inode(ret)->i_cdev = (void *)(cpu + 1);
7263        return ret;
7264}
7265
7266static void
7267tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7268{
7269        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7270        struct dentry *d_cpu;
7271        char cpu_dir[30]; /* 30 characters should be more than enough */
7272
7273        if (!d_percpu)
7274                return;
7275
7276        snprintf(cpu_dir, 30, "cpu%ld", cpu);
7277        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7278        if (!d_cpu) {
7279                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7280                return;
7281        }
7282
7283        /* per cpu trace_pipe */
7284        trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7285                                tr, cpu, &tracing_pipe_fops);
7286
7287        /* per cpu trace */
7288        trace_create_cpu_file("trace", 0644, d_cpu,
7289                                tr, cpu, &tracing_fops);
7290
7291        trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7292                                tr, cpu, &tracing_buffers_fops);
7293
7294        trace_create_cpu_file("stats", 0444, d_cpu,
7295                                tr, cpu, &tracing_stats_fops);
7296
7297        trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7298                                tr, cpu, &tracing_entries_fops);
7299
7300#ifdef CONFIG_TRACER_SNAPSHOT
7301        trace_create_cpu_file("snapshot", 0644, d_cpu,
7302                                tr, cpu, &snapshot_fops);
7303
7304        trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7305                                tr, cpu, &snapshot_raw_fops);
7306#endif
7307}
7308
7309#ifdef CONFIG_FTRACE_SELFTEST
7310/* Let selftest have access to static functions in this file */
7311#include "trace_selftest.c"
7312#endif
7313
7314static ssize_t
7315trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7316                        loff_t *ppos)
7317{
7318        struct trace_option_dentry *topt = filp->private_data;
7319        char *buf;
7320
7321        if (topt->flags->val & topt->opt->bit)
7322                buf = "1\n";
7323        else
7324                buf = "0\n";
7325
7326        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7327}
7328
7329static ssize_t
7330trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7331                         loff_t *ppos)
7332{
7333        struct trace_option_dentry *topt = filp->private_data;
7334        unsigned long val;
7335        int ret;
7336
7337        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7338        if (ret)
7339                return ret;
7340
7341        if (val != 0 && val != 1)
7342                return -EINVAL;
7343
7344        if (!!(topt->flags->val & topt->opt->bit) != val) {
7345                mutex_lock(&trace_types_lock);
7346                ret = __set_tracer_option(topt->tr, topt->flags,
7347                                          topt->opt, !val);
7348                mutex_unlock(&trace_types_lock);
7349                if (ret)
7350                        return ret;
7351        }
7352
7353        *ppos += cnt;
7354
7355        return cnt;
7356}
7357
7358
7359static const struct file_operations trace_options_fops = {
7360        .open = tracing_open_generic,
7361        .read = trace_options_read,
7362        .write = trace_options_write,
7363        .llseek = generic_file_llseek,
7364};
7365
7366/*
7367 * In order to pass in both the trace_array descriptor as well as the index
7368 * to the flag that the trace option file represents, the trace_array
7369 * has a character array of trace_flags_index[], which holds the index
7370 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7371 * The address of this character array is passed to the flag option file
7372 * read/write callbacks.
7373 *
7374 * In order to extract both the index and the trace_array descriptor,
7375 * get_tr_index() uses the following algorithm.
7376 *
7377 *   idx = *ptr;
7378 *
7379 * Because each element of the array stores its own index (index[1] == 1,
7380 * and so on), dereferencing the pointer yields the index directly.
7381 *
7382 * Then, to get the trace_array descriptor, subtracting that index
7383 * from the pointer brings us back to the start of the array.
7384 *
7385 *   ptr - idx == &index[0]
7386 *
7387 * Then a simple container_of() from that pointer gets us to the
7388 * trace_array descriptor.
7389 */
7390static void get_tr_index(void *data, struct trace_array **ptr,
7391                         unsigned int *pindex)
7392{
7393        *pindex = *(unsigned char *)data;
7394
7395        *ptr = container_of(data - *pindex, struct trace_array,
7396                            trace_flags_index);
7397}
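
/*
 * Worked illustration of the arithmetic above (a sketch, not built):
 * passing the address of trace_flags_index[3] means *data == 3, and
 * data - 3 points at trace_flags_index[0], from which container_of()
 * recovers the enclosing trace_array.
 */
#if 0
static void get_tr_index_example(struct trace_array *some_tr)
{
        struct trace_array *tr;
        unsigned int index;

        /* Element 3 holds the value 3 by construction */
        get_tr_index(&some_tr->trace_flags_index[3], &tr, &index);
        /* Here tr == some_tr and index == 3 */
}
#endif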
7398
7399static ssize_t
7400trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7401                        loff_t *ppos)
7402{
7403        void *tr_index = filp->private_data;
7404        struct trace_array *tr;
7405        unsigned int index;
7406        char *buf;
7407
7408        get_tr_index(tr_index, &tr, &index);
7409
7410        if (tr->trace_flags & (1 << index))
7411                buf = "1\n";
7412        else
7413                buf = "0\n";
7414
7415        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7416}
7417
7418static ssize_t
7419trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7420                         loff_t *ppos)
7421{
7422        void *tr_index = filp->private_data;
7423        struct trace_array *tr;
7424        unsigned int index;
7425        unsigned long val;
7426        int ret;
7427
7428        get_tr_index(tr_index, &tr, &index);
7429
7430        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7431        if (ret)
7432                return ret;
7433
7434        if (val != 0 && val != 1)
7435                return -EINVAL;
7436
7437        mutex_lock(&trace_types_lock);
7438        ret = set_tracer_flag(tr, 1 << index, val);
7439        mutex_unlock(&trace_types_lock);
7440
7441        if (ret < 0)
7442                return ret;
7443
7444        *ppos += cnt;
7445
7446        return cnt;
7447}
7448
7449static const struct file_operations trace_options_core_fops = {
7450        .open = tracing_open_generic,
7451        .read = trace_options_core_read,
7452        .write = trace_options_core_write,
7453        .llseek = generic_file_llseek,
7454};
7455
7456struct dentry *trace_create_file(const char *name,
7457                                 umode_t mode,
7458                                 struct dentry *parent,
7459                                 void *data,
7460                                 const struct file_operations *fops)
7461{
7462        struct dentry *ret;
7463
7464        ret = tracefs_create_file(name, mode, parent, data, fops);
7465        if (!ret)
7466                pr_warn("Could not create tracefs '%s' entry\n", name);
7467
7468        return ret;
7469}
7470
7471
7472static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7473{
7474        struct dentry *d_tracer;
7475
7476        if (tr->options)
7477                return tr->options;
7478
7479        d_tracer = tracing_get_dentry(tr);
7480        if (IS_ERR(d_tracer))
7481                return NULL;
7482
7483        tr->options = tracefs_create_dir("options", d_tracer);
7484        if (!tr->options) {
7485                pr_warn("Could not create tracefs directory 'options'\n");
7486                return NULL;
7487        }
7488
7489        return tr->options;
7490}
7491
7492static void
7493create_trace_option_file(struct trace_array *tr,
7494                         struct trace_option_dentry *topt,
7495                         struct tracer_flags *flags,
7496                         struct tracer_opt *opt)
7497{
7498        struct dentry *t_options;
7499
7500        t_options = trace_options_init_dentry(tr);
7501        if (!t_options)
7502                return;
7503
7504        topt->flags = flags;
7505        topt->opt = opt;
7506        topt->tr = tr;
7507
7508        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7509                                    &trace_options_fops);
7510
7511}
7512
7513static void
7514create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7515{
7516        struct trace_option_dentry *topts;
7517        struct trace_options *tr_topts;
7518        struct tracer_flags *flags;
7519        struct tracer_opt *opts;
7520        int cnt;
7521        int i;
7522
7523        if (!tracer)
7524                return;
7525
7526        flags = tracer->flags;
7527
7528        if (!flags || !flags->opts)
7529                return;
7530
7531        /*
7532         * If this is an instance, only create flags for tracers
7533         * the instance may have.
7534         */
7535        if (!trace_ok_for_array(tracer, tr))
7536                return;
7537
7538        for (i = 0; i < tr->nr_topts; i++) {
7539                /* Make sure there are no duplicate flags. */
7540                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7541                        return;
7542        }
7543
7544        opts = flags->opts;
7545
7546        for (cnt = 0; opts[cnt].name; cnt++)
7547                ;
7548
7549        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7550        if (!topts)
7551                return;
7552
7553        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7554                            GFP_KERNEL);
7555        if (!tr_topts) {
7556                kfree(topts);
7557                return;
7558        }
7559
7560        tr->topts = tr_topts;
7561        tr->topts[tr->nr_topts].tracer = tracer;
7562        tr->topts[tr->nr_topts].topts = topts;
7563        tr->nr_topts++;
7564
7565        for (cnt = 0; opts[cnt].name; cnt++) {
7566                create_trace_option_file(tr, &topts[cnt], flags,
7567                                         &opts[cnt]);
7568                WARN_ONCE(topts[cnt].entry == NULL,
7569                          "Failed to create trace option: %s",
7570                          opts[cnt].name);
7571        }
7572}
7573
7574static struct dentry *
7575create_trace_option_core_file(struct trace_array *tr,
7576                              const char *option, long index)
7577{
7578        struct dentry *t_options;
7579
7580        t_options = trace_options_init_dentry(tr);
7581        if (!t_options)
7582                return NULL;
7583
7584        return trace_create_file(option, 0644, t_options,
7585                                 (void *)&tr->trace_flags_index[index],
7586                                 &trace_options_core_fops);
7587}
7588
7589static void create_trace_options_dir(struct trace_array *tr)
7590{
7591        struct dentry *t_options;
7592        bool top_level = tr == &global_trace;
7593        int i;
7594
7595        t_options = trace_options_init_dentry(tr);
7596        if (!t_options)
7597                return;
7598
7599        for (i = 0; trace_options[i]; i++) {
7600                if (top_level ||
7601                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7602                        create_trace_option_core_file(tr, trace_options[i], i);
7603        }
7604}
7605
7606static ssize_t
7607rb_simple_read(struct file *filp, char __user *ubuf,
7608               size_t cnt, loff_t *ppos)
7609{
7610        struct trace_array *tr = filp->private_data;
7611        char buf[64];
7612        int r;
7613
7614        r = tracer_tracing_is_on(tr);
7615        r = sprintf(buf, "%d\n", r);
7616
7617        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7618}
7619
7620static ssize_t
7621rb_simple_write(struct file *filp, const char __user *ubuf,
7622                size_t cnt, loff_t *ppos)
7623{
7624        struct trace_array *tr = filp->private_data;
7625        struct ring_buffer *buffer = tr->trace_buffer.buffer;
7626        unsigned long val;
7627        int ret;
7628
7629        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7630        if (ret)
7631                return ret;
7632
7633        if (buffer) {
7634                mutex_lock(&trace_types_lock);
7635                if (!!val == tracer_tracing_is_on(tr)) {
7636                        val = 0; /* do nothing */
7637                } else if (val) {
7638                        tracer_tracing_on(tr);
7639                        if (tr->current_trace->start)
7640                                tr->current_trace->start(tr);
7641                } else {
7642                        tracer_tracing_off(tr);
7643                        if (tr->current_trace->stop)
7644                                tr->current_trace->stop(tr);
7645                }
7646                mutex_unlock(&trace_types_lock);
7647        }
7648
7649        (*ppos)++;
7650
7651        return cnt;
7652}
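
/*
 * Illustrative user-space sketch (not part of this file, not compiled):
 * rb_simple_write() backs the "tracing_on" file, so pausing or resuming
 * the ring buffer (and the current tracer's start/stop callbacks) is a
 * one-byte write.  The tracefs path is an assumption.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void tracing_set_on(int on)
{
        int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
        ssize_t ret;

        if (fd < 0)
                return;
        /* Toggles tracer_tracing_on()/off() via rb_simple_write() */
        ret = write(fd, on ? "1" : "0", 1);
        close(fd);
        (void)ret;
}
#endif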
7653
7654static const struct file_operations rb_simple_fops = {
7655        .open           = tracing_open_generic_tr,
7656        .read           = rb_simple_read,
7657        .write          = rb_simple_write,
7658        .release        = tracing_release_generic_tr,
7659        .llseek         = default_llseek,
7660};
7661
7662struct dentry *trace_instance_dir;
7663
7664static void
7665init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7666
7667static int
7668allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7669{
7670        enum ring_buffer_flags rb_flags;
7671
7672        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7673
7674        buf->tr = tr;
7675
7676        buf->buffer = ring_buffer_alloc(size, rb_flags);
7677        if (!buf->buffer)
7678                return -ENOMEM;
7679
7680        buf->data = alloc_percpu(struct trace_array_cpu);
7681        if (!buf->data) {
7682                ring_buffer_free(buf->buffer);
7683                buf->buffer = NULL;
7684                return -ENOMEM;
7685        }
7686
7687        /* Allocate the first page for all buffers */
7688        set_buffer_entries(&tr->trace_buffer,
7689                           ring_buffer_size(tr->trace_buffer.buffer, 0));
7690
7691        return 0;
7692}
7693
7694static int allocate_trace_buffers(struct trace_array *tr, int size)
7695{
7696        int ret;
7697
7698        ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7699        if (ret)
7700                return ret;
7701
7702#ifdef CONFIG_TRACER_MAX_TRACE
7703        ret = allocate_trace_buffer(tr, &tr->max_buffer,
7704                                    allocate_snapshot ? size : 1);
7705        if (WARN_ON(ret)) {
7706                ring_buffer_free(tr->trace_buffer.buffer);
7707                tr->trace_buffer.buffer = NULL;
7708                free_percpu(tr->trace_buffer.data);
7709                tr->trace_buffer.data = NULL;
7710                return -ENOMEM;
7711        }
7712        tr->allocated_snapshot = allocate_snapshot;
7713
7714        /*
7715         * Only the top level trace array gets its snapshot allocated
7716         * from the kernel command line.
7717         */
7718        allocate_snapshot = false;
7719#endif
7720        return 0;
7721}
7722
7723static void free_trace_buffer(struct trace_buffer *buf)
7724{
7725        if (buf->buffer) {
7726                ring_buffer_free(buf->buffer);
7727                buf->buffer = NULL;
7728                free_percpu(buf->data);
7729                buf->data = NULL;
7730        }
7731}
7732
7733static void free_trace_buffers(struct trace_array *tr)
7734{
7735        if (!tr)
7736                return;
7737
7738        free_trace_buffer(&tr->trace_buffer);
7739
7740#ifdef CONFIG_TRACER_MAX_TRACE
7741        free_trace_buffer(&tr->max_buffer);
7742#endif
7743}
7744
7745static void init_trace_flags_index(struct trace_array *tr)
7746{
7747        int i;
7748
7749        /* Used by the trace options files */
7750        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7751                tr->trace_flags_index[i] = i;
7752}
7753
7754static void __update_tracer_options(struct trace_array *tr)
7755{
7756        struct tracer *t;
7757
7758        for (t = trace_types; t; t = t->next)
7759                add_tracer_options(tr, t);
7760}
7761
7762static void update_tracer_options(struct trace_array *tr)
7763{
7764        mutex_lock(&trace_types_lock);
7765        __update_tracer_options(tr);
7766        mutex_unlock(&trace_types_lock);
7767}
7768
7769static int instance_mkdir(const char *name)
7770{
7771        struct trace_array *tr;
7772        int ret;
7773
7774        mutex_lock(&event_mutex);
7775        mutex_lock(&trace_types_lock);
7776
7777        ret = -EEXIST;
7778        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7779                if (tr->name && strcmp(tr->name, name) == 0)
7780                        goto out_unlock;
7781        }
7782
7783        ret = -ENOMEM;
7784        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7785        if (!tr)
7786                goto out_unlock;
7787
7788        tr->name = kstrdup(name, GFP_KERNEL);
7789        if (!tr->name)
7790                goto out_free_tr;
7791
7792        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7793                goto out_free_tr;
7794
7795        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7796
7797        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7798
7799        raw_spin_lock_init(&tr->start_lock);
7800
7801        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7802
7803        tr->current_trace = &nop_trace;
7804
7805        INIT_LIST_HEAD(&tr->systems);
7806        INIT_LIST_HEAD(&tr->events);
7807        INIT_LIST_HEAD(&tr->hist_vars);
7808
7809        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7810                goto out_free_tr;
7811
7812        tr->dir = tracefs_create_dir(name, trace_instance_dir);
7813        if (!tr->dir)
7814                goto out_free_tr;
7815
7816        ret = event_trace_add_tracer(tr->dir, tr);
7817        if (ret) {
7818                tracefs_remove_recursive(tr->dir);
7819                goto out_free_tr;
7820        }
7821
7822        ftrace_init_trace_array(tr);
7823
7824        init_tracer_tracefs(tr, tr->dir);
7825        init_trace_flags_index(tr);
7826        __update_tracer_options(tr);
7827
7828        list_add(&tr->list, &ftrace_trace_arrays);
7829
7830        mutex_unlock(&trace_types_lock);
7831        mutex_unlock(&event_mutex);
7832
7833        return 0;
7834
7835 out_free_tr:
7836        free_trace_buffers(tr);
7837        free_cpumask_var(tr->tracing_cpumask);
7838        kfree(tr->name);
7839        kfree(tr);
7840
7841 out_unlock:
7842        mutex_unlock(&trace_types_lock);
7843        mutex_unlock(&event_mutex);
7844
7845        return ret;
7846
7847}
7848
7849static int instance_rmdir(const char *name)
7850{
7851        struct trace_array *tr;
7852        int found = 0;
7853        int ret;
7854        int i;
7855
7856        mutex_lock(&event_mutex);
7857        mutex_lock(&trace_types_lock);
7858
7859        ret = -ENODEV;
7860        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7861                if (tr->name && strcmp(tr->name, name) == 0) {
7862                        found = 1;
7863                        break;
7864                }
7865        }
7866        if (!found)
7867                goto out_unlock;
7868
7869        ret = -EBUSY;
7870        if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7871                goto out_unlock;
7872
7873        list_del(&tr->list);
7874
7875        /* Disable all the flags that were enabled coming in */
7876        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7877                if ((1 << i) & ZEROED_TRACE_FLAGS)
7878                        set_tracer_flag(tr, 1 << i, 0);
7879        }
7880
7881        tracing_set_nop(tr);
7882        clear_ftrace_function_probes(tr);
7883        event_trace_del_tracer(tr);
7884        ftrace_clear_pids(tr);
7885        ftrace_destroy_function_files(tr);
7886        tracefs_remove_recursive(tr->dir);
7887        free_trace_buffers(tr);
7888
7889        for (i = 0; i < tr->nr_topts; i++) {
7890                kfree(tr->topts[i].topts);
7891        }
7892        kfree(tr->topts);
7893
7894        free_cpumask_var(tr->tracing_cpumask);
7895        kfree(tr->name);
7896        kfree(tr);
7897
7898        ret = 0;
7899
7900 out_unlock:
7901        mutex_unlock(&trace_types_lock);
7902        mutex_unlock(&event_mutex);
7903
7904        return ret;
7905}
7906
7907static __init void create_trace_instances(struct dentry *d_tracer)
7908{
7909        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7910                                                         instance_mkdir,
7911                                                         instance_rmdir);
7912        if (WARN_ON(!trace_instance_dir))
7913                return;
7914}
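
/*
 * Illustrative user-space sketch (not part of this file, not compiled):
 * the "instances" directory registered above turns mkdir()/rmdir() into
 * calls to instance_mkdir()/instance_rmdir().  The tracefs path is an
 * assumption.
 */
#if 0
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

static void instance_lifecycle(void)
{
        /* Creates a new trace_array with its own buffers and control files */
        mkdir("/sys/kernel/tracing/instances/foo", 0755);
        /* ... use instances/foo/trace, instances/foo/tracing_on, ... */
        rmdir("/sys/kernel/tracing/instances/foo");
}
#endif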
7915
7916static void
7917init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7918{
7919        struct trace_event_file *file;
7920        int cpu;
7921
7922        trace_create_file("available_tracers", 0444, d_tracer,
7923                        tr, &show_traces_fops);
7924
7925        trace_create_file("current_tracer", 0644, d_tracer,
7926                        tr, &set_tracer_fops);
7927
7928        trace_create_file("tracing_cpumask", 0644, d_tracer,
7929                          tr, &tracing_cpumask_fops);
7930
7931        trace_create_file("trace_options", 0644, d_tracer,
7932                          tr, &tracing_iter_fops);
7933
7934        trace_create_file("trace", 0644, d_tracer,
7935                          tr, &tracing_fops);
7936
7937        trace_create_file("trace_pipe", 0444, d_tracer,
7938                          tr, &tracing_pipe_fops);
7939
7940        trace_create_file("buffer_size_kb", 0644, d_tracer,
7941                          tr, &tracing_entries_fops);
7942
7943        trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7944                          tr, &tracing_total_entries_fops);
7945
7946        trace_create_file("free_buffer", 0200, d_tracer,
7947                          tr, &tracing_free_buffer_fops);
7948
7949        trace_create_file("trace_marker", 0220, d_tracer,
7950                          tr, &tracing_mark_fops);
7951
7952        file = __find_event_file(tr, "ftrace", "print");
7953        if (file && file->dir)
7954                trace_create_file("trigger", 0644, file->dir, file,
7955                                  &event_trigger_fops);
7956        tr->trace_marker_file = file;
7957
7958        trace_create_file("trace_marker_raw", 0220, d_tracer,
7959                          tr, &tracing_mark_raw_fops);
7960
7961        trace_create_file("trace_clock", 0644, d_tracer, tr,
7962                          &trace_clock_fops);
7963
7964        trace_create_file("tracing_on", 0644, d_tracer,
7965                          tr, &rb_simple_fops);
7966
7967        trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7968                          &trace_time_stamp_mode_fops);
7969
7970        create_trace_options_dir(tr);
7971
7972#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7973        trace_create_file("tracing_max_latency", 0644, d_tracer,
7974                        &tr->max_latency, &tracing_max_lat_fops);
7975#endif
7976
7977        if (ftrace_create_function_files(tr, d_tracer))
7978                WARN(1, "Could not allocate function filter files");
7979
7980#ifdef CONFIG_TRACER_SNAPSHOT
7981        trace_create_file("snapshot", 0644, d_tracer,
7982                          tr, &snapshot_fops);
7983#endif
7984
7985        for_each_tracing_cpu(cpu)
7986                tracing_init_tracefs_percpu(tr, cpu);
7987
7988        ftrace_init_tracefs(tr, d_tracer);
7989}
7990
7991static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7992{
7993        struct vfsmount *mnt;
7994        struct file_system_type *type;
7995
7996        /*
7997         * To maintain backward compatibility for tools that mount
7998         * debugfs to get to the tracing facility, tracefs is automatically
7999         * mounted to the debugfs/tracing directory.
8000         */
8001        type = get_fs_type("tracefs");
8002        if (!type)
8003                return NULL;
8004        mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8005        put_filesystem(type);
8006        if (IS_ERR(mnt))
8007                return NULL;
8008        mntget(mnt);
8009
8010        return mnt;
8011}
8012
8013/**
8014 * tracing_init_dentry - initialize top level trace array
8015 *
8016 * This is called when creating files or directories in the tracing
8017 * directory. It is called via fs_initcall() by any of the boot up code
8018 * and expects to return the dentry of the top level tracing directory.
8019 */
8020struct dentry *tracing_init_dentry(void)
8021{
8022        struct trace_array *tr = &global_trace;
8023
8024        /* The top level trace array uses NULL as parent */
8025        if (tr->dir)
8026                return NULL;
8027
8028        if (WARN_ON(!tracefs_initialized()) ||
8029                (IS_ENABLED(CONFIG_DEBUG_FS) &&
8030                 WARN_ON(!debugfs_initialized())))
8031                return ERR_PTR(-ENODEV);
8032
8033        /*
8034         * As there may still be users that expect the tracing
8035         * files to exist in debugfs/tracing, we must automount
8036         * the tracefs file system there, so older tools still
8037 * work with the newer kernel.
8038         */
8039        tr->dir = debugfs_create_automount("tracing", NULL,
8040                                           trace_automount, NULL);
8041        if (!tr->dir) {
8042                pr_warn_once("Could not create debugfs directory 'tracing'\n");
8043                return ERR_PTR(-ENOMEM);
8044        }
8045
8046        return NULL;
8047}
8048
8049extern struct trace_eval_map *__start_ftrace_eval_maps[];
8050extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8051
8052static void __init trace_eval_init(void)
8053{
8054        int len;
8055
8056        len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8057        trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8058}
8059
8060#ifdef CONFIG_MODULES
8061static void trace_module_add_evals(struct module *mod)
8062{
8063        if (!mod->num_trace_evals)
8064                return;
8065
8066        /*
8067         * Modules with bad taint do not have events created; do
8068         * not bother with their eval maps (enums) either.
8069         */
8070        if (trace_module_has_bad_taint(mod))
8071                return;
8072
8073        trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8074}
8075
8076#ifdef CONFIG_TRACE_EVAL_MAP_FILE
8077static void trace_module_remove_evals(struct module *mod)
8078{
8079        union trace_eval_map_item *map;
8080        union trace_eval_map_item **last = &trace_eval_maps;
8081
8082        if (!mod->num_trace_evals)
8083                return;
8084
8085        mutex_lock(&trace_eval_mutex);
8086
8087        map = trace_eval_maps;
8088
8089        while (map) {
8090                if (map->head.mod == mod)
8091                        break;
8092                map = trace_eval_jmp_to_tail(map);
8093                last = &map->tail.next;
8094                map = map->tail.next;
8095        }
8096        if (!map)
8097                goto out;
8098
8099        *last = trace_eval_jmp_to_tail(map)->tail.next;
8100        kfree(map);
8101 out:
8102        mutex_unlock(&trace_eval_mutex);
8103}
8104#else
8105static inline void trace_module_remove_evals(struct module *mod) { }
8106#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8107
8108static int trace_module_notify(struct notifier_block *self,
8109                               unsigned long val, void *data)
8110{
8111        struct module *mod = data;
8112
8113        switch (val) {
8114        case MODULE_STATE_COMING:
8115                trace_module_add_evals(mod);
8116                break;
8117        case MODULE_STATE_GOING:
8118                trace_module_remove_evals(mod);
8119                break;
8120        }
8121
8122        return 0;
8123}
8124
8125static struct notifier_block trace_module_nb = {
8126        .notifier_call = trace_module_notify,
8127        .priority = 0,
8128};
8129#endif /* CONFIG_MODULES */
8130
8131static __init int tracer_init_tracefs(void)
8132{
8133        struct dentry *d_tracer;
8134
8135        trace_access_lock_init();
8136
8137        d_tracer = tracing_init_dentry();
8138        if (IS_ERR(d_tracer))
8139                return 0;
8140
8141        event_trace_init();
8142
8143        init_tracer_tracefs(&global_trace, d_tracer);
8144        ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8145
8146        trace_create_file("tracing_thresh", 0644, d_tracer,
8147                        &global_trace, &tracing_thresh_fops);
8148
8149        trace_create_file("README", 0444, d_tracer,
8150                        NULL, &tracing_readme_fops);
8151
8152        trace_create_file("saved_cmdlines", 0444, d_tracer,
8153                        NULL, &tracing_saved_cmdlines_fops);
8154
8155        trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8156                          NULL, &tracing_saved_cmdlines_size_fops);
8157
8158        trace_create_file("saved_tgids", 0444, d_tracer,
8159                        NULL, &tracing_saved_tgids_fops);
8160
8161        trace_eval_init();
8162
8163        trace_create_eval_file(d_tracer);
8164
8165#ifdef CONFIG_MODULES
8166        register_module_notifier(&trace_module_nb);
8167#endif
8168
8169#ifdef CONFIG_DYNAMIC_FTRACE
8170        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8171                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8172#endif
8173
8174        create_trace_instances(d_tracer);
8175
8176        update_tracer_options(&global_trace);
8177
8178        return 0;
8179}
8180
8181static int trace_panic_handler(struct notifier_block *this,
8182                               unsigned long event, void *unused)
8183{
8184        if (ftrace_dump_on_oops)
8185                ftrace_dump(ftrace_dump_on_oops);
8186        return NOTIFY_OK;
8187}
8188
8189static struct notifier_block trace_panic_notifier = {
8190        .notifier_call  = trace_panic_handler,
8191        .next           = NULL,
8192        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8193};
8194
8195static int trace_die_handler(struct notifier_block *self,
8196                             unsigned long val,
8197                             void *data)
8198{
8199        switch (val) {
8200        case DIE_OOPS:
8201                if (ftrace_dump_on_oops)
8202                        ftrace_dump(ftrace_dump_on_oops);
8203                break;
8204        default:
8205                break;
8206        }
8207        return NOTIFY_OK;
8208}
8209
8210static struct notifier_block trace_die_notifier = {
8211        .notifier_call = trace_die_handler,
8212        .priority = 200
8213};
8214
8215/*
8216 * printk is limited to a maximum of 1024 bytes; we really don't need
8217 * it that big. Nothing should be printing 1000 characters anyway.
8218 */
8219#define TRACE_MAX_PRINT         1000
8220
8221/*
8222 * Define here KERN_TRACE so that we have one place to modify
8223 * it if we decide to change what log level the ftrace dump
8224 * should be at.
8225 */
8226#define KERN_TRACE              KERN_EMERG
8227
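/*
 * Flush one trace_seq worth of text to the console; ftrace_dump() below
 * uses this to emit the buffer one line at a time. A minimal sketch of
 * the expected call pattern:
 *
 *	struct trace_seq s;
 *
 *	trace_seq_init(&s);
 *	trace_seq_printf(&s, "some dump text\n");
 *	trace_printk_seq(&s);
 *
 * trace_printk_seq() prints the accumulated text at KERN_TRACE level and
 * then re-initializes the trace_seq for the next line.
 */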
8228void
8229trace_printk_seq(struct trace_seq *s)
8230{
8231        /* Probably should print a warning here. */
8232        if (s->seq.len >= TRACE_MAX_PRINT)
8233                s->seq.len = TRACE_MAX_PRINT;
8234
8235        /*
8236         * More paranoid code. Although the buffer size is set to
8237         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8238         * an extra layer of protection.
8239         */
8240        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8241                s->seq.len = s->seq.size - 1;
8242
8243        /* should already be NUL terminated, but we are paranoid. */
8244        s->buffer[s->seq.len] = 0;
8245
8246        printk(KERN_TRACE "%s", s->buffer);
8247
8248        trace_seq_init(s);
8249}
8250
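/*
 * Set up an iterator over the global trace buffer. This is only the
 * partial initialization needed by the dump path below: it points the
 * iterator at global_trace, selects all CPUs, calls the current tracer's
 * open() callback and picks the timestamp format.
 */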
8251void trace_init_global_iter(struct trace_iterator *iter)
8252{
8253        iter->tr = &global_trace;
8254        iter->trace = iter->tr->current_trace;
8255        iter->cpu_file = RING_BUFFER_ALL_CPUS;
8256        iter->trace_buffer = &global_trace.trace_buffer;
8257
8258        if (iter->trace && iter->trace->open)
8259                iter->trace->open(iter);
8260
8261        /* Annotate start of buffers if we had overruns */
8262        if (ring_buffer_overruns(iter->trace_buffer->buffer))
8263                iter->iter_flags |= TRACE_FILE_ANNOTATE;
8264
8265        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8266        if (trace_clocks[iter->tr->clock_id].in_ns)
8267                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8268}
8269
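/*
 * ftrace_dump - dump the ring buffer contents to the console.
 *
 * This is what the panic/die notifiers above end up calling. It is also
 * reachable through the magic SysRq 'z' key when CONFIG_MAGIC_SYSRQ is
 * enabled, e.g.:
 *
 *	# echo z > /proc/sysrq-trigger
 */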
8270void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8271{
8272        /* use static because iter can be a bit big for the stack */
8273        static struct trace_iterator iter;
8274        static atomic_t dump_running;
8275        struct trace_array *tr = &global_trace;
8276        unsigned int old_userobj;
8277        unsigned long flags;
8278        int cnt = 0, cpu;
8279
8280        /* Only allow one dump user at a time. */
8281        if (atomic_inc_return(&dump_running) != 1) {
8282                atomic_dec(&dump_running);
8283                return;
8284        }
8285
8286        /*
8287         * Always turn off tracing when we dump.
8288         * We don't need to show trace output of what happens
8289         * between multiple crashes.
8290         *
8291         * If the user does a sysrq-z, then they can re-enable
8292         * tracing with echo 1 > tracing_on.
8293         */
8294        tracing_off();
8295
8296        local_irq_save(flags);
8297
8298        /* Simulate the iterator */
8299        trace_init_global_iter(&iter);
8300
8301        for_each_tracing_cpu(cpu) {
8302                atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8303        }
8304
8305        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8306
8307        /* don't look at user memory in panic mode */
8308        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8309
8310        switch (oops_dump_mode) {
8311        case DUMP_ALL:
8312                iter.cpu_file = RING_BUFFER_ALL_CPUS;
8313                break;
8314        case DUMP_ORIG:
8315                iter.cpu_file = raw_smp_processor_id();
8316                break;
8317        case DUMP_NONE:
8318                goto out_enable;
8319        default:
8320                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8321                iter.cpu_file = RING_BUFFER_ALL_CPUS;
8322        }
8323
8324        printk(KERN_TRACE "Dumping ftrace buffer:\n");
8325
8326        /* Did function tracer already get disabled? */
8327        if (ftrace_is_dead()) {
8328                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8329                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8330        }
8331
8332        /*
8333         * We need to stop all tracing on all CPUs to read the
8334         * next buffer. This is a bit expensive, but it is
8335         * not done often. We fill in all that we can read,
8336         * and then release the locks again.
8337         */
8338
8339        while (!trace_empty(&iter)) {
8340
8341                if (!cnt)
8342                        printk(KERN_TRACE "---------------------------------\n");
8343
8344                cnt++;
8345
8346                /* reset all but tr, trace, and overruns */
8347                memset(&iter.seq, 0,
8348                       sizeof(struct trace_iterator) -
8349                       offsetof(struct trace_iterator, seq));
8350                iter.iter_flags |= TRACE_FILE_LAT_FMT;
8351                iter.pos = -1;
8352
8353                if (trace_find_next_entry_inc(&iter) != NULL) {
8354                        int ret;
8355
8356                        ret = print_trace_line(&iter);
8357                        if (ret != TRACE_TYPE_NO_CONSUME)
8358                                trace_consume(&iter);
8359                }
8360                touch_nmi_watchdog();
8361
8362                trace_printk_seq(&iter.seq);
8363        }
8364
8365        if (!cnt)
8366                printk(KERN_TRACE "   (ftrace buffer empty)\n");
8367        else
8368                printk(KERN_TRACE "---------------------------------\n");
8369
8370 out_enable:
8371        tr->trace_flags |= old_userobj;
8372
8373        for_each_tracing_cpu(cpu) {
8374                atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8375        }
8376        atomic_dec(&dump_running);
8377        local_irq_restore(flags);
8378}
8379EXPORT_SYMBOL_GPL(ftrace_dump);
8380
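/*
 * Split @buf into an argv[] with argv_split() and hand it to @createfn.
 * A rough sketch of a caller, where my_create_cmd() is a made-up callback
 * and not something defined in this file:
 *
 *	static int my_create_cmd(int argc, char **argv)
 *	{
 *		return argc ? 0 : -EINVAL;
 *	}
 *	...
 *	ret = trace_run_command("p:myprobe do_sys_open", my_create_cmd);
 */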
8381int trace_run_command(const char *buf, int (*createfn)(int, char **))
8382{
8383        char **argv;
8384        int argc, ret;
8385
8386        argc = 0;
8387        ret = 0;
8388        argv = argv_split(GFP_KERNEL, buf, &argc);
8389        if (!argv)
8390                return -ENOMEM;
8391
8392        if (argc)
8393                ret = createfn(argc, argv);
8394
8395        argv_free(argv);
8396
8397        return ret;
8398}
8399
8400#define WRITE_BUFSIZE  4096
8401
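/*
 * Helper for write() handlers that accept newline separated commands
 * (the kprobe_events and synthetic event files, for example). The user
 * buffer is copied in WRITE_BUFSIZE chunks, '#' comments are stripped,
 * and @createfn is run on each complete line via trace_run_command().
 * From user space that looks like, e.g.:
 *
 *	# echo 'p:myprobe do_sys_open' > /sys/kernel/tracing/kprobe_events
 */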
8402ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8403                                size_t count, loff_t *ppos,
8404                                int (*createfn)(int, char **))
8405{
8406        char *kbuf, *buf, *tmp;
8407        int ret = 0;
8408        size_t done = 0;
8409        size_t size;
8410
8411        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8412        if (!kbuf)
8413                return -ENOMEM;
8414
8415        while (done < count) {
8416                size = count - done;
8417
8418                if (size >= WRITE_BUFSIZE)
8419                        size = WRITE_BUFSIZE - 1;
8420
8421                if (copy_from_user(kbuf, buffer + done, size)) {
8422                        ret = -EFAULT;
8423                        goto out;
8424                }
8425                kbuf[size] = '\0';
8426                buf = kbuf;
8427                do {
8428                        tmp = strchr(buf, '\n');
8429                        if (tmp) {
8430                                *tmp = '\0';
8431                                size = tmp - buf + 1;
8432                        } else {
8433                                size = strlen(buf);
8434                                if (done + size < count) {
8435                                        if (buf != kbuf)
8436                                                break;
8437                                        /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8438                                        pr_warn("Line length is too long: Should be less than %d\n",
8439                                                WRITE_BUFSIZE - 2);
8440                                        ret = -EINVAL;
8441                                        goto out;
8442                                }
8443                        }
8444                        done += size;
8445
8446                        /* Remove comments */
8447                        tmp = strchr(buf, '#');
8448
8449                        if (tmp)
8450                                *tmp = '\0';
8451
8452                        ret = trace_run_command(buf, createfn);
8453                        if (ret)
8454                                goto out;
8455                        buf += size;
8456
8457                } while (done < count);
8458        }
8459        ret = done;
8460
8461out:
8462        kfree(kbuf);
8463
8464        return ret;
8465}
8466
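/*
 * Allocate and wire up the global trace buffers. This runs very early in
 * boot, from early_trace_init() below, long before tracer_init_tracefs()
 * creates the user visible files.
 */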
8467__init static int tracer_alloc_buffers(void)
8468{
8469        int ring_buf_size;
8470        int ret = -ENOMEM;
8471
8472        /*
8473         * Make sure we don't accidentally add more trace options
8474         * than we have bits for.
8475         */
8476        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8477
8478        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8479                goto out;
8480
8481        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8482                goto out_free_buffer_mask;
8483
8484        /* Only allocate trace_printk buffers if a trace_printk exists */
8485        if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8486                /* Must be called before global_trace.buffer is allocated */
8487                trace_printk_init_buffers();
8488
8489        /* To save memory, keep the ring buffer size at its minimum */
8490        if (ring_buffer_expanded)
8491                ring_buf_size = trace_buf_size;
8492        else
8493                ring_buf_size = 1;
8494
8495        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8496        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8497
8498        raw_spin_lock_init(&global_trace.start_lock);
8499
8500        /*
8501         * The prepare callback allocates some memory for the ring buffer. We
8502         * don't free the buffer if the CPU goes down. If we were to free
8503         * the buffer, then the user would lose any trace that was in the
8504         * buffer. The memory will be removed once the "instance" is removed.
8505         */
8506        ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8507                                      "trace/RB:prepare", trace_rb_cpu_prepare,
8508                                      NULL);
8509        if (ret < 0)
8510                goto out_free_cpumask;
8511        /* Used for event triggers */
8512        ret = -ENOMEM;
8513        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8514        if (!temp_buffer)
8515                goto out_rm_hp_state;
8516
8517        if (trace_create_savedcmd() < 0)
8518                goto out_free_temp_buffer;
8519
8520        /* TODO: make the number of buffers hot pluggable with CPUs */
8521        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8522                printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8523                WARN_ON(1);
8524                goto out_free_savedcmd;
8525        }
8526
8527        if (global_trace.buffer_disabled)
8528                tracing_off();
8529
8530        if (trace_boot_clock) {
8531                ret = tracing_set_clock(&global_trace, trace_boot_clock);
8532                if (ret < 0)
8533                        pr_warn("Trace clock %s not defined, going back to default\n",
8534                                trace_boot_clock);
8535        }
8536
8537        /*
8538         * register_tracer() might reference current_trace, so it
8539         * needs to be set before we register anything. This is
8540         * just a bootstrap of current_trace anyway.
8541         */
8542        global_trace.current_trace = &nop_trace;
8543
8544        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8545
8546        ftrace_init_global_array_ops(&global_trace);
8547
8548        init_trace_flags_index(&global_trace);
8549
8550        register_tracer(&nop_trace);
8551
8552        /* Function tracing may start here (via kernel command line) */
8553        init_function_trace();
8554
8555        /* All seems OK, enable tracing */
8556        tracing_disabled = 0;
8557
8558        atomic_notifier_chain_register(&panic_notifier_list,
8559                                       &trace_panic_notifier);
8560
8561        register_die_notifier(&trace_die_notifier);
8562
8563        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8564
8565        INIT_LIST_HEAD(&global_trace.systems);
8566        INIT_LIST_HEAD(&global_trace.events);
8567        INIT_LIST_HEAD(&global_trace.hist_vars);
8568        list_add(&global_trace.list, &ftrace_trace_arrays);
8569
8570        apply_trace_boot_options();
8571
8572        register_snapshot_cmd();
8573
8574        return 0;
8575
8576out_free_savedcmd:
8577        free_saved_cmdlines_buffer(savedcmd);
8578out_free_temp_buffer:
8579        ring_buffer_free(temp_buffer);
8580out_rm_hp_state:
8581        cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8582out_free_cpumask:
8583        free_cpumask_var(global_trace.tracing_cpumask);
8584out_free_buffer_mask:
8585        free_cpumask_var(tracing_buffer_mask);
8586out:
8587        return ret;
8588}
8589
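/*
 * The two entry points below are called from start_kernel():
 * early_trace_init() very early in the boot sequence, and trace_init()
 * a little later, once trace events can be initialized.
 */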
8590void __init early_trace_init(void)
8591{
8592        if (tracepoint_printk) {
8593                tracepoint_print_iter =
8594                        kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8595                if (WARN_ON(!tracepoint_print_iter))
8596                        tracepoint_printk = 0;
8597                else
8598                        static_key_enable(&tracepoint_printk_key.key);
8599        }
8600        tracer_alloc_buffers();
8601}
8602
8603void __init trace_init(void)
8604{
8605        trace_event_init();
8606}
8607
8608__init static int clear_boot_tracer(void)
8609{
8610        /*
8611         * The string pointed to by default_bootup_tracer lives in an
8612         * init section and is freed after boot. This function runs as a
8613         * late initcall: if the boot tracer was never registered, clear
8614         * the pointer so that a later tracer registration does not
8615         * access memory that is about to be freed.
8616         */
8617        if (!default_bootup_tracer)
8618                return 0;
8619
8620        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8621               default_bootup_tracer);
8622        default_bootup_tracer = NULL;
8623
8624        return 0;
8625}
8626
8627fs_initcall(tracer_init_tracefs);
8628late_initcall_sync(clear_boot_tracer);
8629
8630#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
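/*
 * If sched_clock() turns out not to be stable, the per-CPU "local" trace
 * clock can produce timestamps that appear to go backwards between CPUs,
 * so switch the default to the "global" clock. The clock can still be
 * picked explicitly, either with "trace_clock=local" on the command line
 * or at run time, e.g.:
 *
 *	# echo local > /sys/kernel/tracing/trace_clock
 */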
8631__init static int tracing_set_default_clock(void)
8632{
8633        /* sched_clock_stable() is determined in late_initcall */
8634        if (!trace_boot_clock && !sched_clock_stable()) {
8635                printk(KERN_WARNING
8636                       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8637                       "If you want to keep using the local clock, then add:\n"
8638                       "  \"trace_clock=local\"\n"
8639                       "on the kernel command line\n");
8640                tracing_set_clock(&global_trace, "global");
8641        }
8642
8643        return 0;
8644}
8645late_initcall_sync(tracing_set_default_clock);
8646#endif
8647