linux/kernel/trace/trace.c
   1/*
   2 * ring buffer based function tracer
   3 *
   4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   6 *
   7 * Originally taken from the RT patch by:
   8 *    Arnaldo Carvalho de Melo <acme@redhat.com>
   9 *
  10 * Based on code from the latency_tracer, that is:
  11 *  Copyright (C) 2004-2006 Ingo Molnar
  12 *  Copyright (C) 2004 Nadia Yvette Chambers
  13 */
  14#include <linux/ring_buffer.h>
  15#include <generated/utsrelease.h>
  16#include <linux/stacktrace.h>
  17#include <linux/writeback.h>
  18#include <linux/kallsyms.h>
  19#include <linux/seq_file.h>
  20#include <linux/notifier.h>
  21#include <linux/irqflags.h>
  22#include <linux/debugfs.h>
  23#include <linux/tracefs.h>
  24#include <linux/pagemap.h>
  25#include <linux/hardirq.h>
  26#include <linux/linkage.h>
  27#include <linux/uaccess.h>
  28#include <linux/vmalloc.h>
  29#include <linux/ftrace.h>
  30#include <linux/module.h>
  31#include <linux/percpu.h>
  32#include <linux/splice.h>
  33#include <linux/kdebug.h>
  34#include <linux/string.h>
  35#include <linux/mount.h>
  36#include <linux/rwsem.h>
  37#include <linux/slab.h>
  38#include <linux/ctype.h>
  39#include <linux/init.h>
  40#include <linux/poll.h>
  41#include <linux/nmi.h>
  42#include <linux/fs.h>
  43#include <linux/trace.h>
  44#include <linux/sched/rt.h>
  45
  46#include "trace.h"
  47#include "trace_output.h"
  48
  49/*
  50 * On boot up, the ring buffer is set to the minimum size, so that
  51 * we do not waste memory on systems that are not using tracing.
  52 */
  53bool ring_buffer_expanded;
  54
  55/*
  56 * We need to change this state when a selftest is running.
   57 * A selftest will look into the ring buffer to count the
   58 * entries inserted during the selftest, although concurrent
   59 * insertions into the ring buffer, such as trace_printk(), could occur
   60 * at the same time, giving false positive or negative results.
  61 */
  62static bool __read_mostly tracing_selftest_running;
  63
  64/*
  65 * If a tracer is running, we do not want to run SELFTEST.
  66 */
  67bool __read_mostly tracing_selftest_disabled;
  68
  69/* Pipe tracepoints to printk */
  70struct trace_iterator *tracepoint_print_iter;
  71int tracepoint_printk;
  72static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  73
  74/* For tracers that don't implement custom flags */
  75static struct tracer_opt dummy_tracer_opt[] = {
  76        { }
  77};
  78
  79static int
  80dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
  81{
  82        return 0;
  83}
  84
  85/*
  86 * To prevent the comm cache from being overwritten when no
  87 * tracing is active, only save the comm when a trace event
  88 * occurred.
  89 */
  90static DEFINE_PER_CPU(bool, trace_taskinfo_save);
  91
  92/*
  93 * Kill all tracing for good (never come back).
  94 * It is initialized to 1 but will turn to zero if the initialization
  95 * of the tracer is successful. But that is the only place that sets
  96 * this back to zero.
  97 */
  98static int tracing_disabled = 1;
  99
 100cpumask_var_t __read_mostly     tracing_buffer_mask;
 101
 102/*
 103 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 104 *
 105 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 106 * is set, then ftrace_dump is called. This will output the contents
 107 * of the ftrace buffers to the console.  This is very useful for
  108 * capturing traces that lead to crashes and outputting them to a
 109 * serial console.
 110 *
  111 * It is off by default, but you can enable it either by specifying
  112 * "ftrace_dump_on_oops" on the kernel command line, or by setting
  113 * /proc/sys/kernel/ftrace_dump_on_oops.
  114 * Set it to 1 to dump the buffers of all CPUs.
  115 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
 116 */
 117
 118enum ftrace_dump_mode ftrace_dump_on_oops;
 119
 120/* When set, tracing will stop when a WARN*() is hit */
 121int __disable_trace_on_warning;
 122
 123#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 124/* Map of enums to their values, for "eval_map" file */
 125struct trace_eval_map_head {
 126        struct module                   *mod;
 127        unsigned long                   length;
 128};
 129
 130union trace_eval_map_item;
 131
 132struct trace_eval_map_tail {
 133        /*
 134         * "end" is first and points to NULL as it must be different
 135         * than "mod" or "eval_string"
 136         */
 137        union trace_eval_map_item       *next;
 138        const char                      *end;   /* points to NULL */
 139};
 140
 141static DEFINE_MUTEX(trace_eval_mutex);
 142
 143/*
 144 * The trace_eval_maps are saved in an array with two extra elements,
 145 * one at the beginning, and one at the end. The beginning item contains
 146 * the count of the saved maps (head.length), and the module they
 147 * belong to if not built in (head.mod). The ending item contains a
 148 * pointer to the next array of saved eval_map items.
 149 */
 150union trace_eval_map_item {
 151        struct trace_eval_map           map;
 152        struct trace_eval_map_head      head;
 153        struct trace_eval_map_tail      tail;
 154};
 155
 156static union trace_eval_map_item *trace_eval_maps;
 157#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
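/*
 * Example (an illustrative sketch, not code used later in this file): given
 * the layout described above -- one head item, head.length map entries,
 * then a tail item -- a single saved array could be walked like this
 * (the helper name walk_eval_map_array() is hypothetical):
 *
 *      static void walk_eval_map_array(union trace_eval_map_item *ptr)
 *      {
 *              unsigned long i, len = ptr->head.length;
 *
 *              for (i = 0, ptr++; i < len; i++, ptr++)
 *                      pr_info("%s.%s = %lu\n", ptr->map.system,
 *                              ptr->map.eval_string, ptr->map.eval_value);
 *      }
 *
 * After the loop, ptr sits on the tail item, whose tail.next pointer
 * chains to the next saved array (or is NULL for the last one).
 */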
 158
 159static int tracing_set_tracer(struct trace_array *tr, const char *buf);
 160
 161#define MAX_TRACER_SIZE         100
 162static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 163static char *default_bootup_tracer;
 164
 165static bool allocate_snapshot;
 166
 167static int __init set_cmdline_ftrace(char *str)
 168{
 169        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 170        default_bootup_tracer = bootup_tracer_buf;
 171        /* We are using ftrace early, expand it */
 172        ring_buffer_expanded = true;
 173        return 1;
 174}
 175__setup("ftrace=", set_cmdline_ftrace);
 176
 177static int __init set_ftrace_dump_on_oops(char *str)
 178{
 179        if (*str++ != '=' || !*str) {
 180                ftrace_dump_on_oops = DUMP_ALL;
 181                return 1;
 182        }
 183
 184        if (!strcmp("orig_cpu", str)) {
 185                ftrace_dump_on_oops = DUMP_ORIG;
 186                return 1;
 187        }
 188
 189        return 0;
 190}
 191__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 192
 193static int __init stop_trace_on_warning(char *str)
 194{
 195        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 196                __disable_trace_on_warning = 1;
 197        return 1;
 198}
 199__setup("traceoff_on_warning", stop_trace_on_warning);
 200
 201static int __init boot_alloc_snapshot(char *str)
 202{
 203        allocate_snapshot = true;
 204        /* We also need the main ring buffer expanded */
 205        ring_buffer_expanded = true;
 206        return 1;
 207}
 208__setup("alloc_snapshot", boot_alloc_snapshot);
 209
 210
 211static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 212
 213static int __init set_trace_boot_options(char *str)
 214{
 215        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 216        return 0;
 217}
 218__setup("trace_options=", set_trace_boot_options);
 219
 220static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 221static char *trace_boot_clock __initdata;
 222
 223static int __init set_trace_boot_clock(char *str)
 224{
 225        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 226        trace_boot_clock = trace_boot_clock_buf;
 227        return 0;
 228}
 229__setup("trace_clock=", set_trace_boot_clock);
 230
 231static int __init set_tracepoint_printk(char *str)
 232{
 233        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 234                tracepoint_printk = 1;
 235        return 1;
 236}
 237__setup("tp_printk", set_tracepoint_printk);
 238
 239unsigned long long ns2usecs(u64 nsec)
 240{
 241        nsec += 500;
 242        do_div(nsec, 1000);
 243        return nsec;
 244}
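/*
 * For example (illustrative arithmetic): 1499 ns becomes (1499 + 500) / 1000
 * = 1 usec, while 1500 ns becomes (1500 + 500) / 1000 = 2 usecs; the +500
 * rounds to the nearest microsecond instead of truncating.
 */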
 245
 246/* trace_flags holds trace_options default values */
 247#define TRACE_DEFAULT_FLAGS                                             \
 248        (FUNCTION_DEFAULT_FLAGS |                                       \
 249         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 250         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 251         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 252         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
 253
 254/* trace_options that are only supported by global_trace */
 255#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 256               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 257
 258/* trace_flags that are default zero for instances */
 259#define ZEROED_TRACE_FLAGS \
 260        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 261
 262/*
 263 * The global_trace is the descriptor that holds the top-level tracing
 264 * buffers for the live tracing.
 265 */
 266static struct trace_array global_trace = {
 267        .trace_flags = TRACE_DEFAULT_FLAGS,
 268};
 269
 270LIST_HEAD(ftrace_trace_arrays);
 271
 272int trace_array_get(struct trace_array *this_tr)
 273{
 274        struct trace_array *tr;
 275        int ret = -ENODEV;
 276
 277        mutex_lock(&trace_types_lock);
 278        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 279                if (tr == this_tr) {
 280                        tr->ref++;
 281                        ret = 0;
 282                        break;
 283                }
 284        }
 285        mutex_unlock(&trace_types_lock);
 286
 287        return ret;
 288}
 289
 290static void __trace_array_put(struct trace_array *this_tr)
 291{
 292        WARN_ON(!this_tr->ref);
 293        this_tr->ref--;
 294}
 295
 296void trace_array_put(struct trace_array *this_tr)
 297{
 298        mutex_lock(&trace_types_lock);
 299        __trace_array_put(this_tr);
 300        mutex_unlock(&trace_types_lock);
 301}
 302
 303int call_filter_check_discard(struct trace_event_call *call, void *rec,
 304                              struct ring_buffer *buffer,
 305                              struct ring_buffer_event *event)
 306{
 307        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 308            !filter_match_preds(call->filter, rec)) {
 309                __trace_event_discard_commit(buffer, event);
 310                return 1;
 311        }
 312
 313        return 0;
 314}
 315
 316void trace_free_pid_list(struct trace_pid_list *pid_list)
 317{
 318        vfree(pid_list->pids);
 319        kfree(pid_list);
 320}
 321
 322/**
 323 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 324 * @filtered_pids: The list of pids to check
 325 * @search_pid: The PID to find in @filtered_pids
 326 *
  327 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 328 */
 329bool
 330trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 331{
 332        /*
 333         * If pid_max changed after filtered_pids was created, we
 334         * by default ignore all pids greater than the previous pid_max.
 335         */
 336        if (search_pid >= filtered_pids->pid_max)
 337                return false;
 338
 339        return test_bit(search_pid, filtered_pids->pids);
 340}
 341
 342/**
 343 * trace_ignore_this_task - should a task be ignored for tracing
 344 * @filtered_pids: The list of pids to check
 345 * @task: The task that should be ignored if not filtered
 346 *
 347 * Checks if @task should be traced or not from @filtered_pids.
 348 * Returns true if @task should *NOT* be traced.
 349 * Returns false if @task should be traced.
 350 */
 351bool
 352trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
 353{
 354        /*
 355         * Return false, because if filtered_pids does not exist,
 356         * all pids are good to trace.
 357         */
 358        if (!filtered_pids)
 359                return false;
 360
 361        return !trace_find_filtered_pid(filtered_pids, task->pid);
 362}
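/*
 * Illustrative usage (a conceptual sketch, not a quote of the real hooks):
 * the tracing probes that honor pid filtering use this helper as an
 * early-out before recording anything for the current task:
 *
 *      if (trace_ignore_this_task(pid_list, current))
 *              return;
 *      ...record the event for current...
 */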
 363
 364/**
  365 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 366 * @pid_list: The list to modify
 367 * @self: The current task for fork or NULL for exit
 368 * @task: The task to add or remove
 369 *
  370 * If adding a task and @self is defined, the task is only added if @self
  371 * is also included in @pid_list. This happens on fork, and tasks should
 372 * only be added when the parent is listed. If @self is NULL, then the
 373 * @task pid will be removed from the list, which would happen on exit
 374 * of a task.
 375 */
 376void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 377                                  struct task_struct *self,
 378                                  struct task_struct *task)
 379{
 380        if (!pid_list)
 381                return;
 382
 383        /* For forks, we only add if the forking task is listed */
 384        if (self) {
 385                if (!trace_find_filtered_pid(pid_list, self->pid))
 386                        return;
 387        }
 388
 389        /* Sorry, but we don't support pid_max changing after setting */
 390        if (task->pid >= pid_list->pid_max)
 391                return;
 392
 393        /* "self" is set for forks, and NULL for exits */
 394        if (self)
 395                set_bit(task->pid, pid_list->pids);
 396        else
 397                clear_bit(task->pid, pid_list->pids);
 398}
 399
 400/**
 401 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 402 * @pid_list: The pid list to show
 403 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 404 * @pos: The position of the file
 405 *
 406 * This is used by the seq_file "next" operation to iterate the pids
 407 * listed in a trace_pid_list structure.
 408 *
 409 * Returns the pid+1 as we want to display pid of zero, but NULL would
 410 * stop the iteration.
 411 */
 412void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 413{
 414        unsigned long pid = (unsigned long)v;
 415
 416        (*pos)++;
 417
  418        /* pid already is +1 of the actual previous bit */
 419        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 420
 421        /* Return pid + 1 to allow zero to be represented */
 422        if (pid < pid_list->pid_max)
 423                return (void *)(pid + 1);
 424
 425        return NULL;
 426}
 427
 428/**
 429 * trace_pid_start - Used for seq_file to start reading pid lists
 430 * @pid_list: The pid list to show
 431 * @pos: The position of the file
 432 *
 433 * This is used by seq_file "start" operation to start the iteration
 434 * of listing pids.
 435 *
 436 * Returns the pid+1 as we want to display pid of zero, but NULL would
 437 * stop the iteration.
 438 */
 439void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 440{
 441        unsigned long pid;
 442        loff_t l = 0;
 443
 444        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 445        if (pid >= pid_list->pid_max)
 446                return NULL;
 447
 448        /* Return pid + 1 so that zero can be the exit value */
 449        for (pid++; pid && l < *pos;
 450             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 451                ;
 452        return (void *)pid;
 453}
 454
 455/**
 456 * trace_pid_show - show the current pid in seq_file processing
 457 * @m: The seq_file structure to write into
 458 * @v: A void pointer of the pid (+1) value to display
 459 *
 460 * Can be directly used by seq_file operations to display the current
 461 * pid value.
 462 */
 463int trace_pid_show(struct seq_file *m, void *v)
 464{
 465        unsigned long pid = (unsigned long)v - 1;
 466
 467        seq_printf(m, "%lu\n", pid);
 468        return 0;
 469}
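/*
 * Illustrative sketch: trace_pid_start(), trace_pid_next() and
 * trace_pid_show() are meant to back a seq_file interface.  A hypothetical
 * user holding its own "struct trace_pid_list *my_pids" could wire them up
 * roughly like this (all "my_*" names are made up):
 *
 *      static void *my_pids_seq_start(struct seq_file *m, loff_t *pos)
 *      {
 *              return trace_pid_start(my_pids, pos);
 *      }
 *
 *      static void *my_pids_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *      {
 *              return trace_pid_next(my_pids, v, pos);
 *      }
 *
 *      static void my_pids_seq_stop(struct seq_file *m, void *v)
 *      {
 *      }
 *
 *      static const struct seq_operations my_pids_seq_ops = {
 *              .start  = my_pids_seq_start,
 *              .next   = my_pids_seq_next,
 *              .stop   = my_pids_seq_stop,
 *              .show   = trace_pid_show,
 *      };
 *
 * Note the pid+1 encoding: returning NULL ends the iteration, so pid 0 is
 * passed around as the value 1 and decoded again in trace_pid_show().
 */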
 470
 471/* 128 should be much more than enough */
 472#define PID_BUF_SIZE            127
 473
 474int trace_pid_write(struct trace_pid_list *filtered_pids,
 475                    struct trace_pid_list **new_pid_list,
 476                    const char __user *ubuf, size_t cnt)
 477{
 478        struct trace_pid_list *pid_list;
 479        struct trace_parser parser;
 480        unsigned long val;
 481        int nr_pids = 0;
 482        ssize_t read = 0;
 483        ssize_t ret = 0;
 484        loff_t pos;
 485        pid_t pid;
 486
 487        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 488                return -ENOMEM;
 489
 490        /*
  491         * Always create a new array. The write is an all-or-nothing
  492         * operation: when the user adds new pids they go into a fresh
  493         * array, so if the operation fails, the current list is
  494         * not modified.
 495         */
 496        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 497        if (!pid_list)
 498                return -ENOMEM;
 499
 500        pid_list->pid_max = READ_ONCE(pid_max);
 501
 502        /* Only truncating will shrink pid_max */
 503        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 504                pid_list->pid_max = filtered_pids->pid_max;
 505
 506        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 507        if (!pid_list->pids) {
 508                kfree(pid_list);
 509                return -ENOMEM;
 510        }
 511
 512        if (filtered_pids) {
 513                /* copy the current bits to the new max */
 514                for_each_set_bit(pid, filtered_pids->pids,
 515                                 filtered_pids->pid_max) {
 516                        set_bit(pid, pid_list->pids);
 517                        nr_pids++;
 518                }
 519        }
 520
 521        while (cnt > 0) {
 522
 523                pos = 0;
 524
 525                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 526                if (ret < 0 || !trace_parser_loaded(&parser))
 527                        break;
 528
 529                read += ret;
 530                ubuf += ret;
 531                cnt -= ret;
 532
 533                parser.buffer[parser.idx] = 0;
 534
 535                ret = -EINVAL;
 536                if (kstrtoul(parser.buffer, 0, &val))
 537                        break;
 538                if (val >= pid_list->pid_max)
 539                        break;
 540
 541                pid = (pid_t)val;
 542
 543                set_bit(pid, pid_list->pids);
 544                nr_pids++;
 545
 546                trace_parser_clear(&parser);
 547                ret = 0;
 548        }
 549        trace_parser_put(&parser);
 550
 551        if (ret < 0) {
 552                trace_free_pid_list(pid_list);
 553                return ret;
 554        }
 555
 556        if (!nr_pids) {
 557                /* Cleared the list of pids */
 558                trace_free_pid_list(pid_list);
 559                read = ret;
 560                pid_list = NULL;
 561        }
 562
 563        *new_pid_list = pid_list;
 564
 565        return read;
 566}
 567
 568static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 569{
 570        u64 ts;
 571
 572        /* Early boot up does not have a buffer yet */
 573        if (!buf->buffer)
 574                return trace_clock_local();
 575
 576        ts = ring_buffer_time_stamp(buf->buffer, cpu);
 577        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 578
 579        return ts;
 580}
 581
 582u64 ftrace_now(int cpu)
 583{
 584        return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
 585}
 586
 587/**
 588 * tracing_is_enabled - Show if global_trace has been disabled
 589 *
 590 * Shows if the global trace has been enabled or not. It uses the
  591 * mirror flag "buffer_disabled" so it can be used in fast paths such as
  592 * the irqsoff tracer. But it may be inaccurate due to races. If you
 593 * need to know the accurate state, use tracing_is_on() which is a little
 594 * slower, but accurate.
 595 */
 596int tracing_is_enabled(void)
 597{
 598        /*
 599         * For quick access (irqsoff uses this in fast path), just
 600         * return the mirror variable of the state of the ring buffer.
 601         * It's a little racy, but we don't really care.
 602         */
 603        smp_rmb();
 604        return !global_trace.buffer_disabled;
 605}
 606
 607/*
 608 * trace_buf_size is the size in bytes that is allocated
 609 * for a buffer. Note, the number of bytes is always rounded
 610 * to page size.
 611 *
 612 * This number is purposely set to a low number of 16384.
 613 * If the dump on oops happens, it will be much appreciated
  614 * to not have to wait for all that output. Anyway, this is
  615 * configurable at both boot time and run time.
 616 */
 617#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 618
 619static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 620
 621/* trace_types holds a link list of available tracers. */
 622static struct tracer            *trace_types __read_mostly;
 623
 624/*
 625 * trace_types_lock is used to protect the trace_types list.
 626 */
 627DEFINE_MUTEX(trace_types_lock);
 628
 629/*
 630 * serialize the access of the ring buffer
 631 *
  632 * The ring buffer serializes readers, but that is only low-level protection.
  633 * The validity of the events (returned by ring_buffer_peek() etc.)
  634 * is not protected by the ring buffer.
  635 *
  636 * The content of events may become garbage if we allow other processes
  637 * to consume these events concurrently:
  638 *   A) the page holding the consumed events may become a normal page
  639 *      (not a reader page) in the ring buffer, and this page will be
  640 *      rewritten by the events producer.
  641 *   B) the page holding the consumed events may become a page for
  642 *      splice_read, and this page will be returned to the system.
  643 *
  644 * These primitives allow multiple processes to access different cpu
  645 * ring buffers concurrently.
  646 *
  647 * These primitives don't distinguish read-only and read-consume access.
  648 * Multiple read-only accesses are also serialized.
 649 */
 650
 651#ifdef CONFIG_SMP
 652static DECLARE_RWSEM(all_cpu_access_lock);
 653static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 654
 655static inline void trace_access_lock(int cpu)
 656{
 657        if (cpu == RING_BUFFER_ALL_CPUS) {
 658                /* gain it for accessing the whole ring buffer. */
 659                down_write(&all_cpu_access_lock);
 660        } else {
 661                /* gain it for accessing a cpu ring buffer. */
 662
 663                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 664                down_read(&all_cpu_access_lock);
 665
 666                /* Secondly block other access to this @cpu ring buffer. */
 667                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 668        }
 669}
 670
 671static inline void trace_access_unlock(int cpu)
 672{
 673        if (cpu == RING_BUFFER_ALL_CPUS) {
 674                up_write(&all_cpu_access_lock);
 675        } else {
 676                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 677                up_read(&all_cpu_access_lock);
 678        }
 679}
 680
 681static inline void trace_access_lock_init(void)
 682{
 683        int cpu;
 684
 685        for_each_possible_cpu(cpu)
 686                mutex_init(&per_cpu(cpu_access_lock, cpu));
 687}
 688
 689#else
 690
 691static DEFINE_MUTEX(access_lock);
 692
 693static inline void trace_access_lock(int cpu)
 694{
 695        (void)cpu;
 696        mutex_lock(&access_lock);
 697}
 698
 699static inline void trace_access_unlock(int cpu)
 700{
 701        (void)cpu;
 702        mutex_unlock(&access_lock);
 703}
 704
 705static inline void trace_access_lock_init(void)
 706{
 707}
 708
 709#endif
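/*
 * Illustrative usage of the primitives above (a sketch, not a quote of a
 * particular caller): a reader of one CPU buffer takes the shared side plus
 * that CPU's mutex, while an operation touching every CPU buffer passes
 * RING_BUFFER_ALL_CPUS and takes the exclusive side:
 *
 *      trace_access_lock(cpu);
 *      ...consume events from the @cpu ring buffer...
 *      trace_access_unlock(cpu);
 *
 *      trace_access_lock(RING_BUFFER_ALL_CPUS);
 *      ...operate on all cpu ring buffers...
 *      trace_access_unlock(RING_BUFFER_ALL_CPUS);
 */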
 710
 711#ifdef CONFIG_STACKTRACE
 712static void __ftrace_trace_stack(struct ring_buffer *buffer,
 713                                 unsigned long flags,
 714                                 int skip, int pc, struct pt_regs *regs);
 715static inline void ftrace_trace_stack(struct trace_array *tr,
 716                                      struct ring_buffer *buffer,
 717                                      unsigned long flags,
 718                                      int skip, int pc, struct pt_regs *regs);
 719
 720#else
 721static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
 722                                        unsigned long flags,
 723                                        int skip, int pc, struct pt_regs *regs)
 724{
 725}
 726static inline void ftrace_trace_stack(struct trace_array *tr,
 727                                      struct ring_buffer *buffer,
 728                                      unsigned long flags,
 729                                      int skip, int pc, struct pt_regs *regs)
 730{
 731}
 732
 733#endif
 734
 735static __always_inline void
 736trace_event_setup(struct ring_buffer_event *event,
 737                  int type, unsigned long flags, int pc)
 738{
 739        struct trace_entry *ent = ring_buffer_event_data(event);
 740
 741        tracing_generic_entry_update(ent, flags, pc);
 742        ent->type = type;
 743}
 744
 745static __always_inline struct ring_buffer_event *
 746__trace_buffer_lock_reserve(struct ring_buffer *buffer,
 747                          int type,
 748                          unsigned long len,
 749                          unsigned long flags, int pc)
 750{
 751        struct ring_buffer_event *event;
 752
 753        event = ring_buffer_lock_reserve(buffer, len);
 754        if (event != NULL)
 755                trace_event_setup(event, type, flags, pc);
 756
 757        return event;
 758}
 759
 760void tracer_tracing_on(struct trace_array *tr)
 761{
 762        if (tr->trace_buffer.buffer)
 763                ring_buffer_record_on(tr->trace_buffer.buffer);
 764        /*
 765         * This flag is looked at when buffers haven't been allocated
 766         * yet, or by some tracers (like irqsoff), that just want to
 767         * know if the ring buffer has been disabled, but it can handle
 768         * races of where it gets disabled but we still do a record.
 769         * As the check is in the fast path of the tracers, it is more
 770         * important to be fast than accurate.
 771         */
 772        tr->buffer_disabled = 0;
 773        /* Make the flag seen by readers */
 774        smp_wmb();
 775}
 776
 777/**
 778 * tracing_on - enable tracing buffers
 779 *
 780 * This function enables tracing buffers that may have been
 781 * disabled with tracing_off.
 782 */
 783void tracing_on(void)
 784{
 785        tracer_tracing_on(&global_trace);
 786}
 787EXPORT_SYMBOL_GPL(tracing_on);
 788
 789
 790static __always_inline void
 791__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 792{
 793        __this_cpu_write(trace_taskinfo_save, true);
 794
 795        /* If this is the temp buffer, we need to commit fully */
 796        if (this_cpu_read(trace_buffered_event) == event) {
 797                /* Length is in event->array[0] */
 798                ring_buffer_write(buffer, event->array[0], &event->array[1]);
 799                /* Release the temp buffer */
 800                this_cpu_dec(trace_buffered_event_cnt);
 801        } else
 802                ring_buffer_unlock_commit(buffer, event);
 803}
 804
 805/**
 806 * __trace_puts - write a constant string into the trace buffer.
 807 * @ip:    The address of the caller
 808 * @str:   The constant string to write
 809 * @size:  The size of the string.
 810 */
 811int __trace_puts(unsigned long ip, const char *str, int size)
 812{
 813        struct ring_buffer_event *event;
 814        struct ring_buffer *buffer;
 815        struct print_entry *entry;
 816        unsigned long irq_flags;
 817        int alloc;
 818        int pc;
 819
 820        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 821                return 0;
 822
 823        pc = preempt_count();
 824
 825        if (unlikely(tracing_selftest_running || tracing_disabled))
 826                return 0;
 827
 828        alloc = sizeof(*entry) + size + 2; /* possible \n added */
 829
 830        local_save_flags(irq_flags);
 831        buffer = global_trace.trace_buffer.buffer;
  832        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
 833                                            irq_flags, pc);
 834        if (!event)
 835                return 0;
 836
 837        entry = ring_buffer_event_data(event);
 838        entry->ip = ip;
 839
 840        memcpy(&entry->buf, str, size);
 841
 842        /* Add a newline if necessary */
 843        if (entry->buf[size - 1] != '\n') {
 844                entry->buf[size] = '\n';
 845                entry->buf[size + 1] = '\0';
 846        } else
 847                entry->buf[size] = '\0';
 848
 849        __buffer_unlock_commit(buffer, event);
 850        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 851
 852        return size;
 853}
 854EXPORT_SYMBOL_GPL(__trace_puts);
 855
 856/**
 857 * __trace_bputs - write the pointer to a constant string into trace buffer
 858 * @ip:    The address of the caller
 859 * @str:   The constant string to write to the buffer to
 860 */
 861int __trace_bputs(unsigned long ip, const char *str)
 862{
 863        struct ring_buffer_event *event;
 864        struct ring_buffer *buffer;
 865        struct bputs_entry *entry;
 866        unsigned long irq_flags;
 867        int size = sizeof(struct bputs_entry);
 868        int pc;
 869
 870        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 871                return 0;
 872
 873        pc = preempt_count();
 874
 875        if (unlikely(tracing_selftest_running || tracing_disabled))
 876                return 0;
 877
 878        local_save_flags(irq_flags);
 879        buffer = global_trace.trace_buffer.buffer;
 880        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
 881                                            irq_flags, pc);
 882        if (!event)
 883                return 0;
 884
 885        entry = ring_buffer_event_data(event);
 886        entry->ip                       = ip;
 887        entry->str                      = str;
 888
 889        __buffer_unlock_commit(buffer, event);
 890        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 891
 892        return 1;
 893}
 894EXPORT_SYMBOL_GPL(__trace_bputs);
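/*
 * Illustrative usage: __trace_puts() and __trace_bputs() are normally not
 * called directly but reached through the trace_puts() macro from
 * <linux/kernel.h>, e.g. (my_func() is a hypothetical caller):
 *
 *      void my_func(void)
 *      {
 *              trace_puts("entering my_func\n");
 *      }
 *
 * For a constant string the macro can hand just the pointer to
 * __trace_bputs(); otherwise the string contents are copied into the
 * ring buffer by __trace_puts().
 */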
 895
 896#ifdef CONFIG_TRACER_SNAPSHOT
 897static void tracing_snapshot_instance(struct trace_array *tr)
 898{
 899        struct tracer *tracer = tr->current_trace;
 900        unsigned long flags;
 901
 902        if (in_nmi()) {
 903                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
 904                internal_trace_puts("*** snapshot is being ignored        ***\n");
 905                return;
 906        }
 907
 908        if (!tr->allocated_snapshot) {
 909                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
 910                internal_trace_puts("*** stopping trace here!   ***\n");
 911                tracing_off();
 912                return;
 913        }
 914
 915        /* Note, snapshot can not be used when the tracer uses it */
 916        if (tracer->use_max_tr) {
 917                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
 918                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
 919                return;
 920        }
 921
 922        local_irq_save(flags);
 923        update_max_tr(tr, current, smp_processor_id());
 924        local_irq_restore(flags);
 925}
 926
 927/**
 928 * tracing_snapshot - take a snapshot of the current buffer.
 929 *
 930 * This causes a swap between the snapshot buffer and the current live
 931 * tracing buffer. You can use this to take snapshots of the live
 932 * trace when some condition is triggered, but continue to trace.
 933 *
  934 * Note, make sure to allocate the snapshot either with
  935 * tracing_snapshot_alloc(), or manually
  936 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 937 *
 938 * If the snapshot buffer is not allocated, it will stop tracing.
 939 * Basically making a permanent snapshot.
 940 */
 941void tracing_snapshot(void)
 942{
 943        struct trace_array *tr = &global_trace;
 944
 945        tracing_snapshot_instance(tr);
 946}
 947EXPORT_SYMBOL_GPL(tracing_snapshot);
 948
 949static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
 950                                        struct trace_buffer *size_buf, int cpu_id);
 951static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
 952
 953static int alloc_snapshot(struct trace_array *tr)
 954{
 955        int ret;
 956
 957        if (!tr->allocated_snapshot) {
 958
 959                /* allocate spare buffer */
 960                ret = resize_buffer_duplicate_size(&tr->max_buffer,
 961                                   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
 962                if (ret < 0)
 963                        return ret;
 964
 965                tr->allocated_snapshot = true;
 966        }
 967
 968        return 0;
 969}
 970
 971static void free_snapshot(struct trace_array *tr)
 972{
 973        /*
  974         * We don't free the ring buffer. Instead, resize it, because
  975         * the max_tr ring buffer has some state (e.g. ring->clock) and
  976         * we want to preserve it.
 977         */
 978        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
 979        set_buffer_entries(&tr->max_buffer, 1);
 980        tracing_reset_online_cpus(&tr->max_buffer);
 981        tr->allocated_snapshot = false;
 982}
 983
 984/**
 985 * tracing_alloc_snapshot - allocate snapshot buffer.
 986 *
 987 * This only allocates the snapshot buffer if it isn't already
 988 * allocated - it doesn't also take a snapshot.
 989 *
 990 * This is meant to be used in cases where the snapshot buffer needs
 991 * to be set up for events that can't sleep but need to be able to
 992 * trigger a snapshot.
 993 */
 994int tracing_alloc_snapshot(void)
 995{
 996        struct trace_array *tr = &global_trace;
 997        int ret;
 998
 999        ret = alloc_snapshot(tr);
1000        WARN_ON(ret < 0);
1001
1002        return ret;
1003}
1004EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006/**
1007 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008 *
1009 * This is similar to tracing_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep.
1012 *
1013 * This causes a swap between the snapshot buffer and the current live
1014 * tracing buffer. You can use this to take snapshots of the live
1015 * trace when some condition is triggered, but continue to trace.
1016 */
1017void tracing_snapshot_alloc(void)
1018{
1019        int ret;
1020
1021        ret = tracing_alloc_snapshot();
1022        if (ret < 0)
1023                return;
1024
1025        tracing_snapshot();
1026}
1027EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028#else
1029void tracing_snapshot(void)
1030{
1031        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032}
1033EXPORT_SYMBOL_GPL(tracing_snapshot);
1034int tracing_alloc_snapshot(void)
1035{
1036        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037        return -ENODEV;
1038}
1039EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040void tracing_snapshot_alloc(void)
1041{
1042        /* Give warning */
1043        tracing_snapshot();
1044}
1045EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046#endif /* CONFIG_TRACER_SNAPSHOT */
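/*
 * Illustrative usage (hypothetical caller names): code that wants to freeze
 * the trace when it detects a problem could allocate the snapshot buffer up
 * front and then trigger the swap from the detection point:
 *
 *      static int __init my_setup(void)
 *      {
 *              return tracing_alloc_snapshot();
 *      }
 *
 *      static void my_check(int err)
 *      {
 *              if (err)
 *                      tracing_snapshot();
 *      }
 *
 * tracing_alloc_snapshot() may sleep, so it is done up front;
 * tracing_snapshot() itself can be used from most contexts (though, as the
 * code above shows, it refuses to run from NMI context).  The result can
 * then be read back from the tracefs "snapshot" file.
 */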
1047
1048void tracer_tracing_off(struct trace_array *tr)
1049{
1050        if (tr->trace_buffer.buffer)
1051                ring_buffer_record_off(tr->trace_buffer.buffer);
1052        /*
1053         * This flag is looked at when buffers haven't been allocated
1054         * yet, or by some tracers (like irqsoff), that just want to
1055         * know if the ring buffer has been disabled, but it can handle
1056         * races of where it gets disabled but we still do a record.
1057         * As the check is in the fast path of the tracers, it is more
1058         * important to be fast than accurate.
1059         */
1060        tr->buffer_disabled = 1;
1061        /* Make the flag seen by readers */
1062        smp_wmb();
1063}
1064
1065/**
1066 * tracing_off - turn off tracing buffers
1067 *
1068 * This function stops the tracing buffers from recording data.
1069 * It does not disable any overhead the tracers themselves may
1070 * be causing. This function simply causes all recording to
1071 * the ring buffers to fail.
1072 */
1073void tracing_off(void)
1074{
1075        tracer_tracing_off(&global_trace);
1076}
1077EXPORT_SYMBOL_GPL(tracing_off);
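/*
 * Illustrative usage (the check is hypothetical): a common debugging pattern
 * is to let tracing run freely and stop it the moment a problem is detected,
 * so the ring buffer still holds the events leading up to it:
 *
 *      if (data_looks_corrupted(obj))
 *              tracing_off();
 *
 * The buffer can then be inspected via tracefs and recording re-enabled
 * with tracing_on() (or by writing '1' to the "tracing_on" file).
 */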
1078
1079void disable_trace_on_warning(void)
1080{
1081        if (__disable_trace_on_warning)
1082                tracing_off();
1083}
1084
1085/**
1086 * tracer_tracing_is_on - show real state of ring buffer enabled
1087 * @tr : the trace array to know if ring buffer is enabled
1088 *
1089 * Shows real state of the ring buffer if it is enabled or not.
1090 */
1091int tracer_tracing_is_on(struct trace_array *tr)
1092{
1093        if (tr->trace_buffer.buffer)
1094                return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095        return !tr->buffer_disabled;
1096}
1097
1098/**
1099 * tracing_is_on - show state of ring buffers enabled
1100 */
1101int tracing_is_on(void)
1102{
1103        return tracer_tracing_is_on(&global_trace);
1104}
1105EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107static int __init set_buf_size(char *str)
1108{
1109        unsigned long buf_size;
1110
1111        if (!str)
1112                return 0;
1113        buf_size = memparse(str, &str);
1114        /* nr_entries can not be zero */
1115        if (buf_size == 0)
1116                return 0;
1117        trace_buf_size = buf_size;
1118        return 1;
1119}
1120__setup("trace_buf_size=", set_buf_size);
1121
1122static int __init set_tracing_thresh(char *str)
1123{
1124        unsigned long threshold;
1125        int ret;
1126
1127        if (!str)
1128                return 0;
1129        ret = kstrtoul(str, 0, &threshold);
1130        if (ret < 0)
1131                return 0;
1132        tracing_thresh = threshold * 1000;
1133        return 1;
1134}
1135__setup("tracing_thresh=", set_tracing_thresh);
1136
1137unsigned long nsecs_to_usecs(unsigned long nsecs)
1138{
1139        return nsecs / 1000;
1140}
1141
1142/*
1143 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146 * of strings in the order that the evals (enum) were defined.
1147 */
1148#undef C
1149#define C(a, b) b
1150
 1151/* These must match the bit positions in trace_iterator_flags */
1152static const char *trace_options[] = {
1153        TRACE_FLAGS
1154        NULL
1155};
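/*
 * A minimal illustration of the C(a, b) trick (a generic example, not taken
 * from trace.h itself):
 *
 *      #define FRUIT_LIST  C(APPLE, "apple"), C(PEAR, "pear")
 *
 *      #undef C
 *      #define C(a, b) FRUIT_##a
 *      enum fruit { FRUIT_LIST };
 *
 *      #undef C
 *      #define C(a, b) b
 *      static const char *fruit_names[] = { FRUIT_LIST };
 *
 * The first expansion yields the enum names, the second the matching
 * strings, and both stay in the same order by construction.  trace.h does
 * the same with TRACE_FLAGS to keep the TRACE_ITER_* bits and the option
 * strings above in sync.
 */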
1156
1157static struct {
1158        u64 (*func)(void);
1159        const char *name;
1160        int in_ns;              /* is this clock in nanoseconds? */
1161} trace_clocks[] = {
1162        { trace_clock_local,            "local",        1 },
1163        { trace_clock_global,           "global",       1 },
1164        { trace_clock_counter,          "counter",      0 },
1165        { trace_clock_jiffies,          "uptime",       0 },
1166        { trace_clock,                  "perf",         1 },
1167        { ktime_get_mono_fast_ns,       "mono",         1 },
1168        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169        { ktime_get_boot_fast_ns,       "boot",         1 },
1170        ARCH_TRACE_CLOCKS
1171};
1172
1173/*
1174 * trace_parser_get_init - gets the buffer for trace parser
1175 */
1176int trace_parser_get_init(struct trace_parser *parser, int size)
1177{
1178        memset(parser, 0, sizeof(*parser));
1179
1180        parser->buffer = kmalloc(size, GFP_KERNEL);
1181        if (!parser->buffer)
1182                return 1;
1183
1184        parser->size = size;
1185        return 0;
1186}
1187
1188/*
1189 * trace_parser_put - frees the buffer for trace parser
1190 */
1191void trace_parser_put(struct trace_parser *parser)
1192{
1193        kfree(parser->buffer);
1194        parser->buffer = NULL;
1195}
1196
1197/*
 1198 * trace_get_user - reads the user input string separated by space
1199 * (matched by isspace(ch))
1200 *
1201 * For each string found the 'struct trace_parser' is updated,
1202 * and the function returns.
1203 *
1204 * Returns number of bytes read.
1205 *
1206 * See kernel/trace/trace.h for 'struct trace_parser' details.
1207 */
1208int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209        size_t cnt, loff_t *ppos)
1210{
1211        char ch;
1212        size_t read = 0;
1213        ssize_t ret;
1214
1215        if (!*ppos)
1216                trace_parser_clear(parser);
1217
1218        ret = get_user(ch, ubuf++);
1219        if (ret)
1220                goto out;
1221
1222        read++;
1223        cnt--;
1224
1225        /*
 1226         * If the parser is not finished with the last write,
 1227         * continue reading the user input without skipping spaces.
1228         */
1229        if (!parser->cont) {
1230                /* skip white space */
1231                while (cnt && isspace(ch)) {
1232                        ret = get_user(ch, ubuf++);
1233                        if (ret)
1234                                goto out;
1235                        read++;
1236                        cnt--;
1237                }
1238
1239                /* only spaces were written */
1240                if (isspace(ch)) {
1241                        *ppos += read;
1242                        ret = read;
1243                        goto out;
1244                }
1245
1246                parser->idx = 0;
1247        }
1248
1249        /* read the non-space input */
1250        while (cnt && !isspace(ch)) {
1251                if (parser->idx < parser->size - 1)
1252                        parser->buffer[parser->idx++] = ch;
1253                else {
1254                        ret = -EINVAL;
1255                        goto out;
1256                }
1257                ret = get_user(ch, ubuf++);
1258                if (ret)
1259                        goto out;
1260                read++;
1261                cnt--;
1262        }
1263
1264        /* We either got finished input or we have to wait for another call. */
1265        if (isspace(ch)) {
1266                parser->buffer[parser->idx] = 0;
1267                parser->cont = false;
1268        } else if (parser->idx < parser->size - 1) {
1269                parser->cont = true;
1270                parser->buffer[parser->idx++] = ch;
1271        } else {
1272                ret = -EINVAL;
1273                goto out;
1274        }
1275
1276        *ppos += read;
1277        ret = read;
1278
1279out:
1280        return ret;
1281}
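/*
 * Illustrative usage (a condensed sketch; trace_pid_write() above is a real
 * in-file example of the same pattern): a write() handler drains the user
 * buffer one whitespace-separated token at a time.  Here ubuf/cnt are the
 * handler's __user buffer and count, and the size of 64 is arbitrary:
 *
 *      struct trace_parser parser;
 *      loff_t pos = 0;
 *      int ret;
 *
 *      if (trace_parser_get_init(&parser, 64))
 *              return -ENOMEM;
 *      while (cnt > 0) {
 *              ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *              if (ret < 0 || !trace_parser_loaded(&parser))
 *                      break;
 *              ubuf += ret;
 *              cnt -= ret;
 *              ...use parser.buffer...
 *              trace_parser_clear(&parser);
 *      }
 *      trace_parser_put(&parser);
 */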
1282
1283/* TODO add a seq_buf_to_buffer() */
1284static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285{
1286        int len;
1287
1288        if (trace_seq_used(s) <= s->seq.readpos)
1289                return -EBUSY;
1290
1291        len = trace_seq_used(s) - s->seq.readpos;
1292        if (cnt > len)
1293                cnt = len;
1294        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296        s->seq.readpos += cnt;
1297        return cnt;
1298}
1299
1300unsigned long __read_mostly     tracing_thresh;
1301
1302#ifdef CONFIG_TRACER_MAX_TRACE
1303/*
1304 * Copy the new maximum trace into the separate maximum-trace
1305 * structure. (this way the maximum trace is permanently saved,
1306 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1307 */
1308static void
1309__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310{
1311        struct trace_buffer *trace_buf = &tr->trace_buffer;
1312        struct trace_buffer *max_buf = &tr->max_buffer;
1313        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316        max_buf->cpu = cpu;
1317        max_buf->time_start = data->preempt_timestamp;
1318
1319        max_data->saved_latency = tr->max_latency;
1320        max_data->critical_start = data->critical_start;
1321        max_data->critical_end = data->critical_end;
1322
1323        memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324        max_data->pid = tsk->pid;
1325        /*
1326         * If tsk == current, then use current_uid(), as that does not use
1327         * RCU. The irq tracer can be called out of RCU scope.
1328         */
1329        if (tsk == current)
1330                max_data->uid = current_uid();
1331        else
1332                max_data->uid = task_uid(tsk);
1333
1334        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335        max_data->policy = tsk->policy;
1336        max_data->rt_priority = tsk->rt_priority;
1337
 1338        /* record this task's comm */
1339        tracing_record_cmdline(tsk);
1340}
1341
1342/**
1343 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344 * @tr: tracer
1345 * @tsk: the task with the latency
1346 * @cpu: The cpu that initiated the trace.
1347 *
1348 * Flip the buffers between the @tr and the max_tr and record information
1349 * about which task was the cause of this latency.
1350 */
1351void
1352update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353{
1354        struct ring_buffer *buf;
1355
1356        if (tr->stop_count)
1357                return;
1358
1359        WARN_ON_ONCE(!irqs_disabled());
1360
1361        if (!tr->allocated_snapshot) {
1362                /* Only the nop tracer should hit this when disabling */
1363                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                return;
1365        }
1366
1367        arch_spin_lock(&tr->max_lock);
1368
1369        buf = tr->trace_buffer.buffer;
1370        tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371        tr->max_buffer.buffer = buf;
1372
1373        __update_max_tr(tr, tsk, cpu);
1374        arch_spin_unlock(&tr->max_lock);
1375}
1376
1377/**
1378 * update_max_tr_single - only copy one trace over, and reset the rest
 1379 * @tr: tracer
 1380 * @tsk: task with the latency
 1381 * @cpu: the cpu of the buffer to copy.
1382 *
1383 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384 */
1385void
1386update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387{
1388        int ret;
1389
1390        if (tr->stop_count)
1391                return;
1392
1393        WARN_ON_ONCE(!irqs_disabled());
1394        if (!tr->allocated_snapshot) {
1395                /* Only the nop tracer should hit this when disabling */
1396                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                return;
1398        }
1399
1400        arch_spin_lock(&tr->max_lock);
1401
1402        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404        if (ret == -EBUSY) {
1405                /*
1406                 * We failed to swap the buffer due to a commit taking
1407                 * place on this CPU. We fail to record, but we reset
1408                 * the max trace buffer (no one writes directly to it)
1409                 * and flag that it failed.
1410                 */
1411                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                        "Failed to swap buffers due to commit in progress\n");
1413        }
1414
1415        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417        __update_max_tr(tr, tsk, cpu);
1418        arch_spin_unlock(&tr->max_lock);
1419}
1420#endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423{
1424        /* Iterators are static, they should be filled or empty */
1425        if (trace_buffer_iter(iter, iter->cpu_file))
1426                return 0;
1427
1428        return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                full);
1430}
1431
1432#ifdef CONFIG_FTRACE_STARTUP_TEST
1433static bool selftests_can_run;
1434
1435struct trace_selftests {
1436        struct list_head                list;
1437        struct tracer                   *type;
1438};
1439
1440static LIST_HEAD(postponed_selftests);
1441
1442static int save_selftest(struct tracer *type)
1443{
1444        struct trace_selftests *selftest;
1445
1446        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447        if (!selftest)
1448                return -ENOMEM;
1449
1450        selftest->type = type;
1451        list_add(&selftest->list, &postponed_selftests);
1452        return 0;
1453}
1454
1455static int run_tracer_selftest(struct tracer *type)
1456{
1457        struct trace_array *tr = &global_trace;
1458        struct tracer *saved_tracer = tr->current_trace;
1459        int ret;
1460
1461        if (!type->selftest || tracing_selftest_disabled)
1462                return 0;
1463
1464        /*
1465         * If a tracer registers early in boot up (before scheduling is
1466         * initialized and such), then do not run its selftests yet.
1467         * Instead, run it a little later in the boot process.
1468         */
1469        if (!selftests_can_run)
1470                return save_selftest(type);
1471
1472        /*
1473         * Run a selftest on this tracer.
1474         * Here we reset the trace buffer, and set the current
1475         * tracer to be this tracer. The tracer can then run some
1476         * internal tracing to verify that everything is in order.
1477         * If we fail, we do not register this tracer.
1478         */
1479        tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481        tr->current_trace = type;
1482
1483#ifdef CONFIG_TRACER_MAX_TRACE
1484        if (type->use_max_tr) {
1485                /* If we expanded the buffers, make sure the max is expanded too */
1486                if (ring_buffer_expanded)
1487                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                           RING_BUFFER_ALL_CPUS);
1489                tr->allocated_snapshot = true;
1490        }
1491#endif
1492
1493        /* the test is responsible for initializing and enabling */
1494        pr_info("Testing tracer %s: ", type->name);
1495        ret = type->selftest(type, tr);
1496        /* the test is responsible for resetting too */
1497        tr->current_trace = saved_tracer;
1498        if (ret) {
1499                printk(KERN_CONT "FAILED!\n");
1500                /* Add the warning after printing 'FAILED' */
1501                WARN_ON(1);
1502                return -1;
1503        }
1504        /* Only reset on passing, to avoid touching corrupted buffers */
1505        tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507#ifdef CONFIG_TRACER_MAX_TRACE
1508        if (type->use_max_tr) {
1509                tr->allocated_snapshot = false;
1510
1511                /* Shrink the max buffer again */
1512                if (ring_buffer_expanded)
1513                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                           RING_BUFFER_ALL_CPUS);
1515        }
1516#endif
1517
1518        printk(KERN_CONT "PASSED\n");
1519        return 0;
1520}
1521
1522static __init int init_trace_selftests(void)
1523{
1524        struct trace_selftests *p, *n;
1525        struct tracer *t, **last;
1526        int ret;
1527
1528        selftests_can_run = true;
1529
1530        mutex_lock(&trace_types_lock);
1531
1532        if (list_empty(&postponed_selftests))
1533                goto out;
1534
1535        pr_info("Running postponed tracer tests:\n");
1536
1537        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                ret = run_tracer_selftest(p->type);
1539                /* If the test fails, then warn and remove from available_tracers */
1540                if (ret < 0) {
1541                        WARN(1, "tracer: %s failed selftest, disabling\n",
1542                             p->type->name);
1543                        last = &trace_types;
1544                        for (t = trace_types; t; t = t->next) {
1545                                if (t == p->type) {
1546                                        *last = t->next;
1547                                        break;
1548                                }
1549                                last = &t->next;
1550                        }
1551                }
1552                list_del(&p->list);
1553                kfree(p);
1554        }
1555
1556 out:
1557        mutex_unlock(&trace_types_lock);
1558
1559        return 0;
1560}
1561core_initcall(init_trace_selftests);
1562#else
1563static inline int run_tracer_selftest(struct tracer *type)
1564{
1565        return 0;
1566}
1567#endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571static void __init apply_trace_boot_options(void);
1572
1573/**
1574 * register_tracer - register a tracer with the ftrace system.
 1575 * @type: the plugin for the tracer
1576 *
1577 * Register a new plugin tracer.
1578 */
1579int __init register_tracer(struct tracer *type)
1580{
1581        struct tracer *t;
1582        int ret = 0;
1583
1584        if (!type->name) {
1585                pr_info("Tracer must have a name\n");
1586                return -1;
1587        }
1588
1589        if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                return -1;
1592        }
1593
1594        mutex_lock(&trace_types_lock);
1595
1596        tracing_selftest_running = true;
1597
1598        for (t = trace_types; t; t = t->next) {
1599                if (strcmp(type->name, t->name) == 0) {
1600                        /* already found */
1601                        pr_info("Tracer %s already registered\n",
1602                                type->name);
1603                        ret = -1;
1604                        goto out;
1605                }
1606        }
1607
1608        if (!type->set_flag)
1609                type->set_flag = &dummy_set_flag;
1610        if (!type->flags) {
 1611                /* allocate a dummy tracer_flags */
1612                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                if (!type->flags) {
1614                        ret = -ENOMEM;
1615                        goto out;
1616                }
1617                type->flags->val = 0;
1618                type->flags->opts = dummy_tracer_opt;
1619        } else
1620                if (!type->flags->opts)
1621                        type->flags->opts = dummy_tracer_opt;
1622
1623        /* store the tracer for __set_tracer_option */
1624        type->flags->trace = type;
1625
1626        ret = run_tracer_selftest(type);
1627        if (ret < 0)
1628                goto out;
1629
1630        type->next = trace_types;
1631        trace_types = type;
1632        add_tracer_options(&global_trace, type);
1633
1634 out:
1635        tracing_selftest_running = false;
1636        mutex_unlock(&trace_types_lock);
1637
1638        if (ret || !default_bootup_tracer)
1639                goto out_unlock;
1640
1641        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                goto out_unlock;
1643
1644        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645        /* Do we want this tracer to start on bootup? */
1646        tracing_set_tracer(&global_trace, type->name);
1647        default_bootup_tracer = NULL;
1648
1649        apply_trace_boot_options();
1650
1651        /* disable other selftests, since this will break it. */
1652        tracing_selftest_disabled = true;
1653#ifdef CONFIG_FTRACE_STARTUP_TEST
1654        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655               type->name);
1656#endif
1657
1658 out_unlock:
1659        return ret;
1660}
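/*
 * Illustrative sketch of a registration (all "my_*" names are made up; see
 * the other tracers in this directory, e.g. trace_functions.c, for real
 * examples):
 *
 *      static int my_tracer_init(struct trace_array *tr)
 *      {
 *              return 0;
 *      }
 *
 *      static void my_tracer_reset(struct trace_array *tr)
 *      {
 *      }
 *
 *      static struct tracer my_tracer __read_mostly = {
 *              .name   = "my_tracer",
 *              .init   = my_tracer_init,
 *              .reset  = my_tracer_reset,
 *      };
 *
 *      static __init int init_my_tracer(void)
 *      {
 *              return register_tracer(&my_tracer);
 *      }
 *      core_initcall(init_my_tracer);
 *
 * Because register_tracer() is __init, registration has to happen from
 * boot-time init code, as above.
 */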
1661
1662void tracing_reset(struct trace_buffer *buf, int cpu)
1663{
1664        struct ring_buffer *buffer = buf->buffer;
1665
1666        if (!buffer)
1667                return;
1668
1669        ring_buffer_record_disable(buffer);
1670
1671        /* Make sure all commits have finished */
1672        synchronize_sched();
1673        ring_buffer_reset_cpu(buffer, cpu);
1674
1675        ring_buffer_record_enable(buffer);
1676}
1677
1678void tracing_reset_online_cpus(struct trace_buffer *buf)
1679{
1680        struct ring_buffer *buffer = buf->buffer;
1681        int cpu;
1682
1683        if (!buffer)
1684                return;
1685
1686        ring_buffer_record_disable(buffer);
1687
1688        /* Make sure all commits have finished */
1689        synchronize_sched();
1690
1691        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693        for_each_online_cpu(cpu)
1694                ring_buffer_reset_cpu(buffer, cpu);
1695
1696        ring_buffer_record_enable(buffer);
1697}
1698
1699/* Must have trace_types_lock held */
1700void tracing_reset_all_online_cpus(void)
1701{
1702        struct trace_array *tr;
1703
1704        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                if (!tr->clear_trace)
1706                        continue;
1707                tr->clear_trace = false;
1708                tracing_reset_online_cpus(&tr->trace_buffer);
1709#ifdef CONFIG_TRACER_MAX_TRACE
1710                tracing_reset_online_cpus(&tr->max_buffer);
1711#endif
1712        }
1713}
1714
1715static int *tgid_map;
1716
1717#define SAVED_CMDLINES_DEFAULT 128
1718#define NO_CMDLINE_MAP UINT_MAX
1719static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1720struct saved_cmdlines_buffer {
1721        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1722        unsigned *map_cmdline_to_pid;
1723        unsigned cmdline_num;
1724        int cmdline_idx;
1725        char *saved_cmdlines;
1726};
1727static struct saved_cmdlines_buffer *savedcmd;
1728
1729/* temporarily disable recording */
1730static atomic_t trace_record_taskinfo_disabled __read_mostly;
1731
1732static inline char *get_saved_cmdlines(int idx)
1733{
1734        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1735}
1736
1737static inline void set_cmdline(int idx, const char *cmdline)
1738{
1739        memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1740}
1741
1742static int allocate_cmdlines_buffer(unsigned int val,
1743                                    struct saved_cmdlines_buffer *s)
1744{
1745        s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1746                                        GFP_KERNEL);
1747        if (!s->map_cmdline_to_pid)
1748                return -ENOMEM;
1749
1750        s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1751        if (!s->saved_cmdlines) {
1752                kfree(s->map_cmdline_to_pid);
1753                return -ENOMEM;
1754        }
1755
1756        s->cmdline_idx = 0;
1757        s->cmdline_num = val;
1758        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1759               sizeof(s->map_pid_to_cmdline));
1760        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1761               val * sizeof(*s->map_cmdline_to_pid));
1762
1763        return 0;
1764}
1765
1766static int trace_create_savedcmd(void)
1767{
1768        int ret;
1769
1770        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1771        if (!savedcmd)
1772                return -ENOMEM;
1773
1774        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1775        if (ret < 0) {
1776                kfree(savedcmd);
1777                savedcmd = NULL;
1778                return -ENOMEM;
1779        }
1780
1781        return 0;
1782}
1783
1784int is_tracing_stopped(void)
1785{
1786        return global_trace.stop_count;
1787}
1788
1789/**
1790 * tracing_start - quick start of the tracer
1791 *
1792 * If tracing is enabled but was stopped by tracing_stop,
1793 * this will start the tracer back up.
1794 */
1795void tracing_start(void)
1796{
1797        struct ring_buffer *buffer;
1798        unsigned long flags;
1799
1800        if (tracing_disabled)
1801                return;
1802
1803        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1804        if (--global_trace.stop_count) {
1805                if (global_trace.stop_count < 0) {
1806                        /* Someone screwed up their debugging */
1807                        WARN_ON_ONCE(1);
1808                        global_trace.stop_count = 0;
1809                }
1810                goto out;
1811        }
1812
1813        /* Prevent the buffers from switching */
1814        arch_spin_lock(&global_trace.max_lock);
1815
1816        buffer = global_trace.trace_buffer.buffer;
1817        if (buffer)
1818                ring_buffer_record_enable(buffer);
1819
1820#ifdef CONFIG_TRACER_MAX_TRACE
1821        buffer = global_trace.max_buffer.buffer;
1822        if (buffer)
1823                ring_buffer_record_enable(buffer);
1824#endif
1825
1826        arch_spin_unlock(&global_trace.max_lock);
1827
1828 out:
1829        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1830}
1831
1832static void tracing_start_tr(struct trace_array *tr)
1833{
1834        struct ring_buffer *buffer;
1835        unsigned long flags;
1836
1837        if (tracing_disabled)
1838                return;
1839
1840        /* If global, we need to also start the max tracer */
1841        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1842                return tracing_start();
1843
1844        raw_spin_lock_irqsave(&tr->start_lock, flags);
1845
1846        if (--tr->stop_count) {
1847                if (tr->stop_count < 0) {
1848                        /* Someone screwed up their debugging */
1849                        WARN_ON_ONCE(1);
1850                        tr->stop_count = 0;
1851                }
1852                goto out;
1853        }
1854
1855        buffer = tr->trace_buffer.buffer;
1856        if (buffer)
1857                ring_buffer_record_enable(buffer);
1858
1859 out:
1860        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1861}
1862
1863/**
1864 * tracing_stop - quick stop of the tracer
1865 *
1866 * Light weight way to stop tracing. Use in conjunction with
1867 * tracing_start.
1868 */
1869void tracing_stop(void)
1870{
1871        struct ring_buffer *buffer;
1872        unsigned long flags;
1873
1874        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1875        if (global_trace.stop_count++)
1876                goto out;
1877
1878        /* Prevent the buffers from switching */
1879        arch_spin_lock(&global_trace.max_lock);
1880
1881        buffer = global_trace.trace_buffer.buffer;
1882        if (buffer)
1883                ring_buffer_record_disable(buffer);
1884
1885#ifdef CONFIG_TRACER_MAX_TRACE
1886        buffer = global_trace.max_buffer.buffer;
1887        if (buffer)
1888                ring_buffer_record_disable(buffer);
1889#endif
1890
1891        arch_spin_unlock(&global_trace.max_lock);
1892
1893 out:
1894        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1895}
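
/*
 * Illustrative sketch only: tracing_stop() and tracing_start() nest via
 * stop_count, so a caller that wants recording quiesced around a short
 * section can simply pair them:
 *
 *	tracing_stop();
 *	inspect_quiet_buffers();	(hypothetical helper)
 *	tracing_start();
 *
 * An unbalanced tracing_start() trips the WARN_ON_ONCE() above and the
 * count is clamped back to zero.
 */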
1896
1897static void tracing_stop_tr(struct trace_array *tr)
1898{
1899        struct ring_buffer *buffer;
1900        unsigned long flags;
1901
1902        /* If global, we need to also stop the max tracer */
1903        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1904                return tracing_stop();
1905
1906        raw_spin_lock_irqsave(&tr->start_lock, flags);
1907        if (tr->stop_count++)
1908                goto out;
1909
1910        buffer = tr->trace_buffer.buffer;
1911        if (buffer)
1912                ring_buffer_record_disable(buffer);
1913
1914 out:
1915        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1916}
1917
1918static int trace_save_cmdline(struct task_struct *tsk)
1919{
1920        unsigned pid, idx;
1921
1922        /* treat recording of idle task as a success */
1923        if (!tsk->pid)
1924                return 1;
1925
1926        if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1927                return 0;
1928
1929        /*
1930         * It's not the end of the world if we don't get
1931         * the lock, but we also don't want to spin
1932         * nor do we want to disable interrupts,
1933         * so if we miss here, then better luck next time.
1934         */
1935        if (!arch_spin_trylock(&trace_cmdline_lock))
1936                return 0;
1937
1938        idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1939        if (idx == NO_CMDLINE_MAP) {
1940                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1941
1942                /*
1943                 * Check whether the cmdline buffer at idx has a pid
1944                 * mapped. We are going to overwrite that entry so we
1945                 * need to clear the map_pid_to_cmdline. Otherwise we
1946                 * would read the new comm for the old pid.
1947                 */
1948                pid = savedcmd->map_cmdline_to_pid[idx];
1949                if (pid != NO_CMDLINE_MAP)
1950                        savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1951
1952                savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1953                savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1954
1955                savedcmd->cmdline_idx = idx;
1956        }
1957
1958        set_cmdline(idx, tsk->comm);
1959
1960        arch_spin_unlock(&trace_cmdline_lock);
1961
1962        return 1;
1963}
1964
1965static void __trace_find_cmdline(int pid, char comm[])
1966{
1967        unsigned map;
1968
1969        if (!pid) {
1970                strcpy(comm, "<idle>");
1971                return;
1972        }
1973
1974        if (WARN_ON_ONCE(pid < 0)) {
1975                strcpy(comm, "<XXX>");
1976                return;
1977        }
1978
1979        if (pid > PID_MAX_DEFAULT) {
1980                strcpy(comm, "<...>");
1981                return;
1982        }
1983
1984        map = savedcmd->map_pid_to_cmdline[pid];
1985        if (map != NO_CMDLINE_MAP)
1986                strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1987        else
1988                strcpy(comm, "<...>");
1989}
1990
1991void trace_find_cmdline(int pid, char comm[])
1992{
1993        preempt_disable();
1994        arch_spin_lock(&trace_cmdline_lock);
1995
1996        __trace_find_cmdline(pid, comm);
1997
1998        arch_spin_unlock(&trace_cmdline_lock);
1999        preempt_enable();
2000}
2001
2002int trace_find_tgid(int pid)
2003{
2004        if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2005                return 0;
2006
2007        return tgid_map[pid];
2008}
2009
2010static int trace_save_tgid(struct task_struct *tsk)
2011{
2012        /* treat recording of idle task as a success */
2013        if (!tsk->pid)
2014                return 1;
2015
2016        if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2017                return 0;
2018
2019        tgid_map[tsk->pid] = tsk->tgid;
2020        return 1;
2021}
2022
2023static bool tracing_record_taskinfo_skip(int flags)
2024{
2025        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2026                return true;
2027        if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2028                return true;
2029        if (!__this_cpu_read(trace_taskinfo_save))
2030                return true;
2031        return false;
2032}
2033
2034/**
2035 * tracing_record_taskinfo - record the task info of a task
2036 *
2037 * @task  - task to record
2038 * @flags - TRACE_RECORD_CMDLINE for recording comm
2039 *        - TRACE_RECORD_TGID for recording tgid
2040 */
2041void tracing_record_taskinfo(struct task_struct *task, int flags)
2042{
2043        bool done;
2044
2045        if (tracing_record_taskinfo_skip(flags))
2046                return;
2047
2048        /*
2049         * Record as much task information as possible. If some fail, continue
2050         * to try to record the others.
2051         */
2052        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2053        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2054
2055        /* If recording any information failed, try again soon. */
2056        if (!done)
2057                return;
2058
2059        __this_cpu_write(trace_taskinfo_save, false);
2060}
2061
2062/**
2063 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2064 *
2065 * @prev - previous task during sched_switch
2066 * @next - next task during sched_switch
2067 * @flags - TRACE_RECORD_CMDLINE for recording comm
2068 *          TRACE_RECORD_TGID for recording tgid
2069 */
2070void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2071                                          struct task_struct *next, int flags)
2072{
2073        bool done;
2074
2075        if (tracing_record_taskinfo_skip(flags))
2076                return;
2077
2078        /*
2079         * Record as much task information as possible. If some fail, continue
2080         * to try to record the others.
2081         */
2082        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2083        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2084        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2085        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2086
2087        /* If recording any information failed, try again soon. */
2088        if (!done)
2089                return;
2090
2091        __this_cpu_write(trace_taskinfo_save, false);
2092}
2093
2094/* Helpers to record specific pieces of task information */
2095void tracing_record_cmdline(struct task_struct *task)
2096{
2097        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2098}
2099
2100void tracing_record_tgid(struct task_struct *task)
2101{
2102        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2103}
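
/*
 * Illustrative sketch only: the usual pattern is to record the task info
 * when an event is emitted and to resolve it later at print time; "comm"
 * and "tgid" below are hypothetical locals:
 *
 *	char comm[TASK_COMM_LEN];
 *	int tgid;
 *
 *	tracing_record_cmdline(current);		(at event time)
 *	...
 *	trace_find_cmdline(current->pid, comm);		(at output time)
 *	tgid = trace_find_tgid(current->pid);
 */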
2104
2105/*
2106 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2107 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2108 * simplifies those functions and keeps them in sync.
2109 */
2110enum print_line_t trace_handle_return(struct trace_seq *s)
2111{
2112        return trace_seq_has_overflowed(s) ?
2113                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2114}
2115EXPORT_SYMBOL_GPL(trace_handle_return);
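
/*
 * Illustrative sketch only: an event's output callback typically formats
 * into iter->seq and lets trace_handle_return() map a trace_seq overflow
 * to TRACE_TYPE_PARTIAL_LINE. "example_entry" and its "value" field are
 * hypothetical.
 *
 *	static enum print_line_t
 *	example_trace_output(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		struct example_entry *field;
 *
 *		field = (struct example_entry *)iter->ent;
 *		trace_seq_printf(&iter->seq, "example: %lu\n", field->value);
 *
 *		return trace_handle_return(&iter->seq);
 *	}
 */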
2116
2117void
2118tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2119                             int pc)
2120{
2121        struct task_struct *tsk = current;
2122
2123        entry->preempt_count            = pc & 0xff;
2124        entry->pid                      = (tsk) ? tsk->pid : 0;
2125        entry->flags =
2126#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2127                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2128#else
2129                TRACE_FLAG_IRQS_NOSUPPORT |
2130#endif
2131                ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2132                ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2133                ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2134                (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2135                (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2136}
2137EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2138
2139struct ring_buffer_event *
2140trace_buffer_lock_reserve(struct ring_buffer *buffer,
2141                          int type,
2142                          unsigned long len,
2143                          unsigned long flags, int pc)
2144{
2145        return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2146}
2147
2148DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2149DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2150static int trace_buffered_event_ref;
2151
2152/**
2153 * trace_buffered_event_enable - enable buffering events
2154 *
2155 * When events are being filtered, it is quicker to use a temporary
2156 * buffer to write the event data into if there's a likely chance
2157 * that it will not be committed. Discarding an event from the ring
2158 * buffer is not as fast as committing it, and is much slower than
2159 * copying the data and then committing.
2160 *
2161 * When an event is to be filtered, allocate per-cpu buffers to
2162 * write the event data into; if the event is filtered and discarded,
2163 * it is simply dropped, otherwise the entire data is committed
2164 * in one shot.
2165 */
2166void trace_buffered_event_enable(void)
2167{
2168        struct ring_buffer_event *event;
2169        struct page *page;
2170        int cpu;
2171
2172        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2173
2174        if (trace_buffered_event_ref++)
2175                return;
2176
2177        for_each_tracing_cpu(cpu) {
2178                page = alloc_pages_node(cpu_to_node(cpu),
2179                                        GFP_KERNEL | __GFP_NORETRY, 0);
2180                if (!page)
2181                        goto failed;
2182
2183                event = page_address(page);
2184                memset(event, 0, sizeof(*event));
2185
2186                per_cpu(trace_buffered_event, cpu) = event;
2187
2188                preempt_disable();
2189                if (cpu == smp_processor_id() &&
2190                    this_cpu_read(trace_buffered_event) !=
2191                    per_cpu(trace_buffered_event, cpu))
2192                        WARN_ON_ONCE(1);
2193                preempt_enable();
2194        }
2195
2196        return;
2197 failed:
2198        trace_buffered_event_disable();
2199}
2200
2201static void enable_trace_buffered_event(void *data)
2202{
2203        /* Probably not needed, but do it anyway */
2204        smp_rmb();
2205        this_cpu_dec(trace_buffered_event_cnt);
2206}
2207
2208static void disable_trace_buffered_event(void *data)
2209{
2210        this_cpu_inc(trace_buffered_event_cnt);
2211}
2212
2213/**
2214 * trace_buffered_event_disable - disable buffering events
2215 *
2216 * When a filter is removed, it is faster to not use the buffered
2217 * events, and to commit directly into the ring buffer. Free up
2218 * the temp buffers when there are no more users. This requires
2219 * special synchronization with current events.
2220 */
2221void trace_buffered_event_disable(void)
2222{
2223        int cpu;
2224
2225        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2226
2227        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2228                return;
2229
2230        if (--trace_buffered_event_ref)
2231                return;
2232
2233        preempt_disable();
2234        /* For each CPU, set the buffer as used. */
2235        smp_call_function_many(tracing_buffer_mask,
2236                               disable_trace_buffered_event, NULL, 1);
2237        preempt_enable();
2238
2239        /* Wait for all current users to finish */
2240        synchronize_sched();
2241
2242        for_each_tracing_cpu(cpu) {
2243                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2244                per_cpu(trace_buffered_event, cpu) = NULL;
2245        }
2246        /*
2247         * Make sure trace_buffered_event is NULL before clearing
2248         * trace_buffered_event_cnt.
2249         */
2250        smp_wmb();
2251
2252        preempt_disable();
2253        /* Do the work on each cpu */
2254        smp_call_function_many(tracing_buffer_mask,
2255                               enable_trace_buffered_event, NULL, 1);
2256        preempt_enable();
2257}
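
/*
 * Illustrative sketch only: the event filter code takes a reference while
 * a filter is in place and drops it again when the filter is removed; both
 * calls must be made with event_mutex held (see the WARN_ON_ONCE() checks
 * above):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		(filter installed)
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();		(filter removed)
 *	mutex_unlock(&event_mutex);
 */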
2258
2259static struct ring_buffer *temp_buffer;
2260
2261struct ring_buffer_event *
2262trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2263                          struct trace_event_file *trace_file,
2264                          int type, unsigned long len,
2265                          unsigned long flags, int pc)
2266{
2267        struct ring_buffer_event *entry;
2268        int val;
2269
2270        *current_rb = trace_file->tr->trace_buffer.buffer;
2271
2272        if ((trace_file->flags &
2273             (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2274            (entry = this_cpu_read(trace_buffered_event))) {
2275                /* Try to use the per cpu buffer first */
2276                val = this_cpu_inc_return(trace_buffered_event_cnt);
2277                if (val == 1) {
2278                        trace_event_setup(entry, type, flags, pc);
2279                        entry->array[0] = len;
2280                        return entry;
2281                }
2282                this_cpu_dec(trace_buffered_event_cnt);
2283        }
2284
2285        entry = __trace_buffer_lock_reserve(*current_rb,
2286                                            type, len, flags, pc);
2287        /*
2288         * If tracing is off, but we have triggers enabled
2289         * we still need to look at the event data. Use the temp_buffer
2290         * to store the trace event for the trigger to use. It's recursion
2291         * safe and will not be recorded anywhere.
2292         */
2293        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2294                *current_rb = temp_buffer;
2295                entry = __trace_buffer_lock_reserve(*current_rb,
2296                                                    type, len, flags, pc);
2297        }
2298        return entry;
2299}
2300EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2301
2302static DEFINE_SPINLOCK(tracepoint_iter_lock);
2303static DEFINE_MUTEX(tracepoint_printk_mutex);
2304
2305static void output_printk(struct trace_event_buffer *fbuffer)
2306{
2307        struct trace_event_call *event_call;
2308        struct trace_event *event;
2309        unsigned long flags;
2310        struct trace_iterator *iter = tracepoint_print_iter;
2311
2312        /* We should never get here if iter is NULL */
2313        if (WARN_ON_ONCE(!iter))
2314                return;
2315
2316        event_call = fbuffer->trace_file->event_call;
2317        if (!event_call || !event_call->event.funcs ||
2318            !event_call->event.funcs->trace)
2319                return;
2320
2321        event = &fbuffer->trace_file->event_call->event;
2322
2323        spin_lock_irqsave(&tracepoint_iter_lock, flags);
2324        trace_seq_init(&iter->seq);
2325        iter->ent = fbuffer->entry;
2326        event_call->event.funcs->trace(iter, 0, event);
2327        trace_seq_putc(&iter->seq, 0);
2328        printk("%s", iter->seq.buffer);
2329
2330        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2331}
2332
2333int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2334                             void __user *buffer, size_t *lenp,
2335                             loff_t *ppos)
2336{
2337        int save_tracepoint_printk;
2338        int ret;
2339
2340        mutex_lock(&tracepoint_printk_mutex);
2341        save_tracepoint_printk = tracepoint_printk;
2342
2343        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2344
2345        /*
2346         * This will force exiting early, as tracepoint_printk
2347         * is always zero when tracepoint_print_iter is not allocated.
2348         */
2349        if (!tracepoint_print_iter)
2350                tracepoint_printk = 0;
2351
2352        if (save_tracepoint_printk == tracepoint_printk)
2353                goto out;
2354
2355        if (tracepoint_printk)
2356                static_key_enable(&tracepoint_printk_key.key);
2357        else
2358                static_key_disable(&tracepoint_printk_key.key);
2359
2360 out:
2361        mutex_unlock(&tracepoint_printk_mutex);
2362
2363        return ret;
2364}
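
/*
 * Illustrative note: this handler backs the kernel.tracepoint_printk
 * sysctl, so the static key is normally flipped from userspace with
 * something like:
 *
 *	sysctl kernel.tracepoint_printk=1
 *
 * which only has an effect when tracepoint_print_iter was allocated at
 * boot (i.e. the tp_printk command line option was given).
 */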
2365
2366void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2367{
2368        if (static_key_false(&tracepoint_printk_key.key))
2369                output_printk(fbuffer);
2370
2371        event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2372                                    fbuffer->event, fbuffer->entry,
2373                                    fbuffer->flags, fbuffer->pc);
2374}
2375EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2376
2377/*
2378 * Skip 3:
2379 *
2380 *   trace_buffer_unlock_commit_regs()
2381 *   trace_event_buffer_commit()
2382 *   trace_event_raw_event_xxx()
2383 */
2384# define STACK_SKIP 3
2385
2386void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2387                                     struct ring_buffer *buffer,
2388                                     struct ring_buffer_event *event,
2389                                     unsigned long flags, int pc,
2390                                     struct pt_regs *regs)
2391{
2392        __buffer_unlock_commit(buffer, event);
2393
2394        /*
2395         * If regs is not set, then skip the necessary functions.
2396         * Note, we can still get here via blktrace, wakeup tracer
2397         * and mmiotrace, but that's ok if they lose a function or
2398         * two. They are not that meaningful.
2399         */
2400        ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2401        ftrace_trace_userstack(buffer, flags, pc);
2402}
2403
2404/*
2405 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2406 */
2407void
2408trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2409                                   struct ring_buffer_event *event)
2410{
2411        __buffer_unlock_commit(buffer, event);
2412}
2413
2414static void
2415trace_process_export(struct trace_export *export,
2416               struct ring_buffer_event *event)
2417{
2418        struct trace_entry *entry;
2419        unsigned int size = 0;
2420
2421        entry = ring_buffer_event_data(event);
2422        size = ring_buffer_event_length(event);
2423        export->write(export, entry, size);
2424}
2425
2426static DEFINE_MUTEX(ftrace_export_lock);
2427
2428static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2429
2430static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2431
2432static inline void ftrace_exports_enable(void)
2433{
2434        static_branch_enable(&ftrace_exports_enabled);
2435}
2436
2437static inline void ftrace_exports_disable(void)
2438{
2439        static_branch_disable(&ftrace_exports_enabled);
2440}
2441
2442void ftrace_exports(struct ring_buffer_event *event)
2443{
2444        struct trace_export *export;
2445
2446        preempt_disable_notrace();
2447
2448        export = rcu_dereference_raw_notrace(ftrace_exports_list);
2449        while (export) {
2450                trace_process_export(export, event);
2451                export = rcu_dereference_raw_notrace(export->next);
2452        }
2453
2454        preempt_enable_notrace();
2455}
2456
2457static inline void
2458add_trace_export(struct trace_export **list, struct trace_export *export)
2459{
2460        rcu_assign_pointer(export->next, *list);
2461        /*
2462         * We are adding export to the list, but another
2463         * CPU might be walking that list. We need to make sure
2464         * the export->next pointer is valid before another CPU sees
2465         * the export pointer inserted into the list.
2466         */
2467        rcu_assign_pointer(*list, export);
2468}
2469
2470static inline int
2471rm_trace_export(struct trace_export **list, struct trace_export *export)
2472{
2473        struct trace_export **p;
2474
2475        for (p = list; *p != NULL; p = &(*p)->next)
2476                if (*p == export)
2477                        break;
2478
2479        if (*p != export)
2480                return -1;
2481
2482        rcu_assign_pointer(*p, (*p)->next);
2483
2484        return 0;
2485}
2486
2487static inline void
2488add_ftrace_export(struct trace_export **list, struct trace_export *export)
2489{
2490        if (*list == NULL)
2491                ftrace_exports_enable();
2492
2493        add_trace_export(list, export);
2494}
2495
2496static inline int
2497rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2498{
2499        int ret;
2500
2501        ret = rm_trace_export(list, export);
2502        if (*list == NULL)
2503                ftrace_exports_disable();
2504
2505        return ret;
2506}
2507
2508int register_ftrace_export(struct trace_export *export)
2509{
2510        if (WARN_ON_ONCE(!export->write))
2511                return -1;
2512
2513        mutex_lock(&ftrace_export_lock);
2514
2515        add_ftrace_export(&ftrace_exports_list, export);
2516
2517        mutex_unlock(&ftrace_export_lock);
2518
2519        return 0;
2520}
2521EXPORT_SYMBOL_GPL(register_ftrace_export);
2522
2523int unregister_ftrace_export(struct trace_export *export)
2524{
2525        int ret;
2526
2527        mutex_lock(&ftrace_export_lock);
2528
2529        ret = rm_ftrace_export(&ftrace_exports_list, export);
2530
2531        mutex_unlock(&ftrace_export_lock);
2532
2533        return ret;
2534}
2535EXPORT_SYMBOL_GPL(unregister_ftrace_export);
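
/*
 * Illustrative sketch only: an ftrace export is a struct trace_export with
 * a ->write() callback; once registered, every function trace event is
 * handed to it (see ftrace_exports() above) in addition to being written
 * to the ring buffer. "example_export_write" and "example_push()" are
 * hypothetical, and the prototype below assumes ->write() matches the call
 * in trace_process_export() above.
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		example_push(entry, size);	(send to external consumer)
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */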
2536
2537void
2538trace_function(struct trace_array *tr,
2539               unsigned long ip, unsigned long parent_ip, unsigned long flags,
2540               int pc)
2541{
2542        struct trace_event_call *call = &event_function;
2543        struct ring_buffer *buffer = tr->trace_buffer.buffer;
2544        struct ring_buffer_event *event;
2545        struct ftrace_entry *entry;
2546
2547        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2548                                            flags, pc);
2549        if (!event)
2550                return;
2551        entry   = ring_buffer_event_data(event);
2552        entry->ip                       = ip;
2553        entry->parent_ip                = parent_ip;
2554
2555        if (!call_filter_check_discard(call, entry, buffer, event)) {
2556                if (static_branch_unlikely(&ftrace_exports_enabled))
2557                        ftrace_exports(event);
2558                __buffer_unlock_commit(buffer, event);
2559        }
2560}
2561
2562#ifdef CONFIG_STACKTRACE
2563
2564#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2565struct ftrace_stack {
2566        unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2567};
2568
2569static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2570static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2571
2572static void __ftrace_trace_stack(struct ring_buffer *buffer,
2573                                 unsigned long flags,
2574                                 int skip, int pc, struct pt_regs *regs)
2575{
2576        struct trace_event_call *call = &event_kernel_stack;
2577        struct ring_buffer_event *event;
2578        struct stack_entry *entry;
2579        struct stack_trace trace;
2580        int use_stack;
2581        int size = FTRACE_STACK_ENTRIES;
2582
2583        trace.nr_entries        = 0;
2584        trace.skip              = skip;
2585
2586        /*
2587         * Add one, for this function and the call to save_stack_trace().
2588         * If regs is set, then these functions will not be in the way.
2589         */
2590#ifndef CONFIG_UNWINDER_ORC
2591        if (!regs)
2592                trace.skip++;
2593#endif
2594
2595        /*
2596         * Since events can happen in NMIs, there's no safe way to
2597         * use the per-cpu ftrace_stacks. We reserve it and if an interrupt
2598         * or NMI comes in, it will just have to use the default
2599         * FTRACE_STACK_ENTRIES.
2600         */
2601        preempt_disable_notrace();
2602
2603        use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2604        /*
2605         * We don't need any atomic variables, just a barrier.
2606         * If an interrupt comes in, we don't care, because it would
2607         * have exited and put the counter back to what we want.
2608         * We just need a barrier to keep gcc from moving things
2609         * around.
2610         */
2611        barrier();
2612        if (use_stack == 1) {
2613                trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2614                trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2615
2616                if (regs)
2617                        save_stack_trace_regs(regs, &trace);
2618                else
2619                        save_stack_trace(&trace);
2620
2621                if (trace.nr_entries > size)
2622                        size = trace.nr_entries;
2623        } else
2624                /* From now on, use_stack is a boolean */
2625                use_stack = 0;
2626
2627        size *= sizeof(unsigned long);
2628
2629        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2630                                            sizeof(*entry) + size, flags, pc);
2631        if (!event)
2632                goto out;
2633        entry = ring_buffer_event_data(event);
2634
2635        memset(&entry->caller, 0, size);
2636
2637        if (use_stack)
2638                memcpy(&entry->caller, trace.entries,
2639                       trace.nr_entries * sizeof(unsigned long));
2640        else {
2641                trace.max_entries       = FTRACE_STACK_ENTRIES;
2642                trace.entries           = entry->caller;
2643                if (regs)
2644                        save_stack_trace_regs(regs, &trace);
2645                else
2646                        save_stack_trace(&trace);
2647        }
2648
2649        entry->size = trace.nr_entries;
2650
2651        if (!call_filter_check_discard(call, entry, buffer, event))
2652                __buffer_unlock_commit(buffer, event);
2653
2654 out:
2655        /* Again, don't let gcc optimize things here */
2656        barrier();
2657        __this_cpu_dec(ftrace_stack_reserve);
2658        preempt_enable_notrace();
2659
2660}
2661
2662static inline void ftrace_trace_stack(struct trace_array *tr,
2663                                      struct ring_buffer *buffer,
2664                                      unsigned long flags,
2665                                      int skip, int pc, struct pt_regs *regs)
2666{
2667        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2668                return;
2669
2670        __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2671}
2672
2673void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2674                   int pc)
2675{
2676        struct ring_buffer *buffer = tr->trace_buffer.buffer;
2677
2678        if (rcu_is_watching()) {
2679                __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2680                return;
2681        }
2682
2683        /*
2684         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2685         * but if the above rcu_is_watching() failed, then the NMI
2686         * triggered someplace critical, and rcu_irq_enter() should
2687         * not be called from NMI.
2688         */
2689        if (unlikely(in_nmi()))
2690                return;
2691
2692        /*
2693         * It is possible that a function is being traced in a
2694         * location that RCU is not watching. A call to
2695         * rcu_irq_enter() will make sure that it is, but there's
2696         * a few internal rcu functions that could be traced
2697         * where that won't work either. In those cases, we just
2698         * do nothing.
2699         */
2700        if (unlikely(rcu_irq_enter_disabled()))
2701                return;
2702
2703        rcu_irq_enter_irqson();
2704        __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2705        rcu_irq_exit_irqson();
2706}
2707
2708/**
2709 * trace_dump_stack - record a stack back trace in the trace buffer
2710 * @skip: Number of functions to skip (helper handlers)
2711 */
2712void trace_dump_stack(int skip)
2713{
2714        unsigned long flags;
2715
2716        if (tracing_disabled || tracing_selftest_running)
2717                return;
2718
2719        local_save_flags(flags);
2720
2721#ifndef CONFIG_UNWINDER_ORC
2722        /* Skip 1 to skip this function. */
2723        skip++;
2724#endif
2725        __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2726                             flags, skip, preempt_count(), NULL);
2727}
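
/*
 * Illustrative sketch only: trace_dump_stack() can be dropped into a code
 * path being debugged to record the current backtrace into the ring
 * buffer instead of the console, e.g.:
 *
 *	if (suspicious_condition)	(hypothetical condition)
 *		trace_dump_stack(0);
 */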
2728
2729static DEFINE_PER_CPU(int, user_stack_count);
2730
2731void
2732ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2733{
2734        struct trace_event_call *call = &event_user_stack;
2735        struct ring_buffer_event *event;
2736        struct userstack_entry *entry;
2737        struct stack_trace trace;
2738
2739        if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2740                return;
2741
2742        /*
2743         * NMIs cannot handle page faults, even with fixups.
2744         * Saving the user stack can (and often does) fault.
2745         */
2746        if (unlikely(in_nmi()))
2747                return;
2748
2749        /*
2750         * prevent recursion, since the user stack tracing may
2751         * trigger other kernel events.
2752         */
2753        preempt_disable();
2754        if (__this_cpu_read(user_stack_count))
2755                goto out;
2756
2757        __this_cpu_inc(user_stack_count);
2758
2759        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2760                                            sizeof(*entry), flags, pc);
2761        if (!event)
2762                goto out_drop_count;
2763        entry   = ring_buffer_event_data(event);
2764
2765        entry->tgid             = current->tgid;
2766        memset(&entry->caller, 0, sizeof(entry->caller));
2767
2768        trace.nr_entries        = 0;
2769        trace.max_entries       = FTRACE_STACK_ENTRIES;
2770        trace.skip              = 0;
2771        trace.entries           = entry->caller;
2772
2773        save_stack_trace_user(&trace);
2774        if (!call_filter_check_discard(call, entry, buffer, event))
2775                __buffer_unlock_commit(buffer, event);
2776
2777 out_drop_count:
2778        __this_cpu_dec(user_stack_count);
2779 out:
2780        preempt_enable();
2781}
2782
2783#ifdef UNUSED
2784static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2785{
2786        ftrace_trace_userstack(tr, flags, preempt_count());
2787}
2788#endif /* UNUSED */
2789
2790#endif /* CONFIG_STACKTRACE */
2791
2792/* created for use with alloc_percpu */
2793struct trace_buffer_struct {
2794        int nesting;
2795        char buffer[4][TRACE_BUF_SIZE];
2796};
2797
2798static struct trace_buffer_struct *trace_percpu_buffer;
2799
2800/*
2801 * This allows for lockless recording.  If we're nested too deeply, then
2802 * this returns NULL.
2803 */
2804static char *get_trace_buf(void)
2805{
2806        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2807
2808        if (!buffer || buffer->nesting >= 4)
2809                return NULL;
2810
2811        buffer->nesting++;
2812
2813        /* Interrupts must see nesting incremented before we use the buffer */
2814        barrier();
2815        return &buffer->buffer[buffer->nesting][0];
2816}
2817
2818static void put_trace_buf(void)
2819{
2820        /* Don't let the decrement of nesting leak before this */
2821        barrier();
2822        this_cpu_dec(trace_percpu_buffer->nesting);
2823}
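
/*
 * Illustrative sketch only: the callers below (trace_vbprintk() and
 * __trace_array_vprintk()) always bracket their use of the per-cpu buffer
 * the same way, with preemption disabled across the pair:
 *
 *	tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		goto out_nobuffer;	(nested more than four levels deep)
 *	...				(format the message into tbuffer)
 *	put_trace_buf();
 */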
2824
2825static int alloc_percpu_trace_buffer(void)
2826{
2827        struct trace_buffer_struct *buffers;
2828
2829        buffers = alloc_percpu(struct trace_buffer_struct);
2830        if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2831                return -ENOMEM;
2832
2833        trace_percpu_buffer = buffers;
2834        return 0;
2835}
2836
2837static int buffers_allocated;
2838
2839void trace_printk_init_buffers(void)
2840{
2841        if (buffers_allocated)
2842                return;
2843
2844        if (alloc_percpu_trace_buffer())
2845                return;
2846
2847        /* trace_printk() is for debug use only. Don't use it in production. */
2848
2849        pr_warn("\n");
2850        pr_warn("**********************************************************\n");
2851        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2852        pr_warn("**                                                      **\n");
2853        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2854        pr_warn("**                                                      **\n");
2855        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2856        pr_warn("** unsafe for production use.                           **\n");
2857        pr_warn("**                                                      **\n");
2858        pr_warn("** If you see this message and you are not debugging    **\n");
2859        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2860        pr_warn("**                                                      **\n");
2861        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2862        pr_warn("**********************************************************\n");
2863
2864        /* Expand the buffers to set size */
2865        tracing_update_buffers();
2866
2867        buffers_allocated = 1;
2868
2869        /*
2870         * trace_printk_init_buffers() can be called by modules.
2871         * If that happens, then we need to start cmdline recording
2872         * directly here. If the global_trace.buffer is already
2873         * allocated here, then this was called by module code.
2874         */
2875        if (global_trace.trace_buffer.buffer)
2876                tracing_start_cmdline_record();
2877}
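
/*
 * Illustrative sketch only: trace_printk() is the debug-only front end
 * that ends up in trace_vbprintk()/trace_vprintk() below. A typical
 * ad-hoc use ("flags" and "count" are hypothetical locals) is:
 *
 *	trace_printk("entered with flags=%lx count=%d\n", flags, count);
 *
 * and the message shows up in the ring buffer (e.g. via
 * /sys/kernel/debug/tracing/trace) rather than in the printk log.
 */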
2878
2879void trace_printk_start_comm(void)
2880{
2881        /* Start tracing comms if trace printk is set */
2882        if (!buffers_allocated)
2883                return;
2884        tracing_start_cmdline_record();
2885}
2886
2887static void trace_printk_start_stop_comm(int enabled)
2888{
2889        if (!buffers_allocated)
2890                return;
2891
2892        if (enabled)
2893                tracing_start_cmdline_record();
2894        else
2895                tracing_stop_cmdline_record();
2896}
2897
2898/**
2899 * trace_vbprintk - write binary msg to tracing buffer
2900 * @ip: the address (instruction pointer) of the caller
 * @fmt: the printf-style format string for the binary trace message
 * @args: the va_list holding the arguments for @fmt
2901 */
2902int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2903{
2904        struct trace_event_call *call = &event_bprint;
2905        struct ring_buffer_event *event;
2906        struct ring_buffer *buffer;
2907        struct trace_array *tr = &global_trace;
2908        struct bprint_entry *entry;
2909        unsigned long flags;
2910        char *tbuffer;
2911        int len = 0, size, pc;
2912
2913        if (unlikely(tracing_selftest_running || tracing_disabled))
2914                return 0;
2915
2916        /* Don't pollute graph traces with trace_vprintk internals */
2917        pause_graph_tracing();
2918
2919        pc = preempt_count();
2920        preempt_disable_notrace();
2921
2922        tbuffer = get_trace_buf();
2923        if (!tbuffer) {
2924                len = 0;
2925                goto out_nobuffer;
2926        }
2927
2928        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2929
2930        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2931                goto out;
2932
2933        local_save_flags(flags);
2934        size = sizeof(*entry) + sizeof(u32) * len;
2935        buffer = tr->trace_buffer.buffer;
2936        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2937                                            flags, pc);
2938        if (!event)
2939                goto out;
2940        entry = ring_buffer_event_data(event);
2941        entry->ip                       = ip;
2942        entry->fmt                      = fmt;
2943
2944        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2945        if (!call_filter_check_discard(call, entry, buffer, event)) {
2946                __buffer_unlock_commit(buffer, event);
2947                ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2948        }
2949
2950out:
2951        put_trace_buf();
2952
2953out_nobuffer:
2954        preempt_enable_notrace();
2955        unpause_graph_tracing();
2956
2957        return len;
2958}
2959EXPORT_SYMBOL_GPL(trace_vbprintk);
2960
2961static int
2962__trace_array_vprintk(struct ring_buffer *buffer,
2963                      unsigned long ip, const char *fmt, va_list args)
2964{
2965        struct trace_event_call *call = &event_print;
2966        struct ring_buffer_event *event;
2967        int len = 0, size, pc;
2968        struct print_entry *entry;
2969        unsigned long flags;
2970        char *tbuffer;
2971
2972        if (tracing_disabled || tracing_selftest_running)
2973                return 0;
2974
2975        /* Don't pollute graph traces with trace_vprintk internals */
2976        pause_graph_tracing();
2977
2978        pc = preempt_count();
2979        preempt_disable_notrace();
2980
2981
2982        tbuffer = get_trace_buf();
2983        if (!tbuffer) {
2984                len = 0;
2985                goto out_nobuffer;
2986        }
2987
2988        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2989
2990        local_save_flags(flags);
2991        size = sizeof(*entry) + len + 1;
2992        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2993                                            flags, pc);
2994        if (!event)
2995                goto out;
2996        entry = ring_buffer_event_data(event);
2997        entry->ip = ip;
2998
2999        memcpy(&entry->buf, tbuffer, len + 1);
3000        if (!call_filter_check_discard(call, entry, buffer, event)) {
3001                __buffer_unlock_commit(buffer, event);
3002                ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3003        }
3004
3005out:
3006        put_trace_buf();
3007
3008out_nobuffer:
3009        preempt_enable_notrace();
3010        unpause_graph_tracing();
3011
3012        return len;
3013}
3014
3015int trace_array_vprintk(struct trace_array *tr,
3016                        unsigned long ip, const char *fmt, va_list args)
3017{
3018        return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3019}
3020
3021int trace_array_printk(struct trace_array *tr,
3022                       unsigned long ip, const char *fmt, ...)
3023{
3024        int ret;
3025        va_list ap;
3026
3027        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3028                return 0;
3029
3030        va_start(ap, fmt);
3031        ret = trace_array_vprintk(tr, ip, fmt, ap);
3032        va_end(ap);
3033        return ret;
3034}
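
/*
 * Illustrative sketch only: trace_array_printk() is the instance-aware
 * variant, for writing a message into a specific trace_array's buffer
 * instead of global_trace. "my_tr" and "id" are hypothetical:
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "handled event %d\n", id);
 */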
3035
3036int trace_array_printk_buf(struct ring_buffer *buffer,
3037                           unsigned long ip, const char *fmt, ...)
3038{
3039        int ret;
3040        va_list ap;
3041
3042        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3043                return 0;
3044
3045        va_start(ap, fmt);
3046        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3047        va_end(ap);
3048        return ret;
3049}
3050
3051int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3052{
3053        return trace_array_vprintk(&global_trace, ip, fmt, args);
3054}
3055EXPORT_SYMBOL_GPL(trace_vprintk);
3056
3057static void trace_iterator_increment(struct trace_iterator *iter)
3058{
3059        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3060
3061        iter->idx++;
3062        if (buf_iter)
3063                ring_buffer_read(buf_iter, NULL);
3064}
3065
3066static struct trace_entry *
3067peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3068                unsigned long *lost_events)
3069{
3070        struct ring_buffer_event *event;
3071        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3072
3073        if (buf_iter)
3074                event = ring_buffer_iter_peek(buf_iter, ts);
3075        else
3076                event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3077                                         lost_events);
3078
3079        if (event) {
3080                iter->ent_size = ring_buffer_event_length(event);
3081                return ring_buffer_event_data(event);
3082        }
3083        iter->ent_size = 0;
3084        return NULL;
3085}
3086
3087static struct trace_entry *
3088__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3089                  unsigned long *missing_events, u64 *ent_ts)
3090{
3091        struct ring_buffer *buffer = iter->trace_buffer->buffer;
3092        struct trace_entry *ent, *next = NULL;
3093        unsigned long lost_events = 0, next_lost = 0;
3094        int cpu_file = iter->cpu_file;
3095        u64 next_ts = 0, ts;
3096        int next_cpu = -1;
3097        int next_size = 0;
3098        int cpu;
3099
3100        /*
3101         * If we are in a per_cpu trace file, don't bother iterating over
3102         * all the cpus; peek at that one cpu directly.
3103         */
3104        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3105                if (ring_buffer_empty_cpu(buffer, cpu_file))
3106                        return NULL;
3107                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3108                if (ent_cpu)
3109                        *ent_cpu = cpu_file;
3110
3111                return ent;
3112        }
3113
3114        for_each_tracing_cpu(cpu) {
3115
3116                if (ring_buffer_empty_cpu(buffer, cpu))
3117                        continue;
3118
3119                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3120
3121                /*
3122                 * Pick the entry with the smallest timestamp:
3123                 */
3124                if (ent && (!next || ts < next_ts)) {
3125                        next = ent;
3126                        next_cpu = cpu;
3127                        next_ts = ts;
3128                        next_lost = lost_events;
3129                        next_size = iter->ent_size;
3130                }
3131        }
3132
3133        iter->ent_size = next_size;
3134
3135        if (ent_cpu)
3136                *ent_cpu = next_cpu;
3137
3138        if (ent_ts)
3139                *ent_ts = next_ts;
3140
3141        if (missing_events)
3142                *missing_events = next_lost;
3143
3144        return next;
3145}
3146
3147/* Find the next real entry, without updating the iterator itself */
3148struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3149                                          int *ent_cpu, u64 *ent_ts)
3150{
3151        return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3152}
3153
3154/* Find the next real entry, and increment the iterator to the next entry */
3155void *trace_find_next_entry_inc(struct trace_iterator *iter)
3156{
3157        iter->ent = __find_next_entry(iter, &iter->cpu,
3158                                      &iter->lost_events, &iter->ts);
3159
3160        if (iter->ent)
3161                trace_iterator_increment(iter);
3162
3163        return iter->ent ? iter : NULL;
3164}
3165
3166static void trace_consume(struct trace_iterator *iter)
3167{
3168        ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3169                            &iter->lost_events);
3170}
3171
3172static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3173{
3174        struct trace_iterator *iter = m->private;
3175        int i = (int)*pos;
3176        void *ent;
3177
3178        WARN_ON_ONCE(iter->leftover);
3179
3180        (*pos)++;
3181
3182        /* can't go backwards */
3183        if (iter->idx > i)
3184                return NULL;
3185
3186        if (iter->idx < 0)
3187                ent = trace_find_next_entry_inc(iter);
3188        else
3189                ent = iter;
3190
3191        while (ent && iter->idx < i)
3192                ent = trace_find_next_entry_inc(iter);
3193
3194        iter->pos = *pos;
3195
3196        return ent;
3197}
3198
3199void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3200{
3201        struct ring_buffer_event *event;
3202        struct ring_buffer_iter *buf_iter;
3203        unsigned long entries = 0;
3204        u64 ts;
3205
3206        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3207
3208        buf_iter = trace_buffer_iter(iter, cpu);
3209        if (!buf_iter)
3210                return;
3211
3212        ring_buffer_iter_reset(buf_iter);
3213
3214        /*
3215         * With the max latency tracers, it is possible that a reset
3216         * never took place on a cpu. This is evident from the
3217         * timestamp being before the start of the buffer.
3218         */
3219        while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3220                if (ts >= iter->trace_buffer->time_start)
3221                        break;
3222                entries++;
3223                ring_buffer_read(buf_iter, NULL);
3224        }
3225
3226        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3227}
3228
3229/*
3230 * The current tracer is copied to avoid taking a global lock
3231 * all around.
3232 */
3233static void *s_start(struct seq_file *m, loff_t *pos)
3234{
3235        struct trace_iterator *iter = m->private;
3236        struct trace_array *tr = iter->tr;
3237        int cpu_file = iter->cpu_file;
3238        void *p = NULL;
3239        loff_t l = 0;
3240        int cpu;
3241
3242        /*
3243         * Copy the tracer to avoid using a global lock all around.
3244         * iter->trace is a copy of current_trace; the name pointer can
3245         * be compared instead of doing a strcmp(), as iter->trace->name
3246         * will point to the same string as current_trace->name.
3247         */
3248        mutex_lock(&trace_types_lock);
3249        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3250                *iter->trace = *tr->current_trace;
3251        mutex_unlock(&trace_types_lock);
3252
3253#ifdef CONFIG_TRACER_MAX_TRACE
3254        if (iter->snapshot && iter->trace->use_max_tr)
3255                return ERR_PTR(-EBUSY);
3256#endif
3257
3258        if (!iter->snapshot)
3259                atomic_inc(&trace_record_taskinfo_disabled);
3260
3261        if (*pos != iter->pos) {
3262                iter->ent = NULL;
3263                iter->cpu = 0;
3264                iter->idx = -1;
3265
3266                if (cpu_file == RING_BUFFER_ALL_CPUS) {
3267                        for_each_tracing_cpu(cpu)
3268                                tracing_iter_reset(iter, cpu);
3269                } else
3270                        tracing_iter_reset(iter, cpu_file);
3271
3272                iter->leftover = 0;
3273                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3274                        ;
3275
3276        } else {
3277                /*
3278                 * If we overflowed the seq_file before, then we want
3279                 * to just reuse the trace_seq buffer again.
3280                 */
3281                if (iter->leftover)
3282                        p = iter;
3283                else {
3284                        l = *pos - 1;
3285                        p = s_next(m, p, &l);
3286                }
3287        }
3288
3289        trace_event_read_lock();
3290        trace_access_lock(cpu_file);
3291        return p;
3292}
3293
3294static void s_stop(struct seq_file *m, void *p)
3295{
3296        struct trace_iterator *iter = m->private;
3297
3298#ifdef CONFIG_TRACER_MAX_TRACE
3299        if (iter->snapshot && iter->trace->use_max_tr)
3300                return;
3301#endif
3302
3303        if (!iter->snapshot)
3304                atomic_dec(&trace_record_taskinfo_disabled);
3305
3306        trace_access_unlock(iter->cpu_file);
3307        trace_event_read_unlock();
3308}
3309
3310static void
3311get_total_entries(struct trace_buffer *buf,
3312                  unsigned long *total, unsigned long *entries)
3313{
3314        unsigned long count;
3315        int cpu;
3316
3317        *total = 0;
3318        *entries = 0;
3319
3320        for_each_tracing_cpu(cpu) {
3321                count = ring_buffer_entries_cpu(buf->buffer, cpu);
3322                /*
3323                 * If this buffer has skipped entries, then we hold all
3324                 * entries for the trace and we need to ignore the
3325                 * ones before the time stamp.
3326                 */
3327                if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3328                        count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3329                        /* total is the same as the entries */
3330                        *total += count;
3331                } else
3332                        *total += count +
3333                                ring_buffer_overrun_cpu(buf->buffer, cpu);
3334                *entries += count;
3335        }
3336}
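    /*
     * Worked example of the accounting above: a CPU buffer holding 70
     * readable entries with 30 overwritten (overrun) events contributes 70
     * to *entries and 100 to *total.  A CPU with skipped_entries set has the
     * pre-timestamp entries subtracted from its count and, as noted above,
     * then contributes the same value to both *total and *entries.
     */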
3337
3338static void print_lat_help_header(struct seq_file *m)
3339{
3340        seq_puts(m, "#                  _------=> CPU#            \n"
3341                    "#                 / _-----=> irqs-off        \n"
3342                    "#                | / _----=> need-resched    \n"
3343                    "#                || / _---=> hardirq/softirq \n"
3344                    "#                ||| / _--=> preempt-depth   \n"
3345                    "#                |||| /     delay            \n"
3346                    "#  cmd     pid   ||||| time  |   caller      \n"
3347                    "#     \\   /      |||||  \\    |   /         \n");
3348}
3349
3350static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3351{
3352        unsigned long total;
3353        unsigned long entries;
3354
3355        get_total_entries(buf, &total, &entries);
3356        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3357                   entries, total, num_online_cpus());
3358        seq_puts(m, "#\n");
3359}
3360
3361static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3362                                   unsigned int flags)
3363{
3364        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3365
3366        print_event_info(buf, m);
3367
3368        seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3369        seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3370}
3371
3372static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3373                                       unsigned int flags)
3374{
3375        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3376        const char tgid_space[] = "          ";
3377        const char space[] = "  ";
3378
3379        seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3380                   tgid ? tgid_space : space);
3381        seq_printf(m, "#                          %s / _----=> need-resched\n",
3382                   tgid ? tgid_space : space);
3383        seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3384                   tgid ? tgid_space : space);
3385        seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3386                   tgid ? tgid_space : space);
3387        seq_printf(m, "#                          %s||| /     delay\n",
3388                   tgid ? tgid_space : space);
3389        seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3390                   tgid ? "   TGID   " : space);
3391        seq_printf(m, "#              | |       | %s||||       |         |\n",
3392                   tgid ? "     |    " : space);
3393}
3394
3395void
3396print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3397{
3398        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3399        struct trace_buffer *buf = iter->trace_buffer;
3400        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3401        struct tracer *type = iter->trace;
3402        unsigned long entries;
3403        unsigned long total;
3404        const char *name = "preemption";
3405
3406        name = type->name;
3407
3408        get_total_entries(buf, &total, &entries);
3409
3410        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3411                   name, UTS_RELEASE);
3412        seq_puts(m, "# -----------------------------------"
3413                 "---------------------------------\n");
3414        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3415                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3416                   nsecs_to_usecs(data->saved_latency),
3417                   entries,
3418                   total,
3419                   buf->cpu,
3420#if defined(CONFIG_PREEMPT_NONE)
3421                   "server",
3422#elif defined(CONFIG_PREEMPT_VOLUNTARY)
3423                   "desktop",
3424#elif defined(CONFIG_PREEMPT)
3425                   "preempt",
3426#else
3427                   "unknown",
3428#endif
3429                   /* These are reserved for later use */
3430                   0, 0, 0, 0);
3431#ifdef CONFIG_SMP
3432        seq_printf(m, " #P:%d)\n", num_online_cpus());
3433#else
3434        seq_puts(m, ")\n");
3435#endif
3436        seq_puts(m, "#    -----------------\n");
3437        seq_printf(m, "#    | task: %.16s-%d "
3438                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3439                   data->comm, data->pid,
3440                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3441                   data->policy, data->rt_priority);
3442        seq_puts(m, "#    -----------------\n");
3443
3444        if (data->critical_start) {
3445                seq_puts(m, "#  => started at: ");
3446                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3447                trace_print_seq(m, &iter->seq);
3448                seq_puts(m, "\n#  => ended at:   ");
3449                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3450                trace_print_seq(m, &iter->seq);
3451                seq_puts(m, "\n#\n");
3452        }
3453
3454        seq_puts(m, "#\n");
3455}
3456
3457static void test_cpu_buff_start(struct trace_iterator *iter)
3458{
3459        struct trace_seq *s = &iter->seq;
3460        struct trace_array *tr = iter->tr;
3461
3462        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3463                return;
3464
3465        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3466                return;
3467
3468        if (cpumask_available(iter->started) &&
3469            cpumask_test_cpu(iter->cpu, iter->started))
3470                return;
3471
3472        if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3473                return;
3474
3475        if (cpumask_available(iter->started))
3476                cpumask_set_cpu(iter->cpu, iter->started);
3477
3478        /* Don't print started cpu buffer for the first entry of the trace */
3479        if (iter->idx > 1)
3480                trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3481                                iter->cpu);
3482}
3483
3484static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3485{
3486        struct trace_array *tr = iter->tr;
3487        struct trace_seq *s = &iter->seq;
3488        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3489        struct trace_entry *entry;
3490        struct trace_event *event;
3491
3492        entry = iter->ent;
3493
3494        test_cpu_buff_start(iter);
3495
3496        event = ftrace_find_event(entry->type);
3497
3498        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3499                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3500                        trace_print_lat_context(iter);
3501                else
3502                        trace_print_context(iter);
3503        }
3504
3505        if (trace_seq_has_overflowed(s))
3506                return TRACE_TYPE_PARTIAL_LINE;
3507
3508        if (event)
3509                return event->funcs->trace(iter, sym_flags, event);
3510
3511        trace_seq_printf(s, "Unknown type %d\n", entry->type);
3512
3513        return trace_handle_return(s);
3514}
3515
3516static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3517{
3518        struct trace_array *tr = iter->tr;
3519        struct trace_seq *s = &iter->seq;
3520        struct trace_entry *entry;
3521        struct trace_event *event;
3522
3523        entry = iter->ent;
3524
3525        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3526                trace_seq_printf(s, "%d %d %llu ",
3527                                 entry->pid, iter->cpu, iter->ts);
3528
3529        if (trace_seq_has_overflowed(s))
3530                return TRACE_TYPE_PARTIAL_LINE;
3531
3532        event = ftrace_find_event(entry->type);
3533        if (event)
3534                return event->funcs->raw(iter, 0, event);
3535
3536        trace_seq_printf(s, "%d ?\n", entry->type);
3537
3538        return trace_handle_return(s);
3539}
3540
3541static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3542{
3543        struct trace_array *tr = iter->tr;
3544        struct trace_seq *s = &iter->seq;
3545        unsigned char newline = '\n';
3546        struct trace_entry *entry;
3547        struct trace_event *event;
3548
3549        entry = iter->ent;
3550
3551        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3552                SEQ_PUT_HEX_FIELD(s, entry->pid);
3553                SEQ_PUT_HEX_FIELD(s, iter->cpu);
3554                SEQ_PUT_HEX_FIELD(s, iter->ts);
3555                if (trace_seq_has_overflowed(s))
3556                        return TRACE_TYPE_PARTIAL_LINE;
3557        }
3558
3559        event = ftrace_find_event(entry->type);
3560        if (event) {
3561                enum print_line_t ret = event->funcs->hex(iter, 0, event);
3562                if (ret != TRACE_TYPE_HANDLED)
3563                        return ret;
3564        }
3565
3566        SEQ_PUT_FIELD(s, newline);
3567
3568        return trace_handle_return(s);
3569}
3570
3571static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3572{
3573        struct trace_array *tr = iter->tr;
3574        struct trace_seq *s = &iter->seq;
3575        struct trace_entry *entry;
3576        struct trace_event *event;
3577
3578        entry = iter->ent;
3579
3580        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3581                SEQ_PUT_FIELD(s, entry->pid);
3582                SEQ_PUT_FIELD(s, iter->cpu);
3583                SEQ_PUT_FIELD(s, iter->ts);
3584                if (trace_seq_has_overflowed(s))
3585                        return TRACE_TYPE_PARTIAL_LINE;
3586        }
3587
3588        event = ftrace_find_event(entry->type);
3589        return event ? event->funcs->binary(iter, 0, event) :
3590                TRACE_TYPE_HANDLED;
3591}
3592
3593int trace_empty(struct trace_iterator *iter)
3594{
3595        struct ring_buffer_iter *buf_iter;
3596        int cpu;
3597
3598        /* If we are looking at one CPU buffer, only check that one */
3599        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3600                cpu = iter->cpu_file;
3601                buf_iter = trace_buffer_iter(iter, cpu);
3602                if (buf_iter) {
3603                        if (!ring_buffer_iter_empty(buf_iter))
3604                                return 0;
3605                } else {
3606                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3607                                return 0;
3608                }
3609                return 1;
3610        }
3611
3612        for_each_tracing_cpu(cpu) {
3613                buf_iter = trace_buffer_iter(iter, cpu);
3614                if (buf_iter) {
3615                        if (!ring_buffer_iter_empty(buf_iter))
3616                                return 0;
3617                } else {
3618                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3619                                return 0;
3620                }
3621        }
3622
3623        return 1;
3624}
3625
3626/*  Called with trace_event_read_lock() held. */
3627enum print_line_t print_trace_line(struct trace_iterator *iter)
3628{
3629        struct trace_array *tr = iter->tr;
3630        unsigned long trace_flags = tr->trace_flags;
3631        enum print_line_t ret;
3632
3633        if (iter->lost_events) {
3634                trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3635                                 iter->cpu, iter->lost_events);
3636                if (trace_seq_has_overflowed(&iter->seq))
3637                        return TRACE_TYPE_PARTIAL_LINE;
3638        }
3639
3640        if (iter->trace && iter->trace->print_line) {
3641                ret = iter->trace->print_line(iter);
3642                if (ret != TRACE_TYPE_UNHANDLED)
3643                        return ret;
3644        }
3645
3646        if (iter->ent->type == TRACE_BPUTS &&
3647                        trace_flags & TRACE_ITER_PRINTK &&
3648                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3649                return trace_print_bputs_msg_only(iter);
3650
3651        if (iter->ent->type == TRACE_BPRINT &&
3652                        trace_flags & TRACE_ITER_PRINTK &&
3653                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3654                return trace_print_bprintk_msg_only(iter);
3655
3656        if (iter->ent->type == TRACE_PRINT &&
3657                        trace_flags & TRACE_ITER_PRINTK &&
3658                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3659                return trace_print_printk_msg_only(iter);
3660
3661        if (trace_flags & TRACE_ITER_BIN)
3662                return print_bin_fmt(iter);
3663
3664        if (trace_flags & TRACE_ITER_HEX)
3665                return print_hex_fmt(iter);
3666
3667        if (trace_flags & TRACE_ITER_RAW)
3668                return print_raw_fmt(iter);
3669
3670        return print_trace_fmt(iter);
3671}
3672
3673void trace_latency_header(struct seq_file *m)
3674{
3675        struct trace_iterator *iter = m->private;
3676        struct trace_array *tr = iter->tr;
3677
3678        /* print nothing if the buffers are empty */
3679        if (trace_empty(iter))
3680                return;
3681
3682        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3683                print_trace_header(m, iter);
3684
3685        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3686                print_lat_help_header(m);
3687}
3688
3689void trace_default_header(struct seq_file *m)
3690{
3691        struct trace_iterator *iter = m->private;
3692        struct trace_array *tr = iter->tr;
3693        unsigned long trace_flags = tr->trace_flags;
3694
3695        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3696                return;
3697
3698        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3699                /* print nothing if the buffers are empty */
3700                if (trace_empty(iter))
3701                        return;
3702                print_trace_header(m, iter);
3703                if (!(trace_flags & TRACE_ITER_VERBOSE))
3704                        print_lat_help_header(m);
3705        } else {
3706                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3707                        if (trace_flags & TRACE_ITER_IRQ_INFO)
3708                                print_func_help_header_irq(iter->trace_buffer,
3709                                                           m, trace_flags);
3710                        else
3711                                print_func_help_header(iter->trace_buffer, m,
3712                                                       trace_flags);
3713                }
3714        }
3715}
3716
3717static void test_ftrace_alive(struct seq_file *m)
3718{
3719        if (!ftrace_is_dead())
3720                return;
3721        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3722                    "#          MAY BE MISSING FUNCTION EVENTS\n");
3723}
3724
3725#ifdef CONFIG_TRACER_MAX_TRACE
3726static void show_snapshot_main_help(struct seq_file *m)
3727{
3728        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3729                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3730                    "#                      Takes a snapshot of the main buffer.\n"
3731                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3732                    "#                      (Doesn't have to be '2'; works with any number that\n"
3733                    "#                       is not a '0' or '1')\n");
3734}
3735
3736static void show_snapshot_percpu_help(struct seq_file *m)
3737{
3738        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3739#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3740        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3741                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3742#else
3743        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3744                    "#                     Must use main snapshot file to allocate.\n");
3745#endif
3746        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3747                    "#                      (Doesn't have to be '2'; works with any number that\n"
3748                    "#                       is not a '0' or '1')\n");
3749}
3750
3751static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3752{
3753        if (iter->tr->allocated_snapshot)
3754                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3755        else
3756                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3757
3758        seq_puts(m, "# Snapshot commands:\n");
3759        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3760                show_snapshot_main_help(m);
3761        else
3762                show_snapshot_percpu_help(m);
3763}
3764#else
3765/* Should never be called */
3766static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3767#endif
3768
3769static int s_show(struct seq_file *m, void *v)
3770{
3771        struct trace_iterator *iter = v;
3772        int ret;
3773
3774        if (iter->ent == NULL) {
3775                if (iter->tr) {
3776                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
3777                        seq_puts(m, "#\n");
3778                        test_ftrace_alive(m);
3779                }
3780                if (iter->snapshot && trace_empty(iter))
3781                        print_snapshot_help(m, iter);
3782                else if (iter->trace && iter->trace->print_header)
3783                        iter->trace->print_header(m);
3784                else
3785                        trace_default_header(m);
3786
3787        } else if (iter->leftover) {
3788                /*
3789                 * If we filled the seq_file buffer earlier, we
3790                 * want to just show it now.
3791                 */
3792                ret = trace_print_seq(m, &iter->seq);
3793
3794                /* ret should this time be zero, but you never know */
3795                iter->leftover = ret;
3796
3797        } else {
3798                print_trace_line(iter);
3799                ret = trace_print_seq(m, &iter->seq);
3800                /*
3801                 * If we overflow the seq_file buffer, then it will
3802                 * ask us for this data again at start up.
3803                 * Use that instead.
3804                 *  ret is 0 if seq_file write succeeded.
3805                 *        -1 otherwise.
3806                 */
3807                iter->leftover = ret;
3808        }
3809
3810        return 0;
3811}
3812
3813/*
3814 * Should be used after trace_array_get(), trace_types_lock
3815 * ensures that i_cdev was already initialized.
3816 */
3817static inline int tracing_get_cpu(struct inode *inode)
3818{
3819        if (inode->i_cdev) /* See trace_create_cpu_file() */
3820                return (long)inode->i_cdev - 1;
3821        return RING_BUFFER_ALL_CPUS;
3822}
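    /*
     * Example of the decoding above: an i_cdev of (void *)1 yields CPU 0,
     * (void *)2 yields CPU 1, and a NULL i_cdev (as on the top-level,
     * non-per-CPU files) means RING_BUFFER_ALL_CPUS.  The +1 bias on the
     * encoding side (see trace_create_cpu_file()) is what lets NULL stand
     * for "all CPUs".
     */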
3823
3824static const struct seq_operations tracer_seq_ops = {
3825        .start          = s_start,
3826        .next           = s_next,
3827        .stop           = s_stop,
3828        .show           = s_show,
3829};
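    /*
     * These hooks follow the usual seq_file protocol: for each read() the
     * seq_file core calls .start, then alternates .show and .next until the
     * output buffer fills or the iterator is exhausted, and finally calls
     * .stop.  When the buffer overflows mid-record, the core calls .start
     * again at the same position, which is what the iter->leftover handling
     * in s_start() and s_show() above relies on.
     */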
3830
3831static struct trace_iterator *
3832__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3833{
3834        struct trace_array *tr = inode->i_private;
3835        struct trace_iterator *iter;
3836        int cpu;
3837
3838        if (tracing_disabled)
3839                return ERR_PTR(-ENODEV);
3840
3841        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3842        if (!iter)
3843                return ERR_PTR(-ENOMEM);
3844
3845        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3846                                    GFP_KERNEL);
3847        if (!iter->buffer_iter)
3848                goto release;
3849
3850        /*
3851         * We make a copy of the current tracer to avoid concurrent
3852         * changes on it while we are reading.
3853         */
3854        mutex_lock(&trace_types_lock);
3855        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3856        if (!iter->trace)
3857                goto fail;
3858
3859        *iter->trace = *tr->current_trace;
3860
3861        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3862                goto fail;
3863
3864        iter->tr = tr;
3865
3866#ifdef CONFIG_TRACER_MAX_TRACE
3867        /* Currently only the top directory has a snapshot */
3868        if (tr->current_trace->print_max || snapshot)
3869                iter->trace_buffer = &tr->max_buffer;
3870        else
3871#endif
3872                iter->trace_buffer = &tr->trace_buffer;
3873        iter->snapshot = snapshot;
3874        iter->pos = -1;
3875        iter->cpu_file = tracing_get_cpu(inode);
3876        mutex_init(&iter->mutex);
3877
3878        /* Notify the tracer early; before we stop tracing. */
3879        if (iter->trace && iter->trace->open)
3880                iter->trace->open(iter);
3881
3882        /* Annotate start of buffers if we had overruns */
3883        if (ring_buffer_overruns(iter->trace_buffer->buffer))
3884                iter->iter_flags |= TRACE_FILE_ANNOTATE;
3885
3886        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3887        if (trace_clocks[tr->clock_id].in_ns)
3888                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3889
3890        /* stop the trace while dumping if we are not opening "snapshot" */
3891        if (!iter->snapshot)
3892                tracing_stop_tr(tr);
3893
3894        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3895                for_each_tracing_cpu(cpu) {
3896                        iter->buffer_iter[cpu] =
3897                                ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3898                }
3899                ring_buffer_read_prepare_sync();
3900                for_each_tracing_cpu(cpu) {
3901                        ring_buffer_read_start(iter->buffer_iter[cpu]);
3902                        tracing_iter_reset(iter, cpu);
3903                }
3904        } else {
3905                cpu = iter->cpu_file;
3906                iter->buffer_iter[cpu] =
3907                        ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3908                ring_buffer_read_prepare_sync();
3909                ring_buffer_read_start(iter->buffer_iter[cpu]);
3910                tracing_iter_reset(iter, cpu);
3911        }
3912
3913        mutex_unlock(&trace_types_lock);
3914
3915        return iter;
3916
3917 fail:
3918        mutex_unlock(&trace_types_lock);
3919        kfree(iter->trace);
3920        kfree(iter->buffer_iter);
3921release:
3922        seq_release_private(inode, file);
3923        return ERR_PTR(-ENOMEM);
3924}
3925
3926int tracing_open_generic(struct inode *inode, struct file *filp)
3927{
3928        if (tracing_disabled)
3929                return -ENODEV;
3930
3931        filp->private_data = inode->i_private;
3932        return 0;
3933}
3934
3935bool tracing_is_disabled(void)
3936{
3937        return tracing_disabled ? true : false;
3938}
3939
3940/*
3941 * Open and update trace_array ref count.
3942 * Must have the current trace_array passed to it.
3943 */
3944static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3945{
3946        struct trace_array *tr = inode->i_private;
3947
3948        if (tracing_disabled)
3949                return -ENODEV;
3950
3951        if (trace_array_get(tr) < 0)
3952                return -ENODEV;
3953
3954        filp->private_data = inode->i_private;
3955
3956        return 0;
3957}
3958
3959static int tracing_release(struct inode *inode, struct file *file)
3960{
3961        struct trace_array *tr = inode->i_private;
3962        struct seq_file *m = file->private_data;
3963        struct trace_iterator *iter;
3964        int cpu;
3965
3966        if (!(file->f_mode & FMODE_READ)) {
3967                trace_array_put(tr);
3968                return 0;
3969        }
3970
3971        /* Writes do not use seq_file */
3972        iter = m->private;
3973        mutex_lock(&trace_types_lock);
3974
3975        for_each_tracing_cpu(cpu) {
3976                if (iter->buffer_iter[cpu])
3977                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
3978        }
3979
3980        if (iter->trace && iter->trace->close)
3981                iter->trace->close(iter);
3982
3983        if (!iter->snapshot)
3984                /* reenable tracing if it was previously enabled */
3985                tracing_start_tr(tr);
3986
3987        __trace_array_put(tr);
3988
3989        mutex_unlock(&trace_types_lock);
3990
3991        mutex_destroy(&iter->mutex);
3992        free_cpumask_var(iter->started);
3993        kfree(iter->trace);
3994        kfree(iter->buffer_iter);
3995        seq_release_private(inode, file);
3996
3997        return 0;
3998}
3999
4000static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4001{
4002        struct trace_array *tr = inode->i_private;
4003
4004        trace_array_put(tr);
4005        return 0;
4006}
4007
4008static int tracing_single_release_tr(struct inode *inode, struct file *file)
4009{
4010        struct trace_array *tr = inode->i_private;
4011
4012        trace_array_put(tr);
4013
4014        return single_release(inode, file);
4015}
4016
4017static int tracing_open(struct inode *inode, struct file *file)
4018{
4019        struct trace_array *tr = inode->i_private;
4020        struct trace_iterator *iter;
4021        int ret = 0;
4022
4023        if (trace_array_get(tr) < 0)
4024                return -ENODEV;
4025
4026        /* If this file was open for write, then erase contents */
4027        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4028                int cpu = tracing_get_cpu(inode);
4029                struct trace_buffer *trace_buf = &tr->trace_buffer;
4030
4031#ifdef CONFIG_TRACER_MAX_TRACE
4032                if (tr->current_trace->print_max)
4033                        trace_buf = &tr->max_buffer;
4034#endif
4035
4036                if (cpu == RING_BUFFER_ALL_CPUS)
4037                        tracing_reset_online_cpus(trace_buf);
4038                else
4039                        tracing_reset(trace_buf, cpu);
4040        }
4041
4042        if (file->f_mode & FMODE_READ) {
4043                iter = __tracing_open(inode, file, false);
4044                if (IS_ERR(iter))
4045                        ret = PTR_ERR(iter);
4046                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4047                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
4048        }
4049
4050        if (ret < 0)
4051                trace_array_put(tr);
4052
4053        return ret;
4054}
4055
4056/*
4057 * Some tracers are not suitable for instance buffers.
4058 * A tracer is always available for the global array (toplevel)
4059 * or if it explicitly states that it is.
4060 */
4061static bool
4062trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4063{
4064        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4065}
4066
4067/* Find the next tracer that this trace array may use */
4068static struct tracer *
4069get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4070{
4071        while (t && !trace_ok_for_array(t, tr))
4072                t = t->next;
4073
4074        return t;
4075}
4076
4077static void *
4078t_next(struct seq_file *m, void *v, loff_t *pos)
4079{
4080        struct trace_array *tr = m->private;
4081        struct tracer *t = v;
4082
4083        (*pos)++;
4084
4085        if (t)
4086                t = get_tracer_for_array(tr, t->next);
4087
4088        return t;
4089}
4090
4091static void *t_start(struct seq_file *m, loff_t *pos)
4092{
4093        struct trace_array *tr = m->private;
4094        struct tracer *t;
4095        loff_t l = 0;
4096
4097        mutex_lock(&trace_types_lock);
4098
4099        t = get_tracer_for_array(tr, trace_types);
4100        for (; t && l < *pos; t = t_next(m, t, &l))
4101                ;
4102
4103        return t;
4104}
4105
4106static void t_stop(struct seq_file *m, void *p)
4107{
4108        mutex_unlock(&trace_types_lock);
4109}
4110
4111static int t_show(struct seq_file *m, void *v)
4112{
4113        struct tracer *t = v;
4114
4115        if (!t)
4116                return 0;
4117
4118        seq_puts(m, t->name);
4119        if (t->next)
4120                seq_putc(m, ' ');
4121        else
4122                seq_putc(m, '\n');
4123
4124        return 0;
4125}
4126
4127static const struct seq_operations show_traces_seq_ops = {
4128        .start          = t_start,
4129        .next           = t_next,
4130        .stop           = t_stop,
4131        .show           = t_show,
4132};
4133
4134static int show_traces_open(struct inode *inode, struct file *file)
4135{
4136        struct trace_array *tr = inode->i_private;
4137        struct seq_file *m;
4138        int ret;
4139
4140        if (tracing_disabled)
4141                return -ENODEV;
4142
4143        ret = seq_open(file, &show_traces_seq_ops);
4144        if (ret)
4145                return ret;
4146
4147        m = file->private_data;
4148        m->private = tr;
4149
4150        return 0;
4151}
4152
4153static ssize_t
4154tracing_write_stub(struct file *filp, const char __user *ubuf,
4155                   size_t count, loff_t *ppos)
4156{
4157        return count;
4158}
4159
4160loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4161{
4162        int ret;
4163
4164        if (file->f_mode & FMODE_READ)
4165                ret = seq_lseek(file, offset, whence);
4166        else
4167                file->f_pos = ret = 0;
4168
4169        return ret;
4170}
4171
4172static const struct file_operations tracing_fops = {
4173        .open           = tracing_open,
4174        .read           = seq_read,
4175        .write          = tracing_write_stub,
4176        .llseek         = tracing_lseek,
4177        .release        = tracing_release,
4178};
4179
4180static const struct file_operations show_traces_fops = {
4181        .open           = show_traces_open,
4182        .read           = seq_read,
4183        .release        = seq_release,
4184        .llseek         = seq_lseek,
4185};
4186
4187static ssize_t
4188tracing_cpumask_read(struct file *filp, char __user *ubuf,
4189                     size_t count, loff_t *ppos)
4190{
4191        struct trace_array *tr = file_inode(filp)->i_private;
4192        char *mask_str;
4193        int len;
4194
4195        len = snprintf(NULL, 0, "%*pb\n",
4196                       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4197        mask_str = kmalloc(len, GFP_KERNEL);
4198        if (!mask_str)
4199                return -ENOMEM;
4200
4201        len = snprintf(mask_str, len, "%*pb\n",
4202                       cpumask_pr_args(tr->tracing_cpumask));
4203        if (len >= count) {
4204                count = -EINVAL;
4205                goto out_err;
4206        }
4207        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4208
4209out_err:
4210        kfree(mask_str);
4211
4212        return count;
4213}
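    /*
     * Standalone sketch (userspace, not built as part of this file) of the
     * sizing idiom used in tracing_cpumask_read() above: measure the needed
     * length with snprintf(NULL, 0, ...), add one byte for the terminating
     * NUL, then format into the freshly allocated buffer.  format_range()
     * is a made-up helper for illustration only.
     */
    #if 0
    #include <stdio.h>
    #include <stdlib.h>

    static char *format_range(int first, int last)
    {
            int len = snprintf(NULL, 0, "%d-%d\n", first, last) + 1;
            char *buf = malloc(len);

            if (buf)
                    snprintf(buf, len, "%d-%d\n", first, last);
            return buf;             /* caller frees */
    }
    #endif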
4214
4215static ssize_t
4216tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4217                      size_t count, loff_t *ppos)
4218{
4219        struct trace_array *tr = file_inode(filp)->i_private;
4220        cpumask_var_t tracing_cpumask_new;
4221        int err, cpu;
4222
4223        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4224                return -ENOMEM;
4225
4226        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4227        if (err)
4228                goto err_unlock;
4229
4230        local_irq_disable();
4231        arch_spin_lock(&tr->max_lock);
4232        for_each_tracing_cpu(cpu) {
4233                /*
4234                 * Increase/decrease the disabled counter if we are
4235                 * about to flip a bit in the cpumask:
4236                 */
4237                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4238                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4239                        atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4240                        ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4241                }
4242                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4243                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4244                        atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4245                        ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4246                }
4247        }
4248        arch_spin_unlock(&tr->max_lock);
4249        local_irq_enable();
4250
4251        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4252        free_cpumask_var(tracing_cpumask_new);
4253
4254        return count;
4255
4256err_unlock:
4257        free_cpumask_var(tracing_cpumask_new);
4258
4259        return err;
4260}
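    /*
     * Illustrative userspace counterpart (a sketch, not built here): the
     * string written to this file is parsed by cpumask_parse_user(), i.e.
     * the usual hex bitmap format, so writing "3" restricts tracing to
     * CPUs 0 and 1.  The tracefs path below is an assumption; it is
     * commonly /sys/kernel/debug/tracing or /sys/kernel/tracing.
     */
    #if 0
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/kernel/debug/tracing/tracing_cpumask", "w");

            if (!f)
                    return 1;
            fputs("3\n", f);        /* hex mask: bits 0 and 1 -> CPUs 0-1 */
            fclose(f);
            return 0;
    }
    #endif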
4261
4262static const struct file_operations tracing_cpumask_fops = {
4263        .open           = tracing_open_generic_tr,
4264        .read           = tracing_cpumask_read,
4265        .write          = tracing_cpumask_write,
4266        .release        = tracing_release_generic_tr,
4267        .llseek         = generic_file_llseek,
4268};
4269
4270static int tracing_trace_options_show(struct seq_file *m, void *v)
4271{
4272        struct tracer_opt *trace_opts;
4273        struct trace_array *tr = m->private;
4274        u32 tracer_flags;
4275        int i;
4276
4277        mutex_lock(&trace_types_lock);
4278        tracer_flags = tr->current_trace->flags->val;
4279        trace_opts = tr->current_trace->flags->opts;
4280
4281        for (i = 0; trace_options[i]; i++) {
4282                if (tr->trace_flags & (1 << i))
4283                        seq_printf(m, "%s\n", trace_options[i]);
4284                else
4285                        seq_printf(m, "no%s\n", trace_options[i]);
4286        }
4287
4288        for (i = 0; trace_opts[i].name; i++) {
4289                if (tracer_flags & trace_opts[i].bit)
4290                        seq_printf(m, "%s\n", trace_opts[i].name);
4291                else
4292                        seq_printf(m, "no%s\n", trace_opts[i].name);
4293        }
4294        mutex_unlock(&trace_types_lock);
4295
4296        return 0;
4297}
4298
4299static int __set_tracer_option(struct trace_array *tr,
4300                               struct tracer_flags *tracer_flags,
4301                               struct tracer_opt *opts, int neg)
4302{
4303        struct tracer *trace = tracer_flags->trace;
4304        int ret;
4305
4306        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4307        if (ret)
4308                return ret;
4309
4310        if (neg)
4311                tracer_flags->val &= ~opts->bit;
4312        else
4313                tracer_flags->val |= opts->bit;
4314        return 0;
4315}
4316
4317/* Try to assign a tracer specific option */
4318static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4319{
4320        struct tracer *trace = tr->current_trace;
4321        struct tracer_flags *tracer_flags = trace->flags;
4322        struct tracer_opt *opts = NULL;
4323        int i;
4324
4325        for (i = 0; tracer_flags->opts[i].name; i++) {
4326                opts = &tracer_flags->opts[i];
4327
4328                if (strcmp(cmp, opts->name) == 0)
4329                        return __set_tracer_option(tr, trace->flags, opts, neg);
4330        }
4331
4332        return -EINVAL;
4333}
4334
4335/* Some tracers require overwrite to stay enabled */
4336int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4337{
4338        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4339                return -1;
4340
4341        return 0;
4342}
4343
4344int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4345{
4346        /* do nothing if flag is already set */
4347        if (!!(tr->trace_flags & mask) == !!enabled)
4348                return 0;
4349
4350        /* Give the tracer a chance to approve the change */
4351        if (tr->current_trace->flag_changed)
4352                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4353                        return -EINVAL;
4354
4355        if (enabled)
4356                tr->trace_flags |= mask;
4357        else
4358                tr->trace_flags &= ~mask;
4359
4360        if (mask == TRACE_ITER_RECORD_CMD)
4361                trace_event_enable_cmd_record(enabled);
4362
4363        if (mask == TRACE_ITER_RECORD_TGID) {
4364                if (!tgid_map)
4365                        tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4366                                           GFP_KERNEL);
4367                if (!tgid_map) {
4368                        tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4369                        return -ENOMEM;
4370                }
4371
4372                trace_event_enable_tgid_record(enabled);
4373        }
4374
4375        if (mask == TRACE_ITER_EVENT_FORK)
4376                trace_event_follow_fork(tr, enabled);
4377
4378        if (mask == TRACE_ITER_FUNC_FORK)
4379                ftrace_pid_follow_fork(tr, enabled);
4380
4381        if (mask == TRACE_ITER_OVERWRITE) {
4382                ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4383#ifdef CONFIG_TRACER_MAX_TRACE
4384                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4385#endif
4386        }
4387
4388        if (mask == TRACE_ITER_PRINTK) {
4389                trace_printk_start_stop_comm(enabled);
4390                trace_printk_control(enabled);
4391        }
4392
4393        return 0;
4394}
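    /*
     * For example, trace_set_options(tr, "nooverwrite") below ends up here
     * with mask == TRACE_ITER_OVERWRITE and enabled == 0: the flag is
     * cleared and the change is propagated to the ring buffer(s) via
     * ring_buffer_change_overwrite().
     */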
4395
4396static int trace_set_options(struct trace_array *tr, char *option)
4397{
4398        char *cmp;
4399        int neg = 0;
4400        int ret = -ENODEV;
4401        int i;
4402        size_t orig_len = strlen(option);
4403
4404        cmp = strstrip(option);
4405
4406        if (strncmp(cmp, "no", 2) == 0) {
4407                neg = 1;
4408                cmp += 2;
4409        }
4410
4411        mutex_lock(&trace_types_lock);
4412
4413        for (i = 0; trace_options[i]; i++) {
4414                if (strcmp(cmp, trace_options[i]) == 0) {
4415                        ret = set_tracer_flag(tr, 1 << i, !neg);
4416                        break;
4417                }
4418        }
4419
4420        /* If no option could be set, test the specific tracer options */
4421        if (!trace_options[i])
4422                ret = set_tracer_option(tr, cmp, neg);
4423
4424        mutex_unlock(&trace_types_lock);
4425
4426        /*
4427         * If the first trailing whitespace is replaced with '\0' by strstrip,
4428         * turn it back into a space.
4429         */
4430        if (orig_len > strlen(option))
4431                option[strlen(option)] = ' ';
4432
4433        return ret;
4434}
4435
4436static void __init apply_trace_boot_options(void)
4437{
4438        char *buf = trace_boot_options_buf;
4439        char *option;
4440
4441        while (true) {
4442                option = strsep(&buf, ",");
4443
4444                if (!option)
4445                        break;
4446
4447                if (*option)
4448                        trace_set_options(&global_trace, option);
4449
4450                /* Put back the comma to allow this to be called again */
4451                if (buf)
4452                        *(buf - 1) = ',';
4453        }
4454}
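    /*
     * Standalone sketch (userspace, not built here) of the strsep() pattern
     * used above: split a writable comma-separated string in place, then put
     * each ',' back so the same buffer can be walked again later.
     * walk_options() is a made-up name for illustration only.
     */
    #if 0
    #include <stdio.h>
    #include <string.h>

    static void walk_options(char *buf)
    {
            char *option;

            while ((option = strsep(&buf, ",")) != NULL) {
                    if (*option)
                            printf("option: %s\n", option);
                    if (buf)        /* strsep() wrote '\0' over the ',' */
                            *(buf - 1) = ',';
            }
    }
    #endif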
4455
4456static ssize_t
4457tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4458                        size_t cnt, loff_t *ppos)
4459{
4460        struct seq_file *m = filp->private_data;
4461        struct trace_array *tr = m->private;
4462        char buf[64];
4463        int ret;
4464
4465        if (cnt >= sizeof(buf))
4466                return -EINVAL;
4467
4468        if (copy_from_user(buf, ubuf, cnt))
4469                return -EFAULT;
4470
4471        buf[cnt] = 0;
4472
4473        ret = trace_set_options(tr, buf);
4474        if (ret < 0)
4475                return ret;
4476
4477        *ppos += cnt;
4478
4479        return cnt;
4480}
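    /*
     * Note: the cnt >= sizeof(buf) check above guarantees cnt <= 63, so the
     * NUL terminator written at buf[cnt] always stays inside the 64-byte
     * stack buffer before trace_set_options() parses it.
     */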
4481
4482static int tracing_trace_options_open(struct inode *inode, struct file *file)
4483{
4484        struct trace_array *tr = inode->i_private;
4485        int ret;
4486
4487        if (tracing_disabled)
4488                return -ENODEV;
4489
4490        if (trace_array_get(tr) < 0)
4491                return -ENODEV;
4492
4493        ret = single_open(file, tracing_trace_options_show, inode->i_private);
4494        if (ret < 0)
4495                trace_array_put(tr);
4496
4497        return ret;
4498}
4499
4500static const struct file_operations tracing_iter_fops = {
4501        .open           = tracing_trace_options_open,
4502        .read           = seq_read,
4503        .llseek         = seq_lseek,
4504        .release        = tracing_single_release_tr,
4505        .write          = tracing_trace_options_write,
4506};
4507
4508static const char readme_msg[] =
4509        "tracing mini-HOWTO:\n\n"
4510        "# echo 0 > tracing_on : quick way to disable tracing\n"
4511        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4512        " Important files:\n"
4513        "  trace\t\t\t- The static contents of the buffer\n"
4514        "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4515        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4516        "  current_tracer\t- function and latency tracers\n"
4517        "  available_tracers\t- list of configured tracers for current_tracer\n"
4518        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4519        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4520        "  trace_clock\t\t- change the clock used to order events\n"
4521        "       local:   Per cpu clock but may not be synced across CPUs\n"
4522        "      global:   Synced across CPUs but slows tracing down.\n"
4523        "     counter:   Not a clock, but just an increment\n"
4524        "      uptime:   Jiffy counter from time of boot\n"
4525        "        perf:   Same clock that perf events use\n"
4526#ifdef CONFIG_X86_64
4527        "     x86-tsc:   TSC cycle counter\n"
4528#endif
4529        "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4530        "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
4531        "  tracing_cpumask\t- Limit which CPUs to trace\n"
4532        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4533        "\t\t\t  Remove sub-buffer with rmdir\n"
4534        "  trace_options\t\t- Set format or modify how tracing happens\n"
4535        "\t\t\t  Disable an option by prefixing 'no' to the\n"
4536        "\t\t\t  option name\n"
4537        "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4538#ifdef CONFIG_DYNAMIC_FTRACE
4539        "\n  available_filter_functions - list of functions that can be filtered on\n"
4540        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4541        "\t\t\t  functions\n"
4542        "\t     accepts: func_full_name or glob-matching-pattern\n"
4543        "\t     modules: Can select a group via module\n"
4544        "\t      Format: :mod:<module-name>\n"
4545        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4546        "\t    triggers: a command to perform when function is hit\n"
4547        "\t      Format: <function>:<trigger>[:count]\n"
4548        "\t     trigger: traceon, traceoff\n"
4549        "\t\t      enable_event:<system>:<event>\n"
4550        "\t\t      disable_event:<system>:<event>\n"
4551#ifdef CONFIG_STACKTRACE
4552        "\t\t      stacktrace\n"
4553#endif
4554#ifdef CONFIG_TRACER_SNAPSHOT
4555        "\t\t      snapshot\n"
4556#endif
4557        "\t\t      dump\n"
4558        "\t\t      cpudump\n"
4559        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4560        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4561        "\t     The first one will disable tracing every time do_fault is hit\n"
4562        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4563        "\t       The first time do_trap is hit and it disables tracing, the\n"
4564        "\t       counter will decrement to 2. If tracing is already disabled,\n"
4565        "\t       the counter will not decrement. It only decrements when the\n"
4566        "\t       trigger did work\n"
4567        "\t     To remove a trigger without a count:\n"
4568        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4569        "\t     To remove a trigger with a count:\n"
4570        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4571        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4572        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4573        "\t    modules: Can select a group via module command :mod:\n"
4574        "\t    Does not accept triggers\n"
4575#endif /* CONFIG_DYNAMIC_FTRACE */
4576#ifdef CONFIG_FUNCTION_TRACER
4577        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4578        "\t\t    (function)\n"
4579#endif
4580#ifdef CONFIG_FUNCTION_GRAPH_TRACER
4581        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4582        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4583        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4584#endif
4585#ifdef CONFIG_TRACER_SNAPSHOT
4586        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4587        "\t\t\t  snapshot buffer. Read the contents for more\n"
4588        "\t\t\t  information\n"
4589#endif
4590#ifdef CONFIG_STACK_TRACER
4591        "  stack_trace\t\t- Shows the max stack trace when active\n"
4592        "  stack_max_size\t- Shows current max stack size that was traced\n"
4593        "\t\t\t  Write into this file to reset the max size (trigger a\n"
4594        "\t\t\t  new trace)\n"
4595#ifdef CONFIG_DYNAMIC_FTRACE
4596        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4597        "\t\t\t  traces\n"
4598#endif
4599#endif /* CONFIG_STACK_TRACER */
4600#ifdef CONFIG_KPROBE_EVENTS
4601        "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4602        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4603#endif
4604#ifdef CONFIG_UPROBE_EVENTS
4605        "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4606        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4607#endif
4608#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4609        "\t  accepts: event-definitions (one definition per line)\n"
4610        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4611        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4612        "\t           -:[<group>/]<event>\n"
4613#ifdef CONFIG_KPROBE_EVENTS
4614        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4615        "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4616#endif
4617#ifdef CONFIG_UPROBE_EVENTS
4618        "\t    place: <path>:<offset>\n"
4619#endif
4620        "\t     args: <name>=fetcharg[:type]\n"
4621        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4622        "\t           $stack<index>, $stack, $retval, $comm\n"
4623        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4624        "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4625#endif
4626        "  events/\t\t- Directory containing all trace event subsystems:\n"
4627        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4628        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4629        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4630        "\t\t\t  events\n"
4631        "      filter\t\t- If set, only events passing filter are traced\n"
4632        "  events/<system>/<event>/\t- Directory containing control files for\n"
4633        "\t\t\t  <event>:\n"
4634        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4635        "      filter\t\t- If set, only events passing filter are traced\n"
4636        "      trigger\t\t- If set, a command to perform when event is hit\n"
4637        "\t    Format: <trigger>[:count][if <filter>]\n"
4638        "\t   trigger: traceon, traceoff\n"
4639        "\t            enable_event:<system>:<event>\n"
4640        "\t            disable_event:<system>:<event>\n"
4641#ifdef CONFIG_HIST_TRIGGERS
4642        "\t            enable_hist:<system>:<event>\n"
4643        "\t            disable_hist:<system>:<event>\n"
4644#endif
4645#ifdef CONFIG_STACKTRACE
4646        "\t\t    stacktrace\n"
4647#endif
4648#ifdef CONFIG_TRACER_SNAPSHOT
4649        "\t\t    snapshot\n"
4650#endif
4651#ifdef CONFIG_HIST_TRIGGERS
4652        "\t\t    hist (see below)\n"
4653#endif
4654        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4655        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4656        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4657        "\t                  events/block/block_unplug/trigger\n"
4658        "\t   The first disables tracing every time block_unplug is hit.\n"
4659        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4660        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4661        "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4662        "\t   Like function triggers, the counter is only decremented if it\n"
4663        "\t    enabled or disabled tracing.\n"
4664        "\t   To remove a trigger without a count:\n"
4665        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4666        "\t   To remove a trigger with a count:\n"
4667        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4668        "\t   Filters can be ignored when removing a trigger.\n"
4669#ifdef CONFIG_HIST_TRIGGERS
4670        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4671        "\t    Format: hist:keys=<field1[,field2,...]>\n"
4672        "\t            [:values=<field1[,field2,...]>]\n"
4673        "\t            [:sort=<field1[,field2,...]>]\n"
4674        "\t            [:size=#entries]\n"
4675        "\t            [:pause][:continue][:clear]\n"
4676        "\t            [:name=histname1]\n"
4677        "\t            [if <filter>]\n\n"
4678        "\t    When a matching event is hit, an entry is added to a hash\n"
4679        "\t    table using the key(s) and value(s) named, and the value of a\n"
4680        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4681        "\t    correspond to fields in the event's format description.  Keys\n"
4682        "\t    can be any field, or the special string 'stacktrace'.\n"
4683        "\t    Compound keys consisting of up to two fields can be specified\n"
4684        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4685        "\t    fields.  Sort keys consisting of up to two fields can be\n"
4686        "\t    specified using the 'sort' keyword.  The sort direction can\n"
4687        "\t    be modified by appending '.descending' or '.ascending' to a\n"
4688        "\t    sort field.  The 'size' parameter can be used to specify more\n"
4689        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4690        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4691        "\t    its histogram data will be shared with other triggers of the\n"
4692        "\t    same name, and trigger hits will update this common data.\n\n"
4693        "\t    Reading the 'hist' file for the event will dump the hash\n"
4694        "\t    table in its entirety to stdout.  If there are multiple hist\n"
4695        "\t    triggers attached to an event, there will be a table for each\n"
4696        "\t    trigger in the output.  The table displayed for a named\n"
4697        "\t    trigger will be the same as any other instance having the\n"
4698        "\t    same name.  The default format used to display a given field\n"
4699        "\t    can be modified by appending any of the following modifiers\n"
4700        "\t    to the field name, as applicable:\n\n"
4701        "\t            .hex        display a number as a hex value\n"
4702        "\t            .sym        display an address as a symbol\n"
4703        "\t            .sym-offset display an address as a symbol and offset\n"
4704        "\t            .execname   display a common_pid as a program name\n"
4705        "\t            .syscall    display a syscall id as a syscall name\n"
4706        "\t            .log2       display log2 value rather than raw number\n\n"
4707        "\t    The 'pause' parameter can be used to pause an existing hist\n"
4708        "\t    trigger or to start a hist trigger but not log any events\n"
4709        "\t    until told to do so.  'continue' can be used to start or\n"
4710        "\t    restart a paused hist trigger.\n\n"
4711        "\t    The 'clear' parameter will clear the contents of a running\n"
4712        "\t    hist trigger and leave its current paused/active state\n"
4713        "\t    unchanged.\n\n"
4714        "\t    The enable_hist and disable_hist triggers can be used to\n"
4715        "\t    have one event conditionally start and stop another event's\n"
4716        "\t    already-attached hist trigger.  The syntax is analogous to\n"
4717        "\t    the enable_event and disable_event triggers.\n"
4718#endif
4719;
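    /*
     * Illustrative userspace companion to the mini-HOWTO above; a sketch
     * only, not built as part of this file.  The tracefs mount point is an
     * assumption (commonly /sys/kernel/debug/tracing or /sys/kernel/tracing)
     * and echo_to() is a made-up helper for the example.
     */
    #if 0
    #include <stdio.h>

    #define TRACEFS "/sys/kernel/debug/tracing"

    static int echo_to(const char *file, const char *val)
    {
            char path[256];
            FILE *f;

            snprintf(path, sizeof(path), TRACEFS "/%s", file);
            f = fopen(path, "w");
            if (!f)
                    return -1;
            fputs(val, f);
            fclose(f);
            return 0;
    }

    int main(void)
    {
            char line[512];
            FILE *f;

            echo_to("tracing_on", "1");             /* re-enable tracing */
            echo_to("trace_marker", "hello from userspace\n");

            f = fopen(TRACEFS "/trace", "r");       /* static buffer contents */
            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }
    #endif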
4720
4721static ssize_t
4722tracing_readme_read(struct file *filp, char __user *ubuf,
4723                       size_t cnt, loff_t *ppos)
4724{
4725        return simple_read_from_buffer(ubuf, cnt, ppos,
4726                                        readme_msg, strlen(readme_msg));
4727}
4728
4729static const struct file_operations tracing_readme_fops = {
4730        .open           = tracing_open_generic,
4731        .read           = tracing_readme_read,
4732        .llseek         = generic_file_llseek,
4733};
4734
4735static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4736{
4737        int *ptr = v;
4738
4739        if (*pos || m->count)
4740                ptr++;
4741
4742        (*pos)++;
4743
4744        for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4745                if (trace_find_tgid(*ptr))
4746                        return ptr;
4747        }
4748
4749        return NULL;
4750}
4751
4752static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4753{
4754        void *v;
4755        loff_t l = 0;
4756
4757        if (!tgid_map)
4758                return NULL;
4759
4760        v = &tgid_map[0];
4761        while (l <= *pos) {
4762                v = saved_tgids_next(m, v, &l);
4763                if (!v)
4764                        return NULL;
4765        }
4766
4767        return v;
4768}
4769
4770static void saved_tgids_stop(struct seq_file *m, void *v)
4771{
4772}
4773
4774static int saved_tgids_show(struct seq_file *m, void *v)
4775{
4776        int pid = (int *)v - tgid_map;
4777
4778        seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4779        return 0;
4780}
4781
4782static const struct seq_operations tracing_saved_tgids_seq_ops = {
4783        .start          = saved_tgids_start,
4784        .stop           = saved_tgids_stop,
4785        .next           = saved_tgids_next,
4786        .show           = saved_tgids_show,
4787};
4788
4789static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4790{
4791        if (tracing_disabled)
4792                return -ENODEV;
4793
4794        return seq_open(filp, &tracing_saved_tgids_seq_ops);
4795}
4796
4797
4798static const struct file_operations tracing_saved_tgids_fops = {
4799        .open           = tracing_saved_tgids_open,
4800        .read           = seq_read,
4801        .llseek         = seq_lseek,
4802        .release        = seq_release,
4803};
4804
4805static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4806{
4807        unsigned int *ptr = v;
4808
4809        if (*pos || m->count)
4810                ptr++;
4811
4812        (*pos)++;
4813
4814        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4815             ptr++) {
4816                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4817                        continue;
4818
4819                return ptr;
4820        }
4821
4822        return NULL;
4823}
4824
4825static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4826{
4827        void *v;
4828        loff_t l = 0;
4829
4830        preempt_disable();
4831        arch_spin_lock(&trace_cmdline_lock);
4832
4833        v = &savedcmd->map_cmdline_to_pid[0];
4834        while (l <= *pos) {
4835                v = saved_cmdlines_next(m, v, &l);
4836                if (!v)
4837                        return NULL;
4838        }
4839
4840        return v;
4841}
4842
4843static void saved_cmdlines_stop(struct seq_file *m, void *v)
4844{
4845        arch_spin_unlock(&trace_cmdline_lock);
4846        preempt_enable();
4847}
4848
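/*
 * Each line of the saved_cmdlines file is "<pid> <comm>", resolved via
 * the pid-to-comm cache in 'savedcmd'.
 */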
4849static int saved_cmdlines_show(struct seq_file *m, void *v)
4850{
4851        char buf[TASK_COMM_LEN];
4852        unsigned int *pid = v;
4853
4854        __trace_find_cmdline(*pid, buf);
4855        seq_printf(m, "%d %s\n", *pid, buf);
4856        return 0;
4857}
4858
4859static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4860        .start          = saved_cmdlines_start,
4861        .next           = saved_cmdlines_next,
4862        .stop           = saved_cmdlines_stop,
4863        .show           = saved_cmdlines_show,
4864};
4865
4866static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4867{
4868        if (tracing_disabled)
4869                return -ENODEV;
4870
4871        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4872}
4873
4874static const struct file_operations tracing_saved_cmdlines_fops = {
4875        .open           = tracing_saved_cmdlines_open,
4876        .read           = seq_read,
4877        .llseek         = seq_lseek,
4878        .release        = seq_release,
4879};
4880
4881static ssize_t
4882tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4883                                 size_t cnt, loff_t *ppos)
4884{
4885        char buf[64];
4886        int r;
4887
4888        arch_spin_lock(&trace_cmdline_lock);
4889        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4890        arch_spin_unlock(&trace_cmdline_lock);
4891
4892        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4893}
4894
4895static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4896{
4897        kfree(s->saved_cmdlines);
4898        kfree(s->map_cmdline_to_pid);
4899        kfree(s);
4900}
4901
4902static int tracing_resize_saved_cmdlines(unsigned int val)
4903{
4904        struct saved_cmdlines_buffer *s, *savedcmd_temp;
4905
4906        s = kmalloc(sizeof(*s), GFP_KERNEL);
4907        if (!s)
4908                return -ENOMEM;
4909
4910        if (allocate_cmdlines_buffer(val, s) < 0) {
4911                kfree(s);
4912                return -ENOMEM;
4913        }
4914
4915        arch_spin_lock(&trace_cmdline_lock);
4916        savedcmd_temp = savedcmd;
4917        savedcmd = s;
4918        arch_spin_unlock(&trace_cmdline_lock);
4919        free_saved_cmdlines_buffer(savedcmd_temp);
4920
4921        return 0;
4922}
4923
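/*
 * Writing a value to the saved_cmdlines_size file resizes the comm
 * cache: a new buffer is allocated, swapped in under trace_cmdline_lock,
 * and the old one is freed.  The value must be between 1 and
 * PID_MAX_DEFAULT (enforced below).
 */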
4924static ssize_t
4925tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4926                                  size_t cnt, loff_t *ppos)
4927{
4928        unsigned long val;
4929        int ret;
4930
4931        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4932        if (ret)
4933                return ret;
4934
4935        /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4936        if (!val || val > PID_MAX_DEFAULT)
4937                return -EINVAL;
4938
4939        ret = tracing_resize_saved_cmdlines((unsigned int)val);
4940        if (ret < 0)
4941                return ret;
4942
4943        *ppos += cnt;
4944
4945        return cnt;
4946}
4947
4948static const struct file_operations tracing_saved_cmdlines_size_fops = {
4949        .open           = tracing_open_generic,
4950        .read           = tracing_saved_cmdlines_size_read,
4951        .write          = tracing_saved_cmdlines_size_write,
4952};
4953
4954#ifdef CONFIG_TRACE_EVAL_MAP_FILE
4955static union trace_eval_map_item *
4956update_eval_map(union trace_eval_map_item *ptr)
4957{
4958        if (!ptr->map.eval_string) {
4959                if (ptr->tail.next) {
4960                        ptr = ptr->tail.next;
4961                        /* Set ptr to the next real item (skip head) */
4962                        ptr++;
4963                } else
4964                        return NULL;
4965        }
4966        return ptr;
4967}
4968
4969static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4970{
4971        union trace_eval_map_item *ptr = v;
4972
4973        /*
4974         * Paranoid! If ptr points to end, we don't want to increment past it.
4975         * This really should never happen.
4976         */
4977        ptr = update_eval_map(ptr);
4978        if (WARN_ON_ONCE(!ptr))
4979                return NULL;
4980
4981        ptr++;
4982
4983        (*pos)++;
4984
4985        ptr = update_eval_map(ptr);
4986
4987        return ptr;
4988}
4989
4990static void *eval_map_start(struct seq_file *m, loff_t *pos)
4991{
4992        union trace_eval_map_item *v;
4993        loff_t l = 0;
4994
4995        mutex_lock(&trace_eval_mutex);
4996
4997        v = trace_eval_maps;
4998        if (v)
4999                v++;
5000
5001        while (v && l < *pos) {
5002                v = eval_map_next(m, v, &l);
5003        }
5004
5005        return v;
5006}
5007
5008static void eval_map_stop(struct seq_file *m, void *v)
5009{
5010        mutex_unlock(&trace_eval_mutex);
5011}
5012
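/* Each line of the eval_map file is "<eval string> <value> (<system>)" */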
5013static int eval_map_show(struct seq_file *m, void *v)
5014{
5015        union trace_eval_map_item *ptr = v;
5016
5017        seq_printf(m, "%s %ld (%s)\n",
5018                   ptr->map.eval_string, ptr->map.eval_value,
5019                   ptr->map.system);
5020
5021        return 0;
5022}
5023
5024static const struct seq_operations tracing_eval_map_seq_ops = {
5025        .start          = eval_map_start,
5026        .next           = eval_map_next,
5027        .stop           = eval_map_stop,
5028        .show           = eval_map_show,
5029};
5030
5031static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5032{
5033        if (tracing_disabled)
5034                return -ENODEV;
5035
5036        return seq_open(filp, &tracing_eval_map_seq_ops);
5037}
5038
5039static const struct file_operations tracing_eval_map_fops = {
5040        .open           = tracing_eval_map_open,
5041        .read           = seq_read,
5042        .llseek         = seq_lseek,
5043        .release        = seq_release,
5044};
5045
5046static inline union trace_eval_map_item *
5047trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5048{
5049        /* Return tail of array given the head */
5050        return ptr + ptr->head.length + 1;
5051}
5052
5053static void
5054trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5055                           int len)
5056{
5057        struct trace_eval_map **stop;
5058        struct trace_eval_map **map;
5059        union trace_eval_map_item *map_array;
5060        union trace_eval_map_item *ptr;
5061
5062        stop = start + len;
5063
5064        /*
5065         * The trace_eval_maps contains the map plus a head and tail item,
5066         * where the head holds the module and length of array, and the
5067         * tail holds a pointer to the next list.
5068         */
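        /*
         * Resulting layout of map_array (illustrative):
         *
         *   [ head(mod, length) | map[0] | ... | map[len - 1] | tail(next) ]
         *
         * trace_eval_jmp_to_tail() depends on this: the tail item lives
         * at head + length + 1.
         */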
5069        map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5070        if (!map_array) {
5071                pr_warn("Unable to allocate trace eval mapping\n");
5072                return;
5073        }
5074
5075        mutex_lock(&trace_eval_mutex);
5076
5077        if (!trace_eval_maps)
5078                trace_eval_maps = map_array;
5079        else {
5080                ptr = trace_eval_maps;
5081                for (;;) {
5082                        ptr = trace_eval_jmp_to_tail(ptr);
5083                        if (!ptr->tail.next)
5084                                break;
5085                        ptr = ptr->tail.next;
5086
5087                }
5088                ptr->tail.next = map_array;
5089        }
5090        map_array->head.mod = mod;
5091        map_array->head.length = len;
5092        map_array++;
5093
5094        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5095                map_array->map = **map;
5096                map_array++;
5097        }
5098        memset(map_array, 0, sizeof(*map_array));
5099
5100        mutex_unlock(&trace_eval_mutex);
5101}
5102
5103static void trace_create_eval_file(struct dentry *d_tracer)
5104{
5105        trace_create_file("eval_map", 0444, d_tracer,
5106                          NULL, &tracing_eval_map_fops);
5107}
5108
5109#else /* CONFIG_TRACE_EVAL_MAP_FILE */
5110static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5111static inline void trace_insert_eval_map_file(struct module *mod,
5112                              struct trace_eval_map **start, int len) { }
5113#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5114
5115static void trace_insert_eval_map(struct module *mod,
5116                                  struct trace_eval_map **start, int len)
5117{
5118        struct trace_eval_map **map;
5119
5120        if (len <= 0)
5121                return;
5122
5123        map = start;
5124
5125        trace_event_eval_update(map, len);
5126
5127        trace_insert_eval_map_file(mod, start, len);
5128}
5129
5130static ssize_t
5131tracing_set_trace_read(struct file *filp, char __user *ubuf,
5132                       size_t cnt, loff_t *ppos)
5133{
5134        struct trace_array *tr = filp->private_data;
5135        char buf[MAX_TRACER_SIZE+2];
5136        int r;
5137
5138        mutex_lock(&trace_types_lock);
5139        r = sprintf(buf, "%s\n", tr->current_trace->name);
5140        mutex_unlock(&trace_types_lock);
5141
5142        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5143}
5144
5145int tracer_init(struct tracer *t, struct trace_array *tr)
5146{
5147        tracing_reset_online_cpus(&tr->trace_buffer);
5148        return t->init(tr);
5149}
5150
5151static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5152{
5153        int cpu;
5154
5155        for_each_tracing_cpu(cpu)
5156                per_cpu_ptr(buf->data, cpu)->entries = val;
5157}
5158
5159#ifdef CONFIG_TRACER_MAX_TRACE
5160/* resize @trace_buf's per-cpu entries to match those of @size_buf */
5161static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5162                                        struct trace_buffer *size_buf, int cpu_id)
5163{
5164        int cpu, ret = 0;
5165
5166        if (cpu_id == RING_BUFFER_ALL_CPUS) {
5167                for_each_tracing_cpu(cpu) {
5168                        ret = ring_buffer_resize(trace_buf->buffer,
5169                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5170                        if (ret < 0)
5171                                break;
5172                        per_cpu_ptr(trace_buf->data, cpu)->entries =
5173                                per_cpu_ptr(size_buf->data, cpu)->entries;
5174                }
5175        } else {
5176                ret = ring_buffer_resize(trace_buf->buffer,
5177                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5178                if (ret == 0)
5179                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5180                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
5181        }
5182
5183        return ret;
5184}
5185#endif /* CONFIG_TRACER_MAX_TRACE */
5186
5187static int __tracing_resize_ring_buffer(struct trace_array *tr,
5188                                        unsigned long size, int cpu)
5189{
5190        int ret;
5191
5192        /*
5193         * If kernel or user changes the size of the ring buffer
5194         * we use the size that was given, and we can forget about
5195         * expanding it later.
5196         */
5197        ring_buffer_expanded = true;
5198
5199        /* May be called before buffers are initialized */
5200        if (!tr->trace_buffer.buffer)
5201                return 0;
5202
5203        ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5204        if (ret < 0)
5205                return ret;
5206
5207#ifdef CONFIG_TRACER_MAX_TRACE
5208        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5209            !tr->current_trace->use_max_tr)
5210                goto out;
5211
5212        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5213        if (ret < 0) {
5214                int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5215                                                     &tr->trace_buffer, cpu);
5216                if (r < 0) {
5217                        /*
5218                         * AARGH! We are left with a max buffer of a
5219                         * different size!
5220                         * The max buffer is our "snapshot" buffer.
5221                         * When a tracer needs a snapshot (one of the
5222                         * latency tracers), it swaps the max buffer
5223                         * with the saved snapshot. We succeeded in
5224                         * updating the size of the main buffer, but failed
5225                         * to update the size of the max buffer. When we then
5226                         * tried to reset the main buffer to its original
5227                         * size, we failed there too. This is very unlikely
5228                         * to happen, but if it does, warn and kill all
5229                         * tracing.
5230                         */
5231                        WARN_ON(1);
5232                        tracing_disabled = 1;
5233                }
5234                return ret;
5235        }
5236
5237        if (cpu == RING_BUFFER_ALL_CPUS)
5238                set_buffer_entries(&tr->max_buffer, size);
5239        else
5240                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5241
5242 out:
5243#endif /* CONFIG_TRACER_MAX_TRACE */
5244
5245        if (cpu == RING_BUFFER_ALL_CPUS)
5246                set_buffer_entries(&tr->trace_buffer, size);
5247        else
5248                per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5249
5250        return ret;
5251}
5252
5253static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5254                                          unsigned long size, int cpu_id)
5255{
5256        int ret = size;
5257
5258        mutex_lock(&trace_types_lock);
5259
5260        if (cpu_id != RING_BUFFER_ALL_CPUS) {
5261                /* make sure this cpu is enabled in the mask */
5262                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5263                        ret = -EINVAL;
5264                        goto out;
5265                }
5266        }
5267
5268        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5269        if (ret < 0)
5270                ret = -ENOMEM;
5271
5272out:
5273        mutex_unlock(&trace_types_lock);
5274
5275        return ret;
5276}
5277
5278
5279/**
5280 * tracing_update_buffers - used by tracing facility to expand ring buffers
5281 *
5282 * To save memory on systems where tracing is configured in but never
5283 * used, the ring buffers are initially set to a minimum size.  Once a
5284 * user starts to use the tracing facility, they need to grow to
5285 * their default size.
5286 *
5287 * This function is to be called when a tracer is about to be used.
5288 */
5289int tracing_update_buffers(void)
5290{
5291        int ret = 0;
5292
5293        mutex_lock(&trace_types_lock);
5294        if (!ring_buffer_expanded)
5295                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5296                                                RING_BUFFER_ALL_CPUS);
5297        mutex_unlock(&trace_types_lock);
5298
5299        return ret;
5300}
5301
5302struct trace_option_dentry;
5303
5304static void
5305create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5306
5307/*
5308 * Used to clear out the tracer before deletion of an instance.
5309 * Must have trace_types_lock held.
5310 */
5311static void tracing_set_nop(struct trace_array *tr)
5312{
5313        if (tr->current_trace == &nop_trace)
5314                return;
5315
5316        tr->current_trace->enabled--;
5317
5318        if (tr->current_trace->reset)
5319                tr->current_trace->reset(tr);
5320
5321        tr->current_trace = &nop_trace;
5322}
5323
5324static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5325{
5326        /* Only enable if the directory has been created already. */
5327        if (!tr->dir)
5328                return;
5329
5330        create_trace_option_files(tr, t);
5331}
5332
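/*
 * Switch the current tracer of @tr to the tracer named @buf: disable
 * and reset the old tracer, drop to nop_trace, free or allocate the
 * snapshot buffer as the new tracer requires, then init and enable the
 * new tracer.  Fails with -EBUSY while trace_pipe readers still hold a
 * reference on the current tracer.
 */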
5333static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5334{
5335        struct tracer *t;
5336#ifdef CONFIG_TRACER_MAX_TRACE
5337        bool had_max_tr;
5338#endif
5339        int ret = 0;
5340
5341        mutex_lock(&trace_types_lock);
5342
5343        if (!ring_buffer_expanded) {
5344                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5345                                                RING_BUFFER_ALL_CPUS);
5346                if (ret < 0)
5347                        goto out;
5348                ret = 0;
5349        }
5350
5351        for (t = trace_types; t; t = t->next) {
5352                if (strcmp(t->name, buf) == 0)
5353                        break;
5354        }
5355        if (!t) {
5356                ret = -EINVAL;
5357                goto out;
5358        }
5359        if (t == tr->current_trace)
5360                goto out;
5361
5362        /* Some tracers won't work on kernel command line */
5363        if (system_state < SYSTEM_RUNNING && t->noboot) {
5364                pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5365                        t->name);
5366                goto out;
5367        }
5368
5369        /* Some tracers are only allowed for the top level buffer */
5370        if (!trace_ok_for_array(t, tr)) {
5371                ret = -EINVAL;
5372                goto out;
5373        }
5374
5375        /* If trace pipe files are being read, we can't change the tracer */
5376        if (tr->current_trace->ref) {
5377                ret = -EBUSY;
5378                goto out;
5379        }
5380
5381        trace_branch_disable();
5382
5383        tr->current_trace->enabled--;
5384
5385        if (tr->current_trace->reset)
5386                tr->current_trace->reset(tr);
5387
5388        /* Current trace needs to be nop_trace before synchronize_sched */
5389        tr->current_trace = &nop_trace;
5390
5391#ifdef CONFIG_TRACER_MAX_TRACE
5392        had_max_tr = tr->allocated_snapshot;
5393
5394        if (had_max_tr && !t->use_max_tr) {
5395                /*
5396                 * We need to make sure that the update_max_tr sees that
5397                 * current_trace changed to nop_trace to keep it from
5398                 * swapping the buffers after we resize it.
5399                 * update_max_tr() is called with interrupts disabled,
5400                 * so a synchronize_sched() is sufficient.
5401                 */
5402                synchronize_sched();
5403                free_snapshot(tr);
5404        }
5405#endif
5406
5407#ifdef CONFIG_TRACER_MAX_TRACE
5408        if (t->use_max_tr && !had_max_tr) {
5409                ret = alloc_snapshot(tr);
5410                if (ret < 0)
5411                        goto out;
5412        }
5413#endif
5414
5415        if (t->init) {
5416                ret = tracer_init(t, tr);
5417                if (ret)
5418                        goto out;
5419        }
5420
5421        tr->current_trace = t;
5422        tr->current_trace->enabled++;
5423        trace_branch_enable(tr);
5424 out:
5425        mutex_unlock(&trace_types_lock);
5426
5427        return ret;
5428}
5429
5430static ssize_t
5431tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5432                        size_t cnt, loff_t *ppos)
5433{
5434        struct trace_array *tr = filp->private_data;
5435        char buf[MAX_TRACER_SIZE+1];
5436        int i;
5437        size_t ret;
5438        int err;
5439
5440        ret = cnt;
5441
5442        if (cnt > MAX_TRACER_SIZE)
5443                cnt = MAX_TRACER_SIZE;
5444
5445        if (copy_from_user(buf, ubuf, cnt))
5446                return -EFAULT;
5447
5448        buf[cnt] = 0;
5449
5450        /* strip trailing whitespace. */
5451        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5452                buf[i] = 0;
5453
5454        err = tracing_set_tracer(tr, buf);
5455        if (err)
5456                return err;
5457
5458        *ppos += ret;
5459
5460        return ret;
5461}
5462
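/*
 * Helpers for files that store a latency in nanoseconds internally but
 * expose it in microseconds to userspace ((unsigned long)-1 is passed
 * through unchanged on read).
 */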
5463static ssize_t
5464tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5465                   size_t cnt, loff_t *ppos)
5466{
5467        char buf[64];
5468        int r;
5469
5470        r = snprintf(buf, sizeof(buf), "%ld\n",
5471                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5472        if (r > sizeof(buf))
5473                r = sizeof(buf);
5474        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5475}
5476
5477static ssize_t
5478tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5479                    size_t cnt, loff_t *ppos)
5480{
5481        unsigned long val;
5482        int ret;
5483
5484        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5485        if (ret)
5486                return ret;
5487
5488        *ptr = val * 1000;
5489
5490        return cnt;
5491}
5492
5493static ssize_t
5494tracing_thresh_read(struct file *filp, char __user *ubuf,
5495                    size_t cnt, loff_t *ppos)
5496{
5497        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5498}
5499
5500static ssize_t
5501tracing_thresh_write(struct file *filp, const char __user *ubuf,
5502                     size_t cnt, loff_t *ppos)
5503{
5504        struct trace_array *tr = filp->private_data;
5505        int ret;
5506
5507        mutex_lock(&trace_types_lock);
5508        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5509        if (ret < 0)
5510                goto out;
5511
5512        if (tr->current_trace->update_thresh) {
5513                ret = tr->current_trace->update_thresh(tr);
5514                if (ret < 0)
5515                        goto out;
5516        }
5517
5518        ret = cnt;
5519out:
5520        mutex_unlock(&trace_types_lock);
5521
5522        return ret;
5523}
5524
5525#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5526
5527static ssize_t
5528tracing_max_lat_read(struct file *filp, char __user *ubuf,
5529                     size_t cnt, loff_t *ppos)
5530{
5531        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5532}
5533
5534static ssize_t
5535tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5536                      size_t cnt, loff_t *ppos)
5537{
5538        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5539}
5540
5541#endif
5542
5543static int tracing_open_pipe(struct inode *inode, struct file *filp)
5544{
5545        struct trace_array *tr = inode->i_private;
5546        struct trace_iterator *iter;
5547        int ret = 0;
5548
5549        if (tracing_disabled)
5550                return -ENODEV;
5551
5552        if (trace_array_get(tr) < 0)
5553                return -ENODEV;
5554
5555        mutex_lock(&trace_types_lock);
5556
5557        /* create a buffer to store the information to pass to userspace */
5558        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5559        if (!iter) {
5560                ret = -ENOMEM;
5561                __trace_array_put(tr);
5562                goto out;
5563        }
5564
5565        trace_seq_init(&iter->seq);
5566        iter->trace = tr->current_trace;
5567
5568        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5569                ret = -ENOMEM;
5570                goto fail;
5571        }
5572
5573        /* trace pipe does not show start of buffer */
5574        cpumask_setall(iter->started);
5575
5576        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5577                iter->iter_flags |= TRACE_FILE_LAT_FMT;
5578
5579        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5580        if (trace_clocks[tr->clock_id].in_ns)
5581                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5582
5583        iter->tr = tr;
5584        iter->trace_buffer = &tr->trace_buffer;
5585        iter->cpu_file = tracing_get_cpu(inode);
5586        mutex_init(&iter->mutex);
5587        filp->private_data = iter;
5588
5589        if (iter->trace->pipe_open)
5590                iter->trace->pipe_open(iter);
5591
5592        nonseekable_open(inode, filp);
5593
5594        tr->current_trace->ref++;
5595out:
5596        mutex_unlock(&trace_types_lock);
5597        return ret;
5598
5599fail:
5600        kfree(iter->trace);
5601        kfree(iter);
5602        __trace_array_put(tr);
5603        mutex_unlock(&trace_types_lock);
5604        return ret;
5605}
5606
5607static int tracing_release_pipe(struct inode *inode, struct file *file)
5608{
5609        struct trace_iterator *iter = file->private_data;
5610        struct trace_array *tr = inode->i_private;
5611
5612        mutex_lock(&trace_types_lock);
5613
5614        tr->current_trace->ref--;
5615
5616        if (iter->trace->pipe_close)
5617                iter->trace->pipe_close(iter);
5618
5619        mutex_unlock(&trace_types_lock);
5620
5621        free_cpumask_var(iter->started);
5622        mutex_destroy(&iter->mutex);
5623        kfree(iter);
5624
5625        trace_array_put(tr);
5626
5627        return 0;
5628}
5629
5630static unsigned int
5631trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5632{
5633        struct trace_array *tr = iter->tr;
5634
5635        /* Iterators are static, they should be filled or empty */
5636        if (trace_buffer_iter(iter, iter->cpu_file))
5637                return POLLIN | POLLRDNORM;
5638
5639        if (tr->trace_flags & TRACE_ITER_BLOCK)
5640                /*
5641                 * Always select as readable when in blocking mode
5642                 */
5643                return POLLIN | POLLRDNORM;
5644        else
5645                return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5646                                             filp, poll_table);
5647}
5648
5649static unsigned int
5650tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5651{
5652        struct trace_iterator *iter = filp->private_data;
5653
5654        return trace_poll(iter, filp, poll_table);
5655}
5656
5657/* Must be called with iter->mutex held. */
5658static int tracing_wait_pipe(struct file *filp)
5659{
5660        struct trace_iterator *iter = filp->private_data;
5661        int ret;
5662
5663        while (trace_empty(iter)) {
5664
5665                if ((filp->f_flags & O_NONBLOCK)) {
5666                        return -EAGAIN;
5667                }
5668
5669                /*
5670                 * We block until we have read something and tracing is
5671                 * disabled. We still block if tracing is disabled but we
5672                 * have never read anything. This allows a user to cat this
5673                 * file and then enable tracing. But after we have read
5674                 * something, we give an EOF when tracing is disabled again.
5675                 *
5676                 * iter->pos will be 0 if we haven't read anything.
5677                 */
5678                if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5679                        break;
5680
5681                mutex_unlock(&iter->mutex);
5682
5683                ret = wait_on_pipe(iter, false);
5684
5685                mutex_lock(&iter->mutex);
5686
5687                if (ret)
5688                        return ret;
5689        }
5690
5691        return 1;
5692}
5693
5694/*
5695 * Consumer reader.
5696 */
5697static ssize_t
5698tracing_read_pipe(struct file *filp, char __user *ubuf,
5699                  size_t cnt, loff_t *ppos)
5700{
5701        struct trace_iterator *iter = filp->private_data;
5702        ssize_t sret;
5703
5704        /*
5705         * Avoid more than one consumer on a single file descriptor.
5706         * This is just a matter of trace coherency; the ring buffer itself
5707         * is protected.
5708         */
5709        mutex_lock(&iter->mutex);
5710
5711        /* return any leftover data */
5712        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5713        if (sret != -EBUSY)
5714                goto out;
5715
5716        trace_seq_init(&iter->seq);
5717
5718        if (iter->trace->read) {
5719                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5720                if (sret)
5721                        goto out;
5722        }
5723
5724waitagain:
5725        sret = tracing_wait_pipe(filp);
5726        if (sret <= 0)
5727                goto out;
5728
5729        /* stop when tracing is finished */
5730        if (trace_empty(iter)) {
5731                sret = 0;
5732                goto out;
5733        }
5734
5735        if (cnt >= PAGE_SIZE)
5736                cnt = PAGE_SIZE - 1;
5737
5738        /* reset all but tr, trace, and overruns */
5739        memset(&iter->seq, 0,
5740               sizeof(struct trace_iterator) -
5741               offsetof(struct trace_iterator, seq));
5742        cpumask_clear(iter->started);
5743        iter->pos = -1;
5744
5745        trace_event_read_lock();
5746        trace_access_lock(iter->cpu_file);
5747        while (trace_find_next_entry_inc(iter) != NULL) {
5748                enum print_line_t ret;
5749                int save_len = iter->seq.seq.len;
5750
5751                ret = print_trace_line(iter);
5752                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5753                        /* don't print partial lines */
5754                        iter->seq.seq.len = save_len;
5755                        break;
5756                }
5757                if (ret != TRACE_TYPE_NO_CONSUME)
5758                        trace_consume(iter);
5759
5760                if (trace_seq_used(&iter->seq) >= cnt)
5761                        break;
5762
5763                /*
5764                 * Setting the full flag means we reached the trace_seq buffer
5765                 * size and should have left via the partial-line check above.
5766                 * If we get here, one of the trace_seq_* functions was misused.
5767                 */
5768                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5769                          iter->ent->type);
5770        }
5771        trace_access_unlock(iter->cpu_file);
5772        trace_event_read_unlock();
5773
5774        /* Now copy what we have to the user */
5775        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5776        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5777                trace_seq_init(&iter->seq);
5778
5779        /*
5780         * If there was nothing to send to user, in spite of consuming trace
5781         * entries, go back to wait for more entries.
5782         */
5783        if (sret == -EBUSY)
5784                goto waitagain;
5785
5786out:
5787        mutex_unlock(&iter->mutex);
5788
5789        return sret;
5790}
5791
5792static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5793                                     unsigned int idx)
5794{
5795        __free_page(spd->pages[idx]);
5796}
5797
5798static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5799        .can_merge              = 0,
5800        .confirm                = generic_pipe_buf_confirm,
5801        .release                = generic_pipe_buf_release,
5802        .steal                  = generic_pipe_buf_steal,
5803        .get                    = generic_pipe_buf_get,
5804};
5805
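/*
 * Format trace entries into iter->seq until either the page-sized seq
 * buffer fills up or roughly @rem bytes have been produced, consuming
 * entries as it goes.  Returns the number of bytes the splice caller
 * still wants.
 */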
5806static size_t
5807tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5808{
5809        size_t count;
5810        int save_len;
5811        int ret;
5812
5813        /* Seq buffer is page-sized, exactly what we need. */
5814        for (;;) {
5815                save_len = iter->seq.seq.len;
5816                ret = print_trace_line(iter);
5817
5818                if (trace_seq_has_overflowed(&iter->seq)) {
5819                        iter->seq.seq.len = save_len;
5820                        break;
5821                }
5822
5823                /*
5824                 * This should not be hit: a partial line should only be
5825                 * returned when iter->seq has overflowed, which is handled
5826                 * above. But check it anyway to be safe.
5827                 */
5828                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5829                        iter->seq.seq.len = save_len;
5830                        break;
5831                }
5832
5833                count = trace_seq_used(&iter->seq) - save_len;
5834                if (rem < count) {
5835                        rem = 0;
5836                        iter->seq.seq.len = save_len;
5837                        break;
5838                }
5839
5840                if (ret != TRACE_TYPE_NO_CONSUME)
5841                        trace_consume(iter);
5842                rem -= count;
5843                if (!trace_find_next_entry_inc(iter))   {
5844                        rem = 0;
5845                        iter->ent = NULL;
5846                        break;
5847                }
5848        }
5849
5850        return rem;
5851}
5852
5853static ssize_t tracing_splice_read_pipe(struct file *filp,
5854                                        loff_t *ppos,
5855                                        struct pipe_inode_info *pipe,
5856                                        size_t len,
5857                                        unsigned int flags)
5858{
5859        struct page *pages_def[PIPE_DEF_BUFFERS];
5860        struct partial_page partial_def[PIPE_DEF_BUFFERS];
5861        struct trace_iterator *iter = filp->private_data;
5862        struct splice_pipe_desc spd = {
5863                .pages          = pages_def,
5864                .partial        = partial_def,
5865                .nr_pages       = 0, /* This gets updated below. */
5866                .nr_pages_max   = PIPE_DEF_BUFFERS,
5867                .ops            = &tracing_pipe_buf_ops,
5868                .spd_release    = tracing_spd_release_pipe,
5869        };
5870        ssize_t ret;
5871        size_t rem;
5872        unsigned int i;
5873
5874        if (splice_grow_spd(pipe, &spd))
5875                return -ENOMEM;
5876
5877        mutex_lock(&iter->mutex);
5878
5879        if (iter->trace->splice_read) {
5880                ret = iter->trace->splice_read(iter, filp,
5881                                               ppos, pipe, len, flags);
5882                if (ret)
5883                        goto out_err;
5884        }
5885
5886        ret = tracing_wait_pipe(filp);
5887        if (ret <= 0)
5888                goto out_err;
5889
5890        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5891                ret = -EFAULT;
5892                goto out_err;
5893        }
5894
5895        trace_event_read_lock();
5896        trace_access_lock(iter->cpu_file);
5897
5898        /* Fill as many pages as possible. */
5899        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5900                spd.pages[i] = alloc_page(GFP_KERNEL);
5901                if (!spd.pages[i])
5902                        break;
5903
5904                rem = tracing_fill_pipe_page(rem, iter);
5905
5906                /* Copy the data into the page, so we can start over. */
5907                ret = trace_seq_to_buffer(&iter->seq,
5908                                          page_address(spd.pages[i]),
5909                                          trace_seq_used(&iter->seq));
5910                if (ret < 0) {
5911                        __free_page(spd.pages[i]);
5912                        break;
5913                }
5914                spd.partial[i].offset = 0;
5915                spd.partial[i].len = trace_seq_used(&iter->seq);
5916
5917                trace_seq_init(&iter->seq);
5918        }
5919
5920        trace_access_unlock(iter->cpu_file);
5921        trace_event_read_unlock();
5922        mutex_unlock(&iter->mutex);
5923
5924        spd.nr_pages = i;
5925
5926        if (i)
5927                ret = splice_to_pipe(pipe, &spd);
5928        else
5929                ret = 0;
5930out:
5931        splice_shrink_spd(&spd);
5932        return ret;
5933
5934out_err:
5935        mutex_unlock(&iter->mutex);
5936        goto out;
5937}
5938
5939static ssize_t
5940tracing_entries_read(struct file *filp, char __user *ubuf,
5941                     size_t cnt, loff_t *ppos)
5942{
5943        struct inode *inode = file_inode(filp);
5944        struct trace_array *tr = inode->i_private;
5945        int cpu = tracing_get_cpu(inode);
5946        char buf[64];
5947        int r = 0;
5948        ssize_t ret;
5949
5950        mutex_lock(&trace_types_lock);
5951
5952        if (cpu == RING_BUFFER_ALL_CPUS) {
5953                int cpu, buf_size_same;
5954                unsigned long size;
5955
5956                size = 0;
5957                buf_size_same = 1;
5958                /* check if all cpu sizes are same */
5959                for_each_tracing_cpu(cpu) {
5960                        /* fill in the size from first enabled cpu */
5961                        if (size == 0)
5962                                size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5963                        if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5964                                buf_size_same = 0;
5965                                break;
5966                        }
5967                }
5968
5969                if (buf_size_same) {
5970                        if (!ring_buffer_expanded)
5971                                r = sprintf(buf, "%lu (expanded: %lu)\n",
5972                                            size >> 10,
5973                                            trace_buf_size >> 10);
5974                        else
5975                                r = sprintf(buf, "%lu\n", size >> 10);
5976                } else
5977                        r = sprintf(buf, "X\n");
5978        } else
5979                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5980
5981        mutex_unlock(&trace_types_lock);
5982
5983        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5984        return ret;
5985}
5986
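/*
 * Writing to this file resizes the ring buffer; the value is in KB and
 * applies to one CPU or to all CPUs depending on which file was opened.
 * Illustrative usage, assuming the usual tracefs layout where this fops
 * backs buffer_size_kb:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 */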
5987static ssize_t
5988tracing_entries_write(struct file *filp, const char __user *ubuf,
5989                      size_t cnt, loff_t *ppos)
5990{
5991        struct inode *inode = file_inode(filp);
5992        struct trace_array *tr = inode->i_private;
5993        unsigned long val;
5994        int ret;
5995
5996        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5997        if (ret)
5998                return ret;
5999
6000        /* must have at least 1 entry */
6001        if (!val)
6002                return -EINVAL;
6003
6004        /* value is in KB */
6005        val <<= 10;
6006        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6007        if (ret < 0)
6008                return ret;
6009
6010        *ppos += cnt;
6011
6012        return cnt;
6013}
6014
6015static ssize_t
6016tracing_total_entries_read(struct file *filp, char __user *ubuf,
6017                                size_t cnt, loff_t *ppos)
6018{
6019        struct trace_array *tr = filp->private_data;
6020        char buf[64];
6021        int r, cpu;
6022        unsigned long size = 0, expanded_size = 0;
6023
6024        mutex_lock(&trace_types_lock);
6025        for_each_tracing_cpu(cpu) {
6026                size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6027                if (!ring_buffer_expanded)
6028                        expanded_size += trace_buf_size >> 10;
6029        }
6030        if (ring_buffer_expanded)
6031                r = sprintf(buf, "%lu\n", size);
6032        else
6033                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6034        mutex_unlock(&trace_types_lock);
6035
6036        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6037}
6038
6039static ssize_t
6040tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6041                          size_t cnt, loff_t *ppos)
6042{
6043        /*
6044         * There is no need to read what the user has written; this function
6045         * only exists so that using "echo" on this file does not error out.
6046         */
6047
6048        *ppos += cnt;
6049
6050        return cnt;
6051}
6052
6053static int
6054tracing_free_buffer_release(struct inode *inode, struct file *filp)
6055{
6056        struct trace_array *tr = inode->i_private;
6057
6058        /* disable tracing ? */
6059        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6060                tracer_tracing_off(tr);
6061        /* resize the ring buffer to 0 */
6062        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6063
6064        trace_array_put(tr);
6065
6066        return 0;
6067}
6068
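/*
 * Writes to the trace_marker file are injected into the ring buffer as
 * TRACE_PRINT events, with a trailing newline added if the caller did
 * not supply one.  Illustrative usage, assuming the usual tracefs mount
 * point:
 *
 *   # echo "hello from userspace" > /sys/kernel/tracing/trace_marker
 */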
6069static ssize_t
6070tracing_mark_write(struct file *filp, const char __user *ubuf,
6071                                        size_t cnt, loff_t *fpos)
6072{
6073        struct trace_array *tr = filp->private_data;
6074        struct ring_buffer_event *event;
6075        struct ring_buffer *buffer;
6076        struct print_entry *entry;
6077        unsigned long irq_flags;
6078        const char faulted[] = "<faulted>";
6079        ssize_t written;
6080        int size;
6081        int len;
6082
6083/* Used in tracing_mark_raw_write() as well */
6084#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6085
6086        if (tracing_disabled)
6087                return -EINVAL;
6088
6089        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6090                return -EINVAL;
6091
6092        if (cnt > TRACE_BUF_SIZE)
6093                cnt = TRACE_BUF_SIZE;
6094
6095        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6096
6097        local_save_flags(irq_flags);
6098        size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6099
6100        /* If less than "<faulted>", then make sure we can still add that */
6101        if (cnt < FAULTED_SIZE)
6102                size += FAULTED_SIZE - cnt;
6103
6104        buffer = tr->trace_buffer.buffer;
6105        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6106                                            irq_flags, preempt_count());
6107        if (unlikely(!event))
6108                /* Ring buffer disabled, return as if not open for write */
6109                return -EBADF;
6110
6111        entry = ring_buffer_event_data(event);
6112        entry->ip = _THIS_IP_;
6113
6114        len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6115        if (len) {
6116                memcpy(&entry->buf, faulted, FAULTED_SIZE);
6117                cnt = FAULTED_SIZE;
6118                written = -EFAULT;
6119        } else
6120                written = cnt;
6121        len = cnt;
6122
6123        if (entry->buf[cnt - 1] != '\n') {
6124                entry->buf[cnt] = '\n';
6125                entry->buf[cnt + 1] = '\0';
6126        } else
6127                entry->buf[cnt] = '\0';
6128
6129        __buffer_unlock_commit(buffer, event);
6130
6131        if (written > 0)
6132                *fpos += written;
6133
6134        return written;
6135}
6136
6137/* Limit it for now to 3K (including tag) */
6138#define RAW_DATA_MAX_SIZE (1024*3)
6139
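/*
 * The raw marker expects binary data: the first sizeof(int) bytes are a
 * tag id and the remainder is an opaque payload, copied verbatim into a
 * TRACE_RAW_DATA event (see the size checks below).
 */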
6140static ssize_t
6141tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6142                                        size_t cnt, loff_t *fpos)
6143{
6144        struct trace_array *tr = filp->private_data;
6145        struct ring_buffer_event *event;
6146        struct ring_buffer *buffer;
6147        struct raw_data_entry *entry;
6148        const char faulted[] = "<faulted>";
6149        unsigned long irq_flags;
6150        ssize_t written;
6151        int size;
6152        int len;
6153
6154#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6155
6156        if (tracing_disabled)
6157                return -EINVAL;
6158
6159        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6160                return -EINVAL;
6161
6162        /* The marker must at least have a tag id */
6163        if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6164                return -EINVAL;
6165
6166        if (cnt > TRACE_BUF_SIZE)
6167                cnt = TRACE_BUF_SIZE;
6168
6169        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6170
6171        local_save_flags(irq_flags);
6172        size = sizeof(*entry) + cnt;
6173        if (cnt < FAULT_SIZE_ID)
6174                size += FAULT_SIZE_ID - cnt;
6175
6176        buffer = tr->trace_buffer.buffer;
6177        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6178                                            irq_flags, preempt_count());
6179        if (!event)
6180                /* Ring buffer disabled, return as if not open for write */
6181                return -EBADF;
6182
6183        entry = ring_buffer_event_data(event);
6184
6185        len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6186        if (len) {
6187                entry->id = -1;
6188                memcpy(&entry->buf, faulted, FAULTED_SIZE);
6189                written = -EFAULT;
6190        } else
6191                written = cnt;
6192
6193        __buffer_unlock_commit(buffer, event);
6194
6195        if (written > 0)
6196                *fpos += written;
6197
6198        return written;
6199}
6200
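/*
 * Reading the trace_clock file lists the available clocks with the
 * currently selected one in brackets, e.g. "[local] global counter ...".
 * Writing one of the listed names selects that clock and resets the
 * buffers (see tracing_set_clock()).
 */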
6201static int tracing_clock_show(struct seq_file *m, void *v)
6202{
6203        struct trace_array *tr = m->private;
6204        int i;
6205
6206        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6207                seq_printf(m,
6208                        "%s%s%s%s", i ? " " : "",
6209                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6210                        i == tr->clock_id ? "]" : "");
6211        seq_putc(m, '\n');
6212
6213        return 0;
6214}
6215
6216static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6217{
6218        int i;
6219
6220        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6221                if (strcmp(trace_clocks[i].name, clockstr) == 0)
6222                        break;
6223        }
6224        if (i == ARRAY_SIZE(trace_clocks))
6225                return -EINVAL;
6226
6227        mutex_lock(&trace_types_lock);
6228
6229        tr->clock_id = i;
6230
6231        ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6232
6233        /*
6234         * New clock may not be consistent with the previous clock.
6235         * Reset the buffer so that it doesn't have incomparable timestamps.
6236         */
6237        tracing_reset_online_cpus(&tr->trace_buffer);
6238
6239#ifdef CONFIG_TRACER_MAX_TRACE
6240        if (tr->max_buffer.buffer)
6241                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6242        tracing_reset_online_cpus(&tr->max_buffer);
6243#endif
6244
6245        mutex_unlock(&trace_types_lock);
6246
6247        return 0;
6248}
6249
6250static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6251                                   size_t cnt, loff_t *fpos)
6252{
6253        struct seq_file *m = filp->private_data;
6254        struct trace_array *tr = m->private;
6255        char buf[64];
6256        const char *clockstr;
6257        int ret;
6258
6259        if (cnt >= sizeof(buf))
6260                return -EINVAL;
6261
6262        if (copy_from_user(buf, ubuf, cnt))
6263                return -EFAULT;
6264
6265        buf[cnt] = 0;
6266
6267        clockstr = strstrip(buf);
6268
6269        ret = tracing_set_clock(tr, clockstr);
6270        if (ret)
6271                return ret;
6272
6273        *fpos += cnt;
6274
6275        return cnt;
6276}
6277
6278static int tracing_clock_open(struct inode *inode, struct file *file)
6279{
6280        struct trace_array *tr = inode->i_private;
6281        int ret;
6282
6283        if (tracing_disabled)
6284                return -ENODEV;
6285
6286        if (trace_array_get(tr))
6287                return -ENODEV;
6288
6289        ret = single_open(file, tracing_clock_show, inode->i_private);
6290        if (ret < 0)
6291                trace_array_put(tr);
6292
6293        return ret;
6294}
6295
6296struct ftrace_buffer_info {
6297        struct trace_iterator   iter;
6298        void                    *spare;
6299        unsigned int            spare_cpu;
6300        unsigned int            read;
6301};
6302
6303#ifdef CONFIG_TRACER_SNAPSHOT
6304static int tracing_snapshot_open(struct inode *inode, struct file *file)
6305{
6306        struct trace_array *tr = inode->i_private;
6307        struct trace_iterator *iter;
6308        struct seq_file *m;
6309        int ret = 0;
6310
6311        if (trace_array_get(tr) < 0)
6312                return -ENODEV;
6313
6314        if (file->f_mode & FMODE_READ) {
6315                iter = __tracing_open(inode, file, true);
6316                if (IS_ERR(iter))
6317                        ret = PTR_ERR(iter);
6318        } else {
6319                /* Writes still need the seq_file to hold the private data */
6320                ret = -ENOMEM;
6321                m = kzalloc(sizeof(*m), GFP_KERNEL);
6322                if (!m)
6323                        goto out;
6324                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6325                if (!iter) {
6326                        kfree(m);
6327                        goto out;
6328                }
6329                ret = 0;
6330
6331                iter->tr = tr;
6332                iter->trace_buffer = &tr->max_buffer;
6333                iter->cpu_file = tracing_get_cpu(inode);
6334                m->private = iter;
6335                file->private_data = m;
6336        }
6337out:
6338        if (ret < 0)
6339                trace_array_put(tr);
6340
6341        return ret;
6342}
6343
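/*
 * Values written to the snapshot file:
 *   0 - free the snapshot buffer (only via the all-CPU snapshot file)
 *   1 - allocate the snapshot buffer if needed and take a snapshot
 *       (swap the max buffer with the live buffer)
 *  >1 - clear the contents of the snapshot buffer without freeing it
 */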
6344static ssize_t
6345tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6346                       loff_t *ppos)
6347{
6348        struct seq_file *m = filp->private_data;
6349        struct trace_iterator *iter = m->private;
6350        struct trace_array *tr = iter->tr;
6351        unsigned long val;
6352        int ret;
6353
6354        ret = tracing_update_buffers();
6355        if (ret < 0)
6356                return ret;
6357
6358        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6359        if (ret)
6360                return ret;
6361
6362        mutex_lock(&trace_types_lock);
6363
6364        if (tr->current_trace->use_max_tr) {
6365                ret = -EBUSY;
6366                goto out;
6367        }
6368
6369        switch (val) {
6370        case 0:
6371                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6372                        ret = -EINVAL;
6373                        break;
6374                }
6375                if (tr->allocated_snapshot)
6376                        free_snapshot(tr);
6377                break;
6378        case 1:
6379/* Only allow per-cpu swap if the ring buffer supports it */
6380#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6381                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6382                        ret = -EINVAL;
6383                        break;
6384                }
6385#endif
6386                if (!tr->allocated_snapshot) {
6387                        ret = alloc_snapshot(tr);
6388                        if (ret < 0)
6389                                break;
6390                }
6391                local_irq_disable();
6392                /* Now, we're going to swap */
6393                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6394                        update_max_tr(tr, current, smp_processor_id());
6395                else
6396                        update_max_tr_single(tr, current, iter->cpu_file);
6397                local_irq_enable();
6398                break;
6399        default:
6400                if (tr->allocated_snapshot) {
6401                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6402                                tracing_reset_online_cpus(&tr->max_buffer);
6403                        else
6404                                tracing_reset(&tr->max_buffer, iter->cpu_file);
6405                }
6406                break;
6407        }
6408
6409        if (ret >= 0) {
6410                *ppos += cnt;
6411                ret = cnt;
6412        }
6413out:
6414        mutex_unlock(&trace_types_lock);
6415        return ret;
6416}
6417
6418static int tracing_snapshot_release(struct inode *inode, struct file *file)
6419{
6420        struct seq_file *m = file->private_data;
6421        int ret;
6422
6423        ret = tracing_release(inode, file);
6424
6425        if (file->f_mode & FMODE_READ)
6426                return ret;
6427
6428        /* If write only, the seq_file is just a stub */
6429        if (m)
6430                kfree(m->private);
6431        kfree(m);
6432
6433        return 0;
6434}
6435
6436static int tracing_buffers_open(struct inode *inode, struct file *filp);
6437static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6438                                    size_t count, loff_t *ppos);
6439static int tracing_buffers_release(struct inode *inode, struct file *file);
6440static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6441                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6442
6443static int snapshot_raw_open(struct inode *inode, struct file *filp)
6444{
6445        struct ftrace_buffer_info *info;
6446        int ret;
6447
6448        ret = tracing_buffers_open(inode, filp);
6449        if (ret < 0)
6450                return ret;
6451
6452        info = filp->private_data;
6453
6454        if (info->iter.trace->use_max_tr) {
6455                tracing_buffers_release(inode, filp);
6456                return -EBUSY;
6457        }
6458
6459        info->iter.snapshot = true;
6460        info->iter.trace_buffer = &info->iter.tr->max_buffer;
6461
6462        return ret;
6463}
6464
6465#endif /* CONFIG_TRACER_SNAPSHOT */
6466
6467
6468static const struct file_operations tracing_thresh_fops = {
6469        .open           = tracing_open_generic,
6470        .read           = tracing_thresh_read,
6471        .write          = tracing_thresh_write,
6472        .llseek         = generic_file_llseek,
6473};
6474
6475#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6476static const struct file_operations tracing_max_lat_fops = {
6477        .open           = tracing_open_generic,
6478        .read           = tracing_max_lat_read,
6479        .write          = tracing_max_lat_write,
6480        .llseek         = generic_file_llseek,
6481};
6482#endif
6483
6484static const struct file_operations set_tracer_fops = {
6485        .open           = tracing_open_generic,
6486        .read           = tracing_set_trace_read,
6487        .write          = tracing_set_trace_write,
6488        .llseek         = generic_file_llseek,
6489};
6490
6491static const struct file_operations tracing_pipe_fops = {
6492        .open           = tracing_open_pipe,
6493        .poll           = tracing_poll_pipe,
6494        .read           = tracing_read_pipe,
6495        .splice_read    = tracing_splice_read_pipe,
6496        .release        = tracing_release_pipe,
6497        .llseek         = no_llseek,
6498};
6499
6500static const struct file_operations tracing_entries_fops = {
6501        .open           = tracing_open_generic_tr,
6502        .read           = tracing_entries_read,
6503        .write          = tracing_entries_write,
6504        .llseek         = generic_file_llseek,
6505        .release        = tracing_release_generic_tr,
6506};
6507
6508static const struct file_operations tracing_total_entries_fops = {
6509        .open           = tracing_open_generic_tr,
6510        .read           = tracing_total_entries_read,
6511        .llseek         = generic_file_llseek,
6512        .release        = tracing_release_generic_tr,
6513};
6514
6515static const struct file_operations tracing_free_buffer_fops = {
6516        .open           = tracing_open_generic_tr,
6517        .write          = tracing_free_buffer_write,
6518        .release        = tracing_free_buffer_release,
6519};
6520
6521static const struct file_operations tracing_mark_fops = {
6522        .open           = tracing_open_generic_tr,
6523        .write          = tracing_mark_write,
6524        .llseek         = generic_file_llseek,
6525        .release        = tracing_release_generic_tr,
6526};
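/*
 * User space can annotate the trace through the "trace_marker" file backed
 * by these operations; a quick example (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	echo hello world > /sys/kernel/tracing/trace_marker
 *
 * The message then appears in the trace output as a
 * "tracing_mark_write: hello world" entry.
 */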
6527
6528static const struct file_operations tracing_mark_raw_fops = {
6529        .open           = tracing_open_generic_tr,
6530        .write          = tracing_mark_raw_write,
6531        .llseek         = generic_file_llseek,
6532        .release        = tracing_release_generic_tr,
6533};
6534
6535static const struct file_operations trace_clock_fops = {
6536        .open           = tracing_clock_open,
6537        .read           = seq_read,
6538        .llseek         = seq_lseek,
6539        .release        = tracing_single_release_tr,
6540        .write          = tracing_clock_write,
6541};
6542
6543#ifdef CONFIG_TRACER_SNAPSHOT
6544static const struct file_operations snapshot_fops = {
6545        .open           = tracing_snapshot_open,
6546        .read           = seq_read,
6547        .write          = tracing_snapshot_write,
6548        .llseek         = tracing_lseek,
6549        .release        = tracing_snapshot_release,
6550};
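/*
 * A sketch of how the "snapshot" file is driven from user space (this
 * follows the documented semantics; tracefs assumed at /sys/kernel/tracing):
 *
 *	echo 1 > snapshot	# allocate the spare buffer if needed and
 *				# swap it with the live trace buffer
 *	cat snapshot		# read the saved snapshot, like "trace"
 *	echo 2 > snapshot	# clear the snapshot without freeing it
 *	echo 0 > snapshot	# free the snapshot buffer
 */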
6551
6552static const struct file_operations snapshot_raw_fops = {
6553        .open           = snapshot_raw_open,
6554        .read           = tracing_buffers_read,
6555        .release        = tracing_buffers_release,
6556        .splice_read    = tracing_buffers_splice_read,
6557        .llseek         = no_llseek,
6558};
6559
6560#endif /* CONFIG_TRACER_SNAPSHOT */
6561
6562static int tracing_buffers_open(struct inode *inode, struct file *filp)
6563{
6564        struct trace_array *tr = inode->i_private;
6565        struct ftrace_buffer_info *info;
6566        int ret;
6567
6568        if (tracing_disabled)
6569                return -ENODEV;
6570
6571        if (trace_array_get(tr) < 0)
6572                return -ENODEV;
6573
6574        info = kzalloc(sizeof(*info), GFP_KERNEL);
6575        if (!info) {
6576                trace_array_put(tr);
6577                return -ENOMEM;
6578        }
6579
6580        mutex_lock(&trace_types_lock);
6581
6582        info->iter.tr           = tr;
6583        info->iter.cpu_file     = tracing_get_cpu(inode);
6584        info->iter.trace        = tr->current_trace;
6585        info->iter.trace_buffer = &tr->trace_buffer;
6586        info->spare             = NULL;
6587        /* Force reading ring buffer for first read */
6588        info->read              = (unsigned int)-1;
6589
6590        filp->private_data = info;
6591
6592        tr->current_trace->ref++;
6593
6594        mutex_unlock(&trace_types_lock);
6595
6596        ret = nonseekable_open(inode, filp);
6597        if (ret < 0)
6598                trace_array_put(tr);
6599
6600        return ret;
6601}
6602
6603static unsigned int
6604tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6605{
6606        struct ftrace_buffer_info *info = filp->private_data;
6607        struct trace_iterator *iter = &info->iter;
6608
6609        return trace_poll(iter, filp, poll_table);
6610}
6611
6612static ssize_t
6613tracing_buffers_read(struct file *filp, char __user *ubuf,
6614                     size_t count, loff_t *ppos)
6615{
6616        struct ftrace_buffer_info *info = filp->private_data;
6617        struct trace_iterator *iter = &info->iter;
6618        ssize_t ret = 0;
6619        ssize_t size;
6620
6621        if (!count)
6622                return 0;
6623
6624#ifdef CONFIG_TRACER_MAX_TRACE
6625        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6626                return -EBUSY;
6627#endif
6628
6629        if (!info->spare) {
6630                info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6631                                                          iter->cpu_file);
6632                if (IS_ERR(info->spare)) {
6633                        ret = PTR_ERR(info->spare);
6634                        info->spare = NULL;
6635                } else {
6636                        info->spare_cpu = iter->cpu_file;
6637                }
6638        }
6639        if (!info->spare)
6640                return ret;
6641
6642        /* Do we have previous read data to read? */
6643        if (info->read < PAGE_SIZE)
6644                goto read;
6645
6646 again:
6647        trace_access_lock(iter->cpu_file);
6648        ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6649                                    &info->spare,
6650                                    count,
6651                                    iter->cpu_file, 0);
6652        trace_access_unlock(iter->cpu_file);
6653
6654        if (ret < 0) {
6655                if (trace_empty(iter)) {
6656                        if ((filp->f_flags & O_NONBLOCK))
6657                                return -EAGAIN;
6658
6659                        ret = wait_on_pipe(iter, false);
6660                        if (ret)
6661                                return ret;
6662
6663                        goto again;
6664                }
6665                return 0;
6666        }
6667
6668        info->read = 0;
6669 read:
6670        size = PAGE_SIZE - info->read;
6671        if (size > count)
6672                size = count;
6673
6674        ret = copy_to_user(ubuf, info->spare + info->read, size);
6675        if (ret == size)
6676                return -EFAULT;
6677
6678        size -= ret;
6679
6680        *ppos += size;
6681        info->read += size;
6682
6683        return size;
6684}
6685
6686static int tracing_buffers_release(struct inode *inode, struct file *file)
6687{
6688        struct ftrace_buffer_info *info = file->private_data;
6689        struct trace_iterator *iter = &info->iter;
6690
6691        mutex_lock(&trace_types_lock);
6692
6693        iter->tr->current_trace->ref--;
6694
6695        __trace_array_put(iter->tr);
6696
6697        if (info->spare)
6698                ring_buffer_free_read_page(iter->trace_buffer->buffer,
6699                                           info->spare_cpu, info->spare);
6700        kfree(info);
6701
6702        mutex_unlock(&trace_types_lock);
6703
6704        return 0;
6705}
6706
6707struct buffer_ref {
6708        struct ring_buffer      *buffer;
6709        void                    *page;
6710        int                     cpu;
6711        int                     ref;
6712};
6713
6714static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6715                                    struct pipe_buffer *buf)
6716{
6717        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6718
6719        if (--ref->ref)
6720                return;
6721
6722        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6723        kfree(ref);
6724        buf->private = 0;
6725}
6726
6727static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6728                                struct pipe_buffer *buf)
6729{
6730        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6731
6732        ref->ref++;
6733}
6734
6735/* Pipe buffer operations for a buffer. */
6736static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6737        .can_merge              = 0,
6738        .confirm                = generic_pipe_buf_confirm,
6739        .release                = buffer_pipe_buf_release,
6740        .steal                  = generic_pipe_buf_steal,
6741        .get                    = buffer_pipe_buf_get,
6742};
6743
6744/*
6745 * Callback from splice_to_pipe(), if we need to release some pages
6746 * at the end of the spd in case we errored out while filling the pipe.
6747 */
6748static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6749{
6750        struct buffer_ref *ref =
6751                (struct buffer_ref *)spd->partial[i].private;
6752
6753        if (--ref->ref)
6754                return;
6755
6756        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6757        kfree(ref);
6758        spd->partial[i].private = 0;
6759}
6760
6761static ssize_t
6762tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6763                            struct pipe_inode_info *pipe, size_t len,
6764                            unsigned int flags)
6765{
6766        struct ftrace_buffer_info *info = file->private_data;
6767        struct trace_iterator *iter = &info->iter;
6768        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6769        struct page *pages_def[PIPE_DEF_BUFFERS];
6770        struct splice_pipe_desc spd = {
6771                .pages          = pages_def,
6772                .partial        = partial_def,
6773                .nr_pages_max   = PIPE_DEF_BUFFERS,
6774                .ops            = &buffer_pipe_buf_ops,
6775                .spd_release    = buffer_spd_release,
6776        };
6777        struct buffer_ref *ref;
6778        int entries, i;
6779        ssize_t ret = 0;
6780
6781#ifdef CONFIG_TRACER_MAX_TRACE
6782        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6783                return -EBUSY;
6784#endif
6785
6786        if (*ppos & (PAGE_SIZE - 1))
6787                return -EINVAL;
6788
6789        if (len & (PAGE_SIZE - 1)) {
6790                if (len < PAGE_SIZE)
6791                        return -EINVAL;
6792                len &= PAGE_MASK;
6793        }
6794
6795        if (splice_grow_spd(pipe, &spd))
6796                return -ENOMEM;
6797
6798 again:
6799        trace_access_lock(iter->cpu_file);
6800        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6801
6802        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6803                struct page *page;
6804                int r;
6805
6806                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6807                if (!ref) {
6808                        ret = -ENOMEM;
6809                        break;
6810                }
6811
6812                ref->ref = 1;
6813                ref->buffer = iter->trace_buffer->buffer;
6814                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6815                if (IS_ERR(ref->page)) {
6816                        ret = PTR_ERR(ref->page);
6817                        ref->page = NULL;
6818                        kfree(ref);
6819                        break;
6820                }
6821                ref->cpu = iter->cpu_file;
6822
6823                r = ring_buffer_read_page(ref->buffer, &ref->page,
6824                                          len, iter->cpu_file, 1);
6825                if (r < 0) {
6826                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
6827                                                   ref->page);
6828                        kfree(ref);
6829                        break;
6830                }
6831
6832                page = virt_to_page(ref->page);
6833
6834                spd.pages[i] = page;
6835                spd.partial[i].len = PAGE_SIZE;
6836                spd.partial[i].offset = 0;
6837                spd.partial[i].private = (unsigned long)ref;
6838                spd.nr_pages++;
6839                *ppos += PAGE_SIZE;
6840
6841                entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6842        }
6843
6844        trace_access_unlock(iter->cpu_file);
6845        spd.nr_pages = i;
6846
6847        /* did we read anything? */
6848        if (!spd.nr_pages) {
6849                if (ret)
6850                        goto out;
6851
6852                ret = -EAGAIN;
6853                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6854                        goto out;
6855
6856                ret = wait_on_pipe(iter, true);
6857                if (ret)
6858                        goto out;
6859
6860                goto again;
6861        }
6862
6863        ret = splice_to_pipe(pipe, &spd);
6864out:
6865        splice_shrink_spd(&spd);
6866
6867        return ret;
6868}
6869
6870static const struct file_operations tracing_buffers_fops = {
6871        .open           = tracing_buffers_open,
6872        .read           = tracing_buffers_read,
6873        .poll           = tracing_buffers_poll,
6874        .release        = tracing_buffers_release,
6875        .splice_read    = tracing_buffers_splice_read,
6876        .llseek         = no_llseek,
6877};
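/*
 * These operations back the per_cpu/cpuN/trace_pipe_raw files created
 * below.  They hand out whole ring-buffer pages, so a reader is expected
 * to consume page-sized chunks; a rough user-space sketch (illustrative
 * names and output path, 4K pages assumed, error handling omitted) that
 * avoids extra copies by splicing through a pipe:
 *
 *	int fd  = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	int p[2];
 *	ssize_t n;
 *
 *	pipe(p);
 *	while ((n = splice(fd, NULL, p[1], NULL, 4096, 0)) > 0)
 *		splice(p[0], NULL, out, NULL, n, 0);
 */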
6878
6879static ssize_t
6880tracing_stats_read(struct file *filp, char __user *ubuf,
6881                   size_t count, loff_t *ppos)
6882{
6883        struct inode *inode = file_inode(filp);
6884        struct trace_array *tr = inode->i_private;
6885        struct trace_buffer *trace_buf = &tr->trace_buffer;
6886        int cpu = tracing_get_cpu(inode);
6887        struct trace_seq *s;
6888        unsigned long cnt;
6889        unsigned long long t;
6890        unsigned long usec_rem;
6891
6892        s = kmalloc(sizeof(*s), GFP_KERNEL);
6893        if (!s)
6894                return -ENOMEM;
6895
6896        trace_seq_init(s);
6897
6898        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6899        trace_seq_printf(s, "entries: %ld\n", cnt);
6900
6901        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6902        trace_seq_printf(s, "overrun: %ld\n", cnt);
6903
6904        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6905        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6906
6907        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6908        trace_seq_printf(s, "bytes: %ld\n", cnt);
6909
6910        if (trace_clocks[tr->clock_id].in_ns) {
6911                /* local or global for trace_clock */
6912                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6913                usec_rem = do_div(t, USEC_PER_SEC);
6914                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6915                                                                t, usec_rem);
6916
6917                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6918                usec_rem = do_div(t, USEC_PER_SEC);
6919                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6920        } else {
6921                /* counter or tsc mode for trace_clock */
6922                trace_seq_printf(s, "oldest event ts: %llu\n",
6923                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6924
6925                trace_seq_printf(s, "now ts: %llu\n",
6926                                ring_buffer_time_stamp(trace_buf->buffer, cpu));
6927        }
6928
6929        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6930        trace_seq_printf(s, "dropped events: %ld\n", cnt);
6931
6932        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6933        trace_seq_printf(s, "read events: %ld\n", cnt);
6934
6935        count = simple_read_from_buffer(ubuf, count, ppos,
6936                                        s->buffer, trace_seq_used(s));
6937
6938        kfree(s);
6939
6940        return count;
6941}
6942
6943static const struct file_operations tracing_stats_fops = {
6944        .open           = tracing_open_generic_tr,
6945        .read           = tracing_stats_read,
6946        .llseek         = generic_file_llseek,
6947        .release        = tracing_release_generic_tr,
6948};
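/*
 * Reading per_cpu/cpuN/stats (created below) yields one counter per line
 * in the format built above; the values here are made up purely for
 * illustration, and the two "ts" lines fall back to raw counter values
 * when a non-nanosecond trace clock is in use:
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 53288
 *	oldest event ts:  5268.406441
 *	now ts:  5281.920318
 *	dropped events: 0
 *	read events: 128
 */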
6949
6950#ifdef CONFIG_DYNAMIC_FTRACE
6951
6952static ssize_t
6953tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6954                  size_t cnt, loff_t *ppos)
6955{
6956        unsigned long *p = filp->private_data;
6957        char buf[64]; /* Not too big for a shallow stack */
6958        int r;
6959
6960        r = scnprintf(buf, 63, "%ld", *p);
6961        buf[r++] = '\n';
6962
6963        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6964}
6965
6966static const struct file_operations tracing_dyn_info_fops = {
6967        .open           = tracing_open_generic,
6968        .read           = tracing_read_dyn_info,
6969        .llseek         = generic_file_llseek,
6970};
6971#endif /* CONFIG_DYNAMIC_FTRACE */
6972
6973#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6974static void
6975ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6976                struct trace_array *tr, struct ftrace_probe_ops *ops,
6977                void *data)
6978{
6979        tracing_snapshot_instance(tr);
6980}
6981
6982static void
6983ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6984                      struct trace_array *tr, struct ftrace_probe_ops *ops,
6985                      void *data)
6986{
6987        struct ftrace_func_mapper *mapper = data;
6988        long *count = NULL;
6989
6990        if (mapper)
6991                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6992
6993        if (count) {
6994
6995                if (*count <= 0)
6996                        return;
6997
6998                (*count)--;
6999        }
7000
7001        tracing_snapshot_instance(tr);
7002}
7003
7004static int
7005ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7006                      struct ftrace_probe_ops *ops, void *data)
7007{
7008        struct ftrace_func_mapper *mapper = data;
7009        long *count = NULL;
7010
7011        seq_printf(m, "%ps:", (void *)ip);
7012
7013        seq_puts(m, "snapshot");
7014
7015        if (mapper)
7016                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7017
7018        if (count)
7019                seq_printf(m, ":count=%ld\n", *count);
7020        else
7021                seq_puts(m, ":unlimited\n");
7022
7023        return 0;
7024}
7025
7026static int
7027ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7028                     unsigned long ip, void *init_data, void **data)
7029{
7030        struct ftrace_func_mapper *mapper = *data;
7031
7032        if (!mapper) {
7033                mapper = allocate_ftrace_func_mapper();
7034                if (!mapper)
7035                        return -ENOMEM;
7036                *data = mapper;
7037        }
7038
7039        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7040}
7041
7042static void
7043ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7044                     unsigned long ip, void *data)
7045{
7046        struct ftrace_func_mapper *mapper = data;
7047
7048        if (!ip) {
7049                if (!mapper)
7050                        return;
7051                free_ftrace_func_mapper(mapper, NULL);
7052                return;
7053        }
7054
7055        ftrace_func_mapper_remove_ip(mapper, ip);
7056}
7057
7058static struct ftrace_probe_ops snapshot_probe_ops = {
7059        .func                   = ftrace_snapshot,
7060        .print                  = ftrace_snapshot_print,
7061};
7062
7063static struct ftrace_probe_ops snapshot_count_probe_ops = {
7064        .func                   = ftrace_count_snapshot,
7065        .print                  = ftrace_snapshot_print,
7066        .init                   = ftrace_snapshot_init,
7067        .free                   = ftrace_snapshot_free,
7068};
7069
7070static int
7071ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7072                               char *glob, char *cmd, char *param, int enable)
7073{
7074        struct ftrace_probe_ops *ops;
7075        void *count = (void *)-1;
7076        char *number;
7077        int ret;
7078
7079        if (!tr)
7080                return -ENODEV;
7081
7082        /* hash funcs only work with set_ftrace_filter */
7083        if (!enable)
7084                return -EINVAL;
7085
7086        ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7087
7088        if (glob[0] == '!')
7089                return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7090
7091        if (!param)
7092                goto out_reg;
7093
7094        number = strsep(&param, ":");
7095
7096        if (!strlen(number))
7097                goto out_reg;
7098
7099        /*
7100         * We use the callback data field (which is a pointer)
7101         * as our counter.
7102         */
7103        ret = kstrtoul(number, 0, (unsigned long *)&count);
7104        if (ret)
7105                return ret;
7106
7107 out_reg:
7108        ret = alloc_snapshot(tr);
7109        if (ret < 0)
7110                goto out;
7111
7112        ret = register_ftrace_function_probe(glob, tr, ops, count);
7113
7114 out:
7115        return ret < 0 ? ret : 0;
7116}
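/*
 * The command parsed above is used through set_ftrace_filter; for example
 * (assuming do_IRQ shows up in available_filter_functions):
 *
 *	echo 'do_IRQ:snapshot' >> set_ftrace_filter	# snapshot on every hit
 *	echo 'do_IRQ:snapshot:5' >> set_ftrace_filter	# only the first 5 hits
 *	echo '!do_IRQ:snapshot' >> set_ftrace_filter	# remove the probe
 */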
7117
7118static struct ftrace_func_command ftrace_snapshot_cmd = {
7119        .name                   = "snapshot",
7120        .func                   = ftrace_trace_snapshot_callback,
7121};
7122
7123static __init int register_snapshot_cmd(void)
7124{
7125        return register_ftrace_command(&ftrace_snapshot_cmd);
7126}
7127#else
7128static inline __init int register_snapshot_cmd(void) { return 0; }
7129#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7130
7131static struct dentry *tracing_get_dentry(struct trace_array *tr)
7132{
7133        if (WARN_ON(!tr->dir))
7134                return ERR_PTR(-ENODEV);
7135
7136        /* Top directory uses NULL as the parent */
7137        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7138                return NULL;
7139
7140        /* All sub buffers have a descriptor */
7141        return tr->dir;
7142}
7143
7144static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7145{
7146        struct dentry *d_tracer;
7147
7148        if (tr->percpu_dir)
7149                return tr->percpu_dir;
7150
7151        d_tracer = tracing_get_dentry(tr);
7152        if (IS_ERR(d_tracer))
7153                return NULL;
7154
7155        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7156
7157        WARN_ONCE(!tr->percpu_dir,
7158                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7159
7160        return tr->percpu_dir;
7161}
7162
7163static struct dentry *
7164trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7165                      void *data, long cpu, const struct file_operations *fops)
7166{
7167        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7168
7169        if (ret) /* See tracing_get_cpu() */
7170                d_inode(ret)->i_cdev = (void *)(cpu + 1);
7171        return ret;
7172}
7173
7174static void
7175tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7176{
7177        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7178        struct dentry *d_cpu;
7179        char cpu_dir[30]; /* 30 characters should be more than enough */
7180
7181        if (!d_percpu)
7182                return;
7183
7184        snprintf(cpu_dir, 30, "cpu%ld", cpu);
7185        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7186        if (!d_cpu) {
7187                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7188                return;
7189        }
7190
7191        /* per cpu trace_pipe */
7192        trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7193                                tr, cpu, &tracing_pipe_fops);
7194
7195        /* per cpu trace */
7196        trace_create_cpu_file("trace", 0644, d_cpu,
7197                                tr, cpu, &tracing_fops);
7198
7199        trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7200                                tr, cpu, &tracing_buffers_fops);
7201
7202        trace_create_cpu_file("stats", 0444, d_cpu,
7203                                tr, cpu, &tracing_stats_fops);
7204
7205        trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7206                                tr, cpu, &tracing_entries_fops);
7207
7208#ifdef CONFIG_TRACER_SNAPSHOT
7209        trace_create_cpu_file("snapshot", 0644, d_cpu,
7210                                tr, cpu, &snapshot_fops);
7211
7212        trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7213                                tr, cpu, &snapshot_raw_fops);
7214#endif
7215}
7216
7217#ifdef CONFIG_FTRACE_SELFTEST
7218/* Let selftest have access to static functions in this file */
7219#include "trace_selftest.c"
7220#endif
7221
7222static ssize_t
7223trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7224                        loff_t *ppos)
7225{
7226        struct trace_option_dentry *topt = filp->private_data;
7227        char *buf;
7228
7229        if (topt->flags->val & topt->opt->bit)
7230                buf = "1\n";
7231        else
7232                buf = "0\n";
7233
7234        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7235}
7236
7237static ssize_t
7238trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7239                         loff_t *ppos)
7240{
7241        struct trace_option_dentry *topt = filp->private_data;
7242        unsigned long val;
7243        int ret;
7244
7245        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7246        if (ret)
7247                return ret;
7248
7249        if (val != 0 && val != 1)
7250                return -EINVAL;
7251
7252        if (!!(topt->flags->val & topt->opt->bit) != val) {
7253                mutex_lock(&trace_types_lock);
7254                ret = __set_tracer_option(topt->tr, topt->flags,
7255                                          topt->opt, !val);
7256                mutex_unlock(&trace_types_lock);
7257                if (ret)
7258                        return ret;
7259        }
7260
7261        *ppos += cnt;
7262
7263        return cnt;
7264}
7265
7266
7267static const struct file_operations trace_options_fops = {
7268        .open = tracing_open_generic,
7269        .read = trace_options_read,
7270        .write = trace_options_write,
7271        .llseek = generic_file_llseek,
7272};
7273
7274/*
7275 * In order to pass in both the trace_array descriptor as well as the index
7276 * to the flag that the trace option file represents, the trace_array
7277 * has a character array of trace_flags_index[], which holds the index
7278 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7279 * The address of this character array is passed to the flag option file
7280 * read/write callbacks.
7281 *
7282 * In order to extract both the index and the trace_array descriptor,
7283 * get_tr_index() uses the following algorithm.
7284 *
7285 *   idx = *ptr;
7286 *
7287 * Because index[i] == i, dereferencing the pointer yields the index
7288 * value itself.
7289 *
7290 * Then, to get the trace_array descriptor, we subtract that index
7291 * from the ptr, which brings us to the start of the index array:
7292 *
7293 *   ptr - idx == &index[0]
7294 *
7295 * Then a simple container_of() from that pointer gets us to the
7296 * trace_array descriptor.
7297 */
7298static void get_tr_index(void *data, struct trace_array **ptr,
7299                         unsigned int *pindex)
7300{
7301        *pindex = *(unsigned char *)data;
7302
7303        *ptr = container_of(data - *pindex, struct trace_array,
7304                            trace_flags_index);
7305}
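/*
 * For example, if a file's private_data points at tr->trace_flags_index[5],
 * then *pindex above becomes 5 and (data - 5) == &tr->trace_flags_index[0],
 * so the container_of() recovers the enclosing trace_array.
 */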
7306
7307static ssize_t
7308trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7309                        loff_t *ppos)
7310{
7311        void *tr_index = filp->private_data;
7312        struct trace_array *tr;
7313        unsigned int index;
7314        char *buf;
7315
7316        get_tr_index(tr_index, &tr, &index);
7317
7318        if (tr->trace_flags & (1 << index))
7319                buf = "1\n";
7320        else
7321                buf = "0\n";
7322
7323        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7324}
7325
7326static ssize_t
7327trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7328                         loff_t *ppos)
7329{
7330        void *tr_index = filp->private_data;
7331        struct trace_array *tr;
7332        unsigned int index;
7333        unsigned long val;
7334        int ret;
7335
7336        get_tr_index(tr_index, &tr, &index);
7337
7338        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7339        if (ret)
7340                return ret;
7341
7342        if (val != 0 && val != 1)
7343                return -EINVAL;
7344
7345        mutex_lock(&trace_types_lock);
7346        ret = set_tracer_flag(tr, 1 << index, val);
7347        mutex_unlock(&trace_types_lock);
7348
7349        if (ret < 0)
7350                return ret;
7351
7352        *ppos += cnt;
7353
7354        return cnt;
7355}
7356
7357static const struct file_operations trace_options_core_fops = {
7358        .open = tracing_open_generic,
7359        .read = trace_options_core_read,
7360        .write = trace_options_core_write,
7361        .llseek = generic_file_llseek,
7362};
7363
7364struct dentry *trace_create_file(const char *name,
7365                                 umode_t mode,
7366                                 struct dentry *parent,
7367                                 void *data,
7368                                 const struct file_operations *fops)
7369{
7370        struct dentry *ret;
7371
7372        ret = tracefs_create_file(name, mode, parent, data, fops);
7373        if (!ret)
7374                pr_warn("Could not create tracefs '%s' entry\n", name);
7375
7376        return ret;
7377}
7378
7379
7380static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7381{
7382        struct dentry *d_tracer;
7383
7384        if (tr->options)
7385                return tr->options;
7386
7387        d_tracer = tracing_get_dentry(tr);
7388        if (IS_ERR(d_tracer))
7389                return NULL;
7390
7391        tr->options = tracefs_create_dir("options", d_tracer);
7392        if (!tr->options) {
7393                pr_warn("Could not create tracefs directory 'options'\n");
7394                return NULL;
7395        }
7396
7397        return tr->options;
7398}
7399
7400static void
7401create_trace_option_file(struct trace_array *tr,
7402                         struct trace_option_dentry *topt,
7403                         struct tracer_flags *flags,
7404                         struct tracer_opt *opt)
7405{
7406        struct dentry *t_options;
7407
7408        t_options = trace_options_init_dentry(tr);
7409        if (!t_options)
7410                return;
7411
7412        topt->flags = flags;
7413        topt->opt = opt;
7414        topt->tr = tr;
7415
7416        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7417                                    &trace_options_fops);
7418
7419}
7420
7421static void
7422create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7423{
7424        struct trace_option_dentry *topts;
7425        struct trace_options *tr_topts;
7426        struct tracer_flags *flags;
7427        struct tracer_opt *opts;
7428        int cnt;
7429        int i;
7430
7431        if (!tracer)
7432                return;
7433
7434        flags = tracer->flags;
7435
7436        if (!flags || !flags->opts)
7437                return;
7438
7439        /*
7440         * If this is an instance, only create flags for tracers
7441         * the instance may have.
7442         */
7443        if (!trace_ok_for_array(tracer, tr))
7444                return;
7445
7446        for (i = 0; i < tr->nr_topts; i++) {
7447                /* Make sure there are no duplicate flags. */
7448                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7449                        return;
7450        }
7451
7452        opts = flags->opts;
7453
7454        for (cnt = 0; opts[cnt].name; cnt++)
7455                ;
7456
7457        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7458        if (!topts)
7459                return;
7460
7461        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7462                            GFP_KERNEL);
7463        if (!tr_topts) {
7464                kfree(topts);
7465                return;
7466        }
7467
7468        tr->topts = tr_topts;
7469        tr->topts[tr->nr_topts].tracer = tracer;
7470        tr->topts[tr->nr_topts].topts = topts;
7471        tr->nr_topts++;
7472
7473        for (cnt = 0; opts[cnt].name; cnt++) {
7474                create_trace_option_file(tr, &topts[cnt], flags,
7475                                         &opts[cnt]);
7476                WARN_ONCE(topts[cnt].entry == NULL,
7477                          "Failed to create trace option: %s",
7478                          opts[cnt].name);
7479        }
7480}
7481
7482static struct dentry *
7483create_trace_option_core_file(struct trace_array *tr,
7484                              const char *option, long index)
7485{
7486        struct dentry *t_options;
7487
7488        t_options = trace_options_init_dentry(tr);
7489        if (!t_options)
7490                return NULL;
7491
7492        return trace_create_file(option, 0644, t_options,
7493                                 (void *)&tr->trace_flags_index[index],
7494                                 &trace_options_core_fops);
7495}
7496
7497static void create_trace_options_dir(struct trace_array *tr)
7498{
7499        struct dentry *t_options;
7500        bool top_level = tr == &global_trace;
7501        int i;
7502
7503        t_options = trace_options_init_dentry(tr);
7504        if (!t_options)
7505                return;
7506
7507        for (i = 0; trace_options[i]; i++) {
7508                if (top_level ||
7509                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7510                        create_trace_option_core_file(tr, trace_options[i], i);
7511        }
7512}
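/*
 * Each core flag from the trace_options[] table therefore shows up as a
 * 0/1 file under options/; for example, for the "overwrite" flag:
 *
 *	echo 1 > options/overwrite	# overwrite the oldest events when full
 *	echo 0 > options/overwrite	# drop new events when the buffer is full
 */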
7513
7514static ssize_t
7515rb_simple_read(struct file *filp, char __user *ubuf,
7516               size_t cnt, loff_t *ppos)
7517{
7518        struct trace_array *tr = filp->private_data;
7519        char buf[64];
7520        int r;
7521
7522        r = tracer_tracing_is_on(tr);
7523        r = sprintf(buf, "%d\n", r);
7524
7525        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7526}
7527
7528static ssize_t
7529rb_simple_write(struct file *filp, const char __user *ubuf,
7530                size_t cnt, loff_t *ppos)
7531{
7532        struct trace_array *tr = filp->private_data;
7533        struct ring_buffer *buffer = tr->trace_buffer.buffer;
7534        unsigned long val;
7535        int ret;
7536
7537        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7538        if (ret)
7539                return ret;
7540
7541        if (buffer) {
7542                mutex_lock(&trace_types_lock);
7543                if (val) {
7544                        tracer_tracing_on(tr);
7545                        if (tr->current_trace->start)
7546                                tr->current_trace->start(tr);
7547                } else {
7548                        tracer_tracing_off(tr);
7549                        if (tr->current_trace->stop)
7550                                tr->current_trace->stop(tr);
7551                }
7552                mutex_unlock(&trace_types_lock);
7553        }
7554
7555        (*ppos)++;
7556
7557        return cnt;
7558}
7559
7560static const struct file_operations rb_simple_fops = {
7561        .open           = tracing_open_generic_tr,
7562        .read           = rb_simple_read,
7563        .write          = rb_simple_write,
7564        .release        = tracing_release_generic_tr,
7565        .llseek         = default_llseek,
7566};
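/*
 * The "tracing_on" file backed by these operations gives a cheap way to
 * pause and resume recording without changing the current tracer:
 *
 *	echo 0 > tracing_on	# stop writing to the ring buffer
 *	echo 1 > tracing_on	# resume recording
 *	cat tracing_on		# report the current state (1 or 0)
 */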
7567
7568struct dentry *trace_instance_dir;
7569
7570static void
7571init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7572
7573static int
7574allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7575{
7576        enum ring_buffer_flags rb_flags;
7577
7578        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7579
7580        buf->tr = tr;
7581
7582        buf->buffer = ring_buffer_alloc(size, rb_flags);
7583        if (!buf->buffer)
7584                return -ENOMEM;
7585
7586        buf->data = alloc_percpu(struct trace_array_cpu);
7587        if (!buf->data) {
7588                ring_buffer_free(buf->buffer);
7589                buf->buffer = NULL;
7590                return -ENOMEM;
7591        }
7592
7593        /* Allocate the first page for all buffers */
7594        set_buffer_entries(&tr->trace_buffer,
7595                           ring_buffer_size(tr->trace_buffer.buffer, 0));
7596
7597        return 0;
7598}
7599
7600static int allocate_trace_buffers(struct trace_array *tr, int size)
7601{
7602        int ret;
7603
7604        ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7605        if (ret)
7606                return ret;
7607
7608#ifdef CONFIG_TRACER_MAX_TRACE
7609        ret = allocate_trace_buffer(tr, &tr->max_buffer,
7610                                    allocate_snapshot ? size : 1);
7611        if (WARN_ON(ret)) {
7612                ring_buffer_free(tr->trace_buffer.buffer);
7613                tr->trace_buffer.buffer = NULL;
7614                free_percpu(tr->trace_buffer.data);
7615                tr->trace_buffer.data = NULL;
7616                return -ENOMEM;
7617        }
7618        tr->allocated_snapshot = allocate_snapshot;
7619
7620        /*
7621         * Only the top level trace array gets its snapshot allocated
7622         * from the kernel command line.
7623         */
7624        allocate_snapshot = false;
7625#endif
7626        return 0;
7627}
7628
7629static void free_trace_buffer(struct trace_buffer *buf)
7630{
7631        if (buf->buffer) {
7632                ring_buffer_free(buf->buffer);
7633                buf->buffer = NULL;
7634                free_percpu(buf->data);
7635                buf->data = NULL;
7636        }
7637}
7638
7639static void free_trace_buffers(struct trace_array *tr)
7640{
7641        if (!tr)
7642                return;
7643
7644        free_trace_buffer(&tr->trace_buffer);
7645
7646#ifdef CONFIG_TRACER_MAX_TRACE
7647        free_trace_buffer(&tr->max_buffer);
7648#endif
7649}
7650
7651static void init_trace_flags_index(struct trace_array *tr)
7652{
7653        int i;
7654
7655        /* Used by the trace options files */
7656        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7657                tr->trace_flags_index[i] = i;
7658}
7659
7660static void __update_tracer_options(struct trace_array *tr)
7661{
7662        struct tracer *t;
7663
7664        for (t = trace_types; t; t = t->next)
7665                add_tracer_options(tr, t);
7666}
7667
7668static void update_tracer_options(struct trace_array *tr)
7669{
7670        mutex_lock(&trace_types_lock);
7671        __update_tracer_options(tr);
7672        mutex_unlock(&trace_types_lock);
7673}
7674
7675static int instance_mkdir(const char *name)
7676{
7677        struct trace_array *tr;
7678        int ret;
7679
7680        mutex_lock(&event_mutex);
7681        mutex_lock(&trace_types_lock);
7682
7683        ret = -EEXIST;
7684        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7685                if (tr->name && strcmp(tr->name, name) == 0)
7686                        goto out_unlock;
7687        }
7688
7689        ret = -ENOMEM;
7690        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7691        if (!tr)
7692                goto out_unlock;
7693
7694        tr->name = kstrdup(name, GFP_KERNEL);
7695        if (!tr->name)
7696                goto out_free_tr;
7697
7698        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7699                goto out_free_tr;
7700
7701        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7702
7703        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7704
7705        raw_spin_lock_init(&tr->start_lock);
7706
7707        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7708
7709        tr->current_trace = &nop_trace;
7710
7711        INIT_LIST_HEAD(&tr->systems);
7712        INIT_LIST_HEAD(&tr->events);
7713
7714        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7715                goto out_free_tr;
7716
7717        tr->dir = tracefs_create_dir(name, trace_instance_dir);
7718        if (!tr->dir)
7719                goto out_free_tr;
7720
7721        ret = event_trace_add_tracer(tr->dir, tr);
7722        if (ret) {
7723                tracefs_remove_recursive(tr->dir);
7724                goto out_free_tr;
7725        }
7726
7727        ftrace_init_trace_array(tr);
7728
7729        init_tracer_tracefs(tr, tr->dir);
7730        init_trace_flags_index(tr);
7731        __update_tracer_options(tr);
7732
7733        list_add(&tr->list, &ftrace_trace_arrays);
7734
7735        mutex_unlock(&trace_types_lock);
7736        mutex_unlock(&event_mutex);
7737
7738        return 0;
7739
7740 out_free_tr:
7741        free_trace_buffers(tr);
7742        free_cpumask_var(tr->tracing_cpumask);
7743        kfree(tr->name);
7744        kfree(tr);
7745
7746 out_unlock:
7747        mutex_unlock(&trace_types_lock);
7748        mutex_unlock(&event_mutex);
7749
7750        return ret;
7751
7752}
7753
7754static int instance_rmdir(const char *name)
7755{
7756        struct trace_array *tr;
7757        int found = 0;
7758        int ret;
7759        int i;
7760
7761        mutex_lock(&event_mutex);
7762        mutex_lock(&trace_types_lock);
7763
7764        ret = -ENODEV;
7765        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7766                if (tr->name && strcmp(tr->name, name) == 0) {
7767                        found = 1;
7768                        break;
7769                }
7770        }
7771        if (!found)
7772                goto out_unlock;
7773
7774        ret = -EBUSY;
7775        if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7776                goto out_unlock;
7777
7778        list_del(&tr->list);
7779
7780        /* Disable all the flags that were enabled coming in */
7781        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7782                if ((1 << i) & ZEROED_TRACE_FLAGS)
7783                        set_tracer_flag(tr, 1 << i, 0);
7784        }
7785
7786        tracing_set_nop(tr);
7787        clear_ftrace_function_probes(tr);
7788        event_trace_del_tracer(tr);
7789        ftrace_clear_pids(tr);
7790        ftrace_destroy_function_files(tr);
7791        tracefs_remove_recursive(tr->dir);
7792        free_trace_buffers(tr);
7793
7794        for (i = 0; i < tr->nr_topts; i++) {
7795                kfree(tr->topts[i].topts);
7796        }
7797        kfree(tr->topts);
7798
7799        free_cpumask_var(tr->tracing_cpumask);
7800        kfree(tr->name);
7801        kfree(tr);
7802
7803        ret = 0;
7804
7805 out_unlock:
7806        mutex_unlock(&trace_types_lock);
7807        mutex_unlock(&event_mutex);
7808
7809        return ret;
7810}
7811
7812static __init void create_trace_instances(struct dentry *d_tracer)
7813{
7814        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7815                                                         instance_mkdir,
7816                                                         instance_rmdir);
7817        if (WARN_ON(!trace_instance_dir))
7818                return;
7819}
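/*
 * Instances are then managed from user space with plain mkdir/rmdir in
 * that directory, for example (tracefs assumed at /sys/kernel/tracing):
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # calls instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo    # calls instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while the instance still holds references
 * (see the tr->ref check in instance_rmdir() above).
 */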
7820
7821static void
7822init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7823{
7824        int cpu;
7825
7826        trace_create_file("available_tracers", 0444, d_tracer,
7827                        tr, &show_traces_fops);
7828
7829        trace_create_file("current_tracer", 0644, d_tracer,
7830                        tr, &set_tracer_fops);
7831
7832        trace_create_file("tracing_cpumask", 0644, d_tracer,
7833                          tr, &tracing_cpumask_fops);
7834
7835        trace_create_file("trace_options", 0644, d_tracer,
7836                          tr, &tracing_iter_fops);
7837
7838        trace_create_file("trace", 0644, d_tracer,
7839                          tr, &tracing_fops);
7840
7841        trace_create_file("trace_pipe", 0444, d_tracer,
7842                          tr, &tracing_pipe_fops);
7843
7844        trace_create_file("buffer_size_kb", 0644, d_tracer,
7845                          tr, &tracing_entries_fops);
7846
7847        trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7848                          tr, &tracing_total_entries_fops);
7849
7850        trace_create_file("free_buffer", 0200, d_tracer,
7851                          tr, &tracing_free_buffer_fops);
7852
7853        trace_create_file("trace_marker", 0220, d_tracer,
7854                          tr, &tracing_mark_fops);
7855
7856        trace_create_file("trace_marker_raw", 0220, d_tracer,
7857                          tr, &tracing_mark_raw_fops);
7858
7859        trace_create_file("trace_clock", 0644, d_tracer, tr,
7860                          &trace_clock_fops);
7861
7862        trace_create_file("tracing_on", 0644, d_tracer,
7863                          tr, &rb_simple_fops);
7864
7865        create_trace_options_dir(tr);
7866
7867#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7868        trace_create_file("tracing_max_latency", 0644, d_tracer,
7869                        &tr->max_latency, &tracing_max_lat_fops);
7870#endif
7871
7872        if (ftrace_create_function_files(tr, d_tracer))
7873                WARN(1, "Could not allocate function filter files");
7874
7875#ifdef CONFIG_TRACER_SNAPSHOT
7876        trace_create_file("snapshot", 0644, d_tracer,
7877                          tr, &snapshot_fops);
7878#endif
7879
7880        for_each_tracing_cpu(cpu)
7881                tracing_init_tracefs_percpu(tr, cpu);
7882
7883        ftrace_init_tracefs(tr, d_tracer);
7884}
7885
7886static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7887{
7888        struct vfsmount *mnt;
7889        struct file_system_type *type;
7890
7891        /*
7892         * To maintain backward compatibility for tools that mount
7893         * debugfs to get to the tracing facility, tracefs is automatically
7894         * mounted to the debugfs/tracing directory.
7895         */
7896        type = get_fs_type("tracefs");
7897        if (!type)
7898                return NULL;
7899        mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7900        put_filesystem(type);
7901        if (IS_ERR(mnt))
7902                return NULL;
7903        mntget(mnt);
7904
7905        return mnt;
7906}
7907
7908/**
7909 * tracing_init_dentry - initialize top level trace array
7910 *
7911 * This is called when creating files or directories in the tracing
7912 * directory. It is called via fs_initcall() by any of the boot-up code
7913 * and expects to return the dentry of the top level tracing directory.
7914 */
7915struct dentry *tracing_init_dentry(void)
7916{
7917        struct trace_array *tr = &global_trace;
7918
7919        /* The top level trace array uses NULL as parent */
7920        if (tr->dir)
7921                return NULL;
7922
7923        if (WARN_ON(!tracefs_initialized()) ||
7924                (IS_ENABLED(CONFIG_DEBUG_FS) &&
7925                 WARN_ON(!debugfs_initialized())))
7926                return ERR_PTR(-ENODEV);
7927
7928        /*
7929         * As there may still be users that expect the tracing
7930         * files to exist in debugfs/tracing, we must automount
7931         * the tracefs file system there, so older tools still
7932         * work with the newer kernel.
7933         */
7934        tr->dir = debugfs_create_automount("tracing", NULL,
7935                                           trace_automount, NULL);
7936        if (!tr->dir) {
7937                pr_warn_once("Could not create debugfs directory 'tracing'\n");
7938                return ERR_PTR(-ENOMEM);
7939        }
7940
7941        return NULL;
7942}
7943
7944extern struct trace_eval_map *__start_ftrace_eval_maps[];
7945extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7946
7947static void __init trace_eval_init(void)
7948{
7949        int len;
7950
7951        len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7952        trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7953}
7954
7955#ifdef CONFIG_MODULES
7956static void trace_module_add_evals(struct module *mod)
7957{
7958        if (!mod->num_trace_evals)
7959                return;
7960
7961        /*
7962         * Modules with bad taint do not have events created; do
7963         * not bother with their eval maps either.
7964         */
7965        if (trace_module_has_bad_taint(mod))
7966                return;
7967
7968        trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7969}
7970
7971#ifdef CONFIG_TRACE_EVAL_MAP_FILE
7972static void trace_module_remove_evals(struct module *mod)
7973{
7974        union trace_eval_map_item *map;
7975        union trace_eval_map_item **last = &trace_eval_maps;
7976
7977        if (!mod->num_trace_evals)
7978                return;
7979
7980        mutex_lock(&trace_eval_mutex);
7981
7982        map = trace_eval_maps;
7983
7984        while (map) {
7985                if (map->head.mod == mod)
7986                        break;
7987                map = trace_eval_jmp_to_tail(map);
7988                last = &map->tail.next;
7989                map = map->tail.next;
7990        }
7991        if (!map)
7992                goto out;
7993
7994        *last = trace_eval_jmp_to_tail(map)->tail.next;
7995        kfree(map);
7996 out:
7997        mutex_unlock(&trace_eval_mutex);
7998}
7999#else
8000static inline void trace_module_remove_evals(struct module *mod) { }
8001#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8002
8003static int trace_module_notify(struct notifier_block *self,
8004                               unsigned long val, void *data)
8005{
8006        struct module *mod = data;
8007
8008        switch (val) {
8009        case MODULE_STATE_COMING:
8010                trace_module_add_evals(mod);
8011                break;
8012        case MODULE_STATE_GOING:
8013                trace_module_remove_evals(mod);
8014                break;
8015        }
8016
8017        return 0;
8018}
8019
8020static struct notifier_block trace_module_nb = {
8021        .notifier_call = trace_module_notify,
8022        .priority = 0,
8023};
8024#endif /* CONFIG_MODULES */
8025
8026static __init int tracer_init_tracefs(void)
8027{
8028        struct dentry *d_tracer;
8029
8030        trace_access_lock_init();
8031
8032        d_tracer = tracing_init_dentry();
8033        if (IS_ERR(d_tracer))
8034                return 0;
8035
8036        init_tracer_tracefs(&global_trace, d_tracer);
8037        ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8038
8039        trace_create_file("tracing_thresh", 0644, d_tracer,
8040                        &global_trace, &tracing_thresh_fops);
8041
8042        trace_create_file("README", 0444, d_tracer,
8043                        NULL, &tracing_readme_fops);
8044
8045        trace_create_file("saved_cmdlines", 0444, d_tracer,
8046                        NULL, &tracing_saved_cmdlines_fops);
8047
8048        trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8049                          NULL, &tracing_saved_cmdlines_size_fops);
8050
8051        trace_create_file("saved_tgids", 0444, d_tracer,
8052                        NULL, &tracing_saved_tgids_fops);
8053
8054        trace_eval_init();
8055
8056        trace_create_eval_file(d_tracer);
8057
8058#ifdef CONFIG_MODULES
8059        register_module_notifier(&trace_module_nb);
8060#endif
8061
8062#ifdef CONFIG_DYNAMIC_FTRACE
8063        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8064                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8065#endif
8066
8067        create_trace_instances(d_tracer);
8068
8069        update_tracer_options(&global_trace);
8070
8071        return 0;
8072}
8073
8074static int trace_panic_handler(struct notifier_block *this,
8075                               unsigned long event, void *unused)
8076{
8077        if (ftrace_dump_on_oops)
8078                ftrace_dump(ftrace_dump_on_oops);
8079        return NOTIFY_OK;
8080}
8081
8082static struct notifier_block trace_panic_notifier = {
8083        .notifier_call  = trace_panic_handler,
8084        .next           = NULL,
8085        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8086};
8087
8088static int trace_die_handler(struct notifier_block *self,
8089                             unsigned long val,
8090                             void *data)
8091{
8092        switch (val) {
8093        case DIE_OOPS:
8094                if (ftrace_dump_on_oops)
8095                        ftrace_dump(ftrace_dump_on_oops);
8096                break;
8097        default:
8098                break;
8099        }
8100        return NOTIFY_OK;
8101}
8102
8103static struct notifier_block trace_die_notifier = {
8104        .notifier_call = trace_die_handler,
8105        .priority = 200
8106};
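/*
 * Both notifiers only dump when ftrace_dump_on_oops is set, which can be
 * done by booting with "ftrace_dump_on_oops" (or "ftrace_dump_on_oops=orig_cpu"
 * to dump only the CPU that oopsed) or by writing to
 * /proc/sys/kernel/ftrace_dump_on_oops at run time.
 */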
8107
8108/*
8109 * printk is set to a max of 1024; we really don't need it that big.
8110 * Nothing should be printing 1000 characters anyway.
8111 */
8112#define TRACE_MAX_PRINT         1000
8113
8114/*
8115 * Define here KERN_TRACE so that we have one place to modify
8116 * it if we decide to change what log level the ftrace dump
8117 * should be at.
8118 */
8119#define KERN_TRACE              KERN_EMERG
8120
8121void
8122trace_printk_seq(struct trace_seq *s)
8123{
8124        /* Probably should print a warning here. */
8125        if (s->seq.len >= TRACE_MAX_PRINT)
8126                s->seq.len = TRACE_MAX_PRINT;
8127
8128        /*
8129         * More paranoid code. Although the buffer size is set to
8130         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8131         * an extra layer of protection.
8132         */
8133        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8134                s->seq.len = s->seq.size - 1;
8135
8136        /* Should be zero-terminated, but we are paranoid. */
8137        s->buffer[s->seq.len] = 0;
8138
8139        printk(KERN_TRACE "%s", s->buffer);
8140
8141        trace_seq_init(s);
8142}
8143
8144void trace_init_global_iter(struct trace_iterator *iter)
8145{
8146        iter->tr = &global_trace;
8147        iter->trace = iter->tr->current_trace;
8148        iter->cpu_file = RING_BUFFER_ALL_CPUS;
8149        iter->trace_buffer = &global_trace.trace_buffer;
8150
8151        if (iter->trace && iter->trace->open)
8152                iter->trace->open(iter);
8153
8154        /* Annotate start of buffers if we had overruns */
8155        if (ring_buffer_overruns(iter->trace_buffer->buffer))
8156                iter->iter_flags |= TRACE_FILE_ANNOTATE;
8157
8158        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8159        if (trace_clocks[iter->tr->clock_id].in_ns)
8160                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8161}
8162
8163void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8164{
8165        /* use static because iter can be a bit big for the stack */
8166        static struct trace_iterator iter;
8167        static atomic_t dump_running;
8168        struct trace_array *tr = &global_trace;
8169        unsigned int old_userobj;
8170        unsigned long flags;
8171        int cnt = 0, cpu;
8172
8173        /* Only allow one dump user at a time. */
8174        if (atomic_inc_return(&dump_running) != 1) {
8175                atomic_dec(&dump_running);
8176                return;
8177        }
8178
8179        /*
8180         * Always turn off tracing when we dump.
8181         * We don't need to show trace output of what happens
8182         * between multiple crashes.
8183         *
8184         * If the user does a sysrq-z, then they can re-enable
8185         * tracing with echo 1 > tracing_on.
8186         */
8187        tracing_off();
8188
8189        local_irq_save(flags);
8190
8191        /* Simulate the iterator */
8192        trace_init_global_iter(&iter);
8193
8194        for_each_tracing_cpu(cpu) {
8195                atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8196        }
8197
8198        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8199
8200        /* don't look at user memory in panic mode */
8201        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8202
8203        switch (oops_dump_mode) {
8204        case DUMP_ALL:
8205                iter.cpu_file = RING_BUFFER_ALL_CPUS;
8206                break;
8207        case DUMP_ORIG:
8208                iter.cpu_file = raw_smp_processor_id();
8209                break;
8210        case DUMP_NONE:
8211                goto out_enable;
8212        default:
8213                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8214                iter.cpu_file = RING_BUFFER_ALL_CPUS;
8215        }
8216
8217        printk(KERN_TRACE "Dumping ftrace buffer:\n");
8218
8219        /* Did function tracer already get disabled? */
8220        if (ftrace_is_dead()) {
8221                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8222                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8223        }
8224
8225        /*
8226         * We need to stop all tracing on all CPUs to read
8227         * the next buffer. This is a bit expensive, but is
8228         * not done often. We read everything we can,
8229         * and then release the locks again.
8230         */
8231
8232        while (!trace_empty(&iter)) {
8233
8234                if (!cnt)
8235                        printk(KERN_TRACE "---------------------------------\n");
8236
8237                cnt++;
8238
8239                /* reset all but tr, trace, and overruns */
8240                memset(&iter.seq, 0,
8241                       sizeof(struct trace_iterator) -
8242                       offsetof(struct trace_iterator, seq));
8243                iter.iter_flags |= TRACE_FILE_LAT_FMT;
8244                iter.pos = -1;
8245
8246                if (trace_find_next_entry_inc(&iter) != NULL) {
8247                        int ret;
8248
8249                        ret = print_trace_line(&iter);
8250                        if (ret != TRACE_TYPE_NO_CONSUME)
8251                                trace_consume(&iter);
8252                }
8253                touch_nmi_watchdog();
8254
8255                trace_printk_seq(&iter.seq);
8256        }
8257
8258        if (!cnt)
8259                printk(KERN_TRACE "   (ftrace buffer empty)\n");
8260        else
8261                printk(KERN_TRACE "---------------------------------\n");
8262
8263 out_enable:
8264        tr->trace_flags |= old_userobj;
8265
8266        for_each_tracing_cpu(cpu) {
8267                atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8268        }
8269        atomic_dec(&dump_running);
8270        local_irq_restore(flags);
8271}
8272EXPORT_SYMBOL_GPL(ftrace_dump);
8273
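    /*
     * Usage sketch (illustrative; "broken_state" is a hypothetical
     * condition): since ftrace_dump() is exported, a module can dump
     * the ring buffer from its own failure path (pass DUMP_ORIG to
     * dump only the current CPU):
     *
     *        if (WARN_ON(broken_state))
     *                ftrace_dump(DUMP_ALL);
     *
     * The oops path is normally driven by ftrace_dump_on_oops instead,
     * which is set on the kernel command line or through the
     * kernel.ftrace_dump_on_oops sysctl.
     */
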
8274int trace_run_command(const char *buf, int (*createfn)(int, char **))
8275{
8276        char **argv;
8277        int argc, ret;
8278
8279        argc = 0;
8280        ret = 0;
8281        argv = argv_split(GFP_KERNEL, buf, &argc);
8282        if (!argv)
8283                return -ENOMEM;
8284
8285        if (argc)
8286                ret = createfn(argc, argv);
8287
8288        argv_free(argv);
8289
8290        return ret;
8291}
8292
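    /*
     * Sketch of a createfn callback (my_create_cmd() is hypothetical,
     * for illustration only): trace_run_command() splits the buffer
     * into words with argv_split() and hands them to the callback as
     * argc/argv:
     *
     *        static int my_create_cmd(int argc, char **argv)
     *        {
     *                if (argc < 1)
     *                        return -EINVAL;
     *                pr_info("cmd '%s' with %d argument(s)\n", argv[0], argc - 1);
     *                return 0;
     *        }
     *
     *        ret = trace_run_command("foo bar baz", my_create_cmd);
     */
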
8293#define WRITE_BUFSIZE  4096
8294
8295ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8296                                size_t count, loff_t *ppos,
8297                                int (*createfn)(int, char **))
8298{
8299        char *kbuf, *buf, *tmp;
8300        int ret = 0;
8301        size_t done = 0;
8302        size_t size;
8303
8304        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8305        if (!kbuf)
8306                return -ENOMEM;
8307
8308        while (done < count) {
8309                size = count - done;
8310
8311                if (size >= WRITE_BUFSIZE)
8312                        size = WRITE_BUFSIZE - 1;
8313
8314                if (copy_from_user(kbuf, buffer + done, size)) {
8315                        ret = -EFAULT;
8316                        goto out;
8317                }
8318                kbuf[size] = '\0';
8319                buf = kbuf;
8320                do {
8321                        tmp = strchr(buf, '\n');
8322                        if (tmp) {
8323                                *tmp = '\0';
8324                                size = tmp - buf + 1;
8325                        } else {
8326                                size = strlen(buf);
8327                                if (done + size < count) {
8328                                        if (buf != kbuf)
8329                                                break;
8330                                        /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8331                                        pr_warn("Line length is too long: Should be less than %d\n",
8332                                                WRITE_BUFSIZE - 2);
8333                                        ret = -EINVAL;
8334                                        goto out;
8335                                }
8336                        }
8337                        done += size;
8338
8339                        /* Remove comments */
8340                        tmp = strchr(buf, '#');
8341
8342                        if (tmp)
8343                                *tmp = '\0';
8344
8345                        ret = trace_run_command(buf, createfn);
8346                        if (ret)
8347                                goto out;
8348                        buf += size;
8349
8350                } while (done < count);
8351        }
8352        ret = done;
8353
8354out:
8355        kfree(kbuf);
8356
8357        return ret;
8358}
8359
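    /*
     * Usage sketch (my_probes_write() and my_create_probe() are
     * hypothetical): a tracefs file can delegate its ->write() handler
     * to trace_parse_run_command(), which feeds the callback one
     * comment-stripped line at a time, much like the kprobe/uprobe
     * event files do:
     *
     *        static ssize_t my_probes_write(struct file *file,
     *                                       const char __user *buffer,
     *                                       size_t count, loff_t *ppos)
     *        {
     *                return trace_parse_run_command(file, buffer, count, ppos,
     *                                               my_create_probe);
     *        }
     */
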
8360__init static int tracer_alloc_buffers(void)
8361{
8362        int ring_buf_size;
8363        int ret = -ENOMEM;
8364
8365        /*
8366         * Make sure we don't accidentally add more trace options
8367         * than we have bits for.
8368         */
8369        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8370
8371        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8372                goto out;
8373
8374        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8375                goto out_free_buffer_mask;
8376
8377        /* Only allocate trace_printk buffers if a trace_printk exists */
8378        if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8379                /* Must be called before global_trace.buffer is allocated */
8380                trace_printk_init_buffers();
8381
8382        /* To save memory, keep the ring buffer size at its minimum */
8383        if (ring_buffer_expanded)
8384                ring_buf_size = trace_buf_size;
8385        else
8386                ring_buf_size = 1;
8387
8388        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8389        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8390
8391        raw_spin_lock_init(&global_trace.start_lock);
8392
8393        /*
8394         * The prepare callback allocates some memory for the ring buffer. We
8395         * don't free the buffer if the CPU goes down. If we were to free
8396         * the buffer, then the user would lose any trace that was in the
8397         * buffer. The memory will be removed once the "instance" is removed.
8398         */
8399        ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8400                                      "trace/RB:prepare", trace_rb_cpu_prepare,
8401                                      NULL);
8402        if (ret < 0)
8403                goto out_free_cpumask;
8404        /* Used for event triggers */
8405        ret = -ENOMEM;
8406        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8407        if (!temp_buffer)
8408                goto out_rm_hp_state;
8409
8410        if (trace_create_savedcmd() < 0)
8411                goto out_free_temp_buffer;
8412
8413        /* TODO: make the number of buffers hot pluggable with CPUs */
8414        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8415                printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8416                WARN_ON(1);
8417                goto out_free_savedcmd;
8418        }
8419
8420        if (global_trace.buffer_disabled)
8421                tracing_off();
8422
8423        if (trace_boot_clock) {
8424                ret = tracing_set_clock(&global_trace, trace_boot_clock);
8425                if (ret < 0)
8426                        pr_warn("Trace clock %s not defined, going back to default\n",
8427                                trace_boot_clock);
8428        }
8429
8430        /*
8431         * register_tracer() might reference current_trace, so it
8432         * needs to be set before we register anything. This is
8433         * just a bootstrap of current_trace anyway.
8434         */
8435        global_trace.current_trace = &nop_trace;
8436
8437        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8438
8439        ftrace_init_global_array_ops(&global_trace);
8440
8441        init_trace_flags_index(&global_trace);
8442
8443        register_tracer(&nop_trace);
8444
8445        /* Function tracing may start here (via kernel command line) */
8446        init_function_trace();
8447
8448        /* All seems OK, enable tracing */
8449        tracing_disabled = 0;
8450
8451        atomic_notifier_chain_register(&panic_notifier_list,
8452                                       &trace_panic_notifier);
8453
8454        register_die_notifier(&trace_die_notifier);
8455
8456        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8457
8458        INIT_LIST_HEAD(&global_trace.systems);
8459        INIT_LIST_HEAD(&global_trace.events);
8460        list_add(&global_trace.list, &ftrace_trace_arrays);
8461
8462        apply_trace_boot_options();
8463
8464        register_snapshot_cmd();
8465
8466        return 0;
8467
8468out_free_savedcmd:
8469        free_saved_cmdlines_buffer(savedcmd);
8470out_free_temp_buffer:
8471        ring_buffer_free(temp_buffer);
8472out_rm_hp_state:
8473        cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8474out_free_cpumask:
8475        free_cpumask_var(global_trace.tracing_cpumask);
8476out_free_buffer_mask:
8477        free_cpumask_var(tracing_buffer_mask);
8478out:
8479        return ret;
8480}
8481
8482void __init early_trace_init(void)
8483{
8484        if (tracepoint_printk) {
8485                tracepoint_print_iter =
8486                        kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8487                if (WARN_ON(!tracepoint_print_iter))
8488                        tracepoint_printk = 0;
8489                else
8490                        static_key_enable(&tracepoint_printk_key.key);
8491        }
8492        tracer_alloc_buffers();
8493}
8494
8495void __init trace_init(void)
8496{
8497        trace_event_init();
8498}
8499
8500__init static int clear_boot_tracer(void)
8501{
8502        /*
8503         * The buffer holding the default bootup tracer name lives in
8504         * an init section. This function runs as a late initcall; if
8505         * the boot tracer was never registered, clear the pointer out
8506         * here to prevent a later registration from accessing a
8507         * buffer that is about to be freed.
8508         */
8509        if (!default_bootup_tracer)
8510                return 0;
8511
8512        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8513               default_bootup_tracer);
8514        default_bootup_tracer = NULL;
8515
8516        return 0;
8517}
8518
8519fs_initcall(tracer_init_tracefs);
8520late_initcall_sync(clear_boot_tracer);
8521