linux/kernel/trace/trace.c
   1/*
   2 * ring buffer based function tracer
   3 *
   4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   6 *
   7 * Originally taken from the RT patch by:
   8 *    Arnaldo Carvalho de Melo <acme@redhat.com>
   9 *
  10 * Based on code from the latency_tracer, that is:
  11 *  Copyright (C) 2004-2006 Ingo Molnar
  12 *  Copyright (C) 2004 Nadia Yvette Chambers
  13 */
  14#include <linux/ring_buffer.h>
  15#include <generated/utsrelease.h>
  16#include <linux/stacktrace.h>
  17#include <linux/writeback.h>
  18#include <linux/kallsyms.h>
  19#include <linux/seq_file.h>
  20#include <linux/notifier.h>
  21#include <linux/irqflags.h>
  22#include <linux/debugfs.h>
  23#include <linux/tracefs.h>
  24#include <linux/pagemap.h>
  25#include <linux/hardirq.h>
  26#include <linux/linkage.h>
  27#include <linux/uaccess.h>
  28#include <linux/vmalloc.h>
  29#include <linux/ftrace.h>
  30#include <linux/module.h>
  31#include <linux/percpu.h>
  32#include <linux/splice.h>
  33#include <linux/kdebug.h>
  34#include <linux/string.h>
  35#include <linux/mount.h>
  36#include <linux/rwsem.h>
  37#include <linux/slab.h>
  38#include <linux/ctype.h>
  39#include <linux/init.h>
  40#include <linux/poll.h>
  41#include <linux/nmi.h>
  42#include <linux/fs.h>
  43#include <linux/sched/rt.h>
  44
  45#include "trace.h"
  46#include "trace_output.h"
  47
  48/*
  49 * On boot up, the ring buffer is set to the minimum size, so that
  50 * we do not waste memory on systems that are not using tracing.
  51 */
  52bool ring_buffer_expanded;
  53
  54/*
  55 * We need to change this state when a selftest is running.
   56 * A selftest will look inside the ring-buffer to count the
   57 * entries inserted during the selftest, although concurrent
   58 * insertions into the ring-buffer, such as trace_printk, could occur
   59 * at the same time, giving false positive or negative results.
  60 */
  61static bool __read_mostly tracing_selftest_running;
  62
  63/*
  64 * If a tracer is running, we do not want to run SELFTEST.
  65 */
  66bool __read_mostly tracing_selftest_disabled;
  67
  68/* Pipe tracepoints to printk */
  69struct trace_iterator *tracepoint_print_iter;
  70int tracepoint_printk;
  71
  72/* For tracers that don't implement custom flags */
  73static struct tracer_opt dummy_tracer_opt[] = {
  74        { }
  75};
  76
  77static int
  78dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
  79{
  80        return 0;
  81}
  82
  83/*
  84 * To prevent the comm cache from being overwritten when no
  85 * tracing is active, only save the comm when a trace event
  86 * occurred.
  87 */
  88static DEFINE_PER_CPU(bool, trace_cmdline_save);
  89
  90/*
  91 * Kill all tracing for good (never come back).
  92 * It is initialized to 1 but will turn to zero if the initialization
  93 * of the tracer is successful. But that is the only place that sets
  94 * this back to zero.
  95 */
  96static int tracing_disabled = 1;
  97
  98cpumask_var_t __read_mostly     tracing_buffer_mask;
  99
 100/*
 101 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 102 *
 103 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 104 * is set, then ftrace_dump is called. This will output the contents
 105 * of the ftrace buffers to the console.  This is very useful for
  106 * capturing traces that lead to crashes and outputting them to a
 107 * serial console.
 108 *
  109 * It is off by default, but you can enable it either by specifying
  110 * "ftrace_dump_on_oops" on the kernel command line, or by setting
  111 * /proc/sys/kernel/ftrace_dump_on_oops
  112 * Set to 1 if you want to dump the buffers of all CPUs
  113 * Set to 2 if you want to dump the buffer of the CPU that triggered the oops
 114 */
 115
 116enum ftrace_dump_mode ftrace_dump_on_oops;
 117
 118/* When set, tracing will stop when a WARN*() is hit */
 119int __disable_trace_on_warning;
 120
 121#ifdef CONFIG_TRACE_ENUM_MAP_FILE
 122/* Map of enums to their values, for "enum_map" file */
 123struct trace_enum_map_head {
 124        struct module                   *mod;
 125        unsigned long                   length;
 126};
 127
 128union trace_enum_map_item;
 129
 130struct trace_enum_map_tail {
 131        /*
 132         * "end" is first and points to NULL as it must be different
  133         * from "mod" or "enum_string"
 134         */
 135        union trace_enum_map_item       *next;
 136        const char                      *end;   /* points to NULL */
 137};
 138
 139static DEFINE_MUTEX(trace_enum_mutex);
 140
 141/*
 142 * The trace_enum_maps are saved in an array with two extra elements,
 143 * one at the beginning, and one at the end. The beginning item contains
 144 * the count of the saved maps (head.length), and the module they
 145 * belong to if not built in (head.mod). The ending item contains a
 146 * pointer to the next array of saved enum_map items.
 147 */
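/*
 * Illustrative layout of one saved array (N == head.length):
 *
 *   [ head ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail ]
 *
 * tail.next then points to the next saved array, if any.
 */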
 148union trace_enum_map_item {
 149        struct trace_enum_map           map;
 150        struct trace_enum_map_head      head;
 151        struct trace_enum_map_tail      tail;
 152};
 153
 154static union trace_enum_map_item *trace_enum_maps;
 155#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
 156
 157static int tracing_set_tracer(struct trace_array *tr, const char *buf);
 158
 159#define MAX_TRACER_SIZE         100
 160static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 161static char *default_bootup_tracer;
 162
 163static bool allocate_snapshot;
 164
 165static int __init set_cmdline_ftrace(char *str)
 166{
 167        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 168        default_bootup_tracer = bootup_tracer_buf;
 169        /* We are using ftrace early, expand it */
 170        ring_buffer_expanded = true;
 171        return 1;
 172}
 173__setup("ftrace=", set_cmdline_ftrace);
 174
 175static int __init set_ftrace_dump_on_oops(char *str)
 176{
 177        if (*str++ != '=' || !*str) {
 178                ftrace_dump_on_oops = DUMP_ALL;
 179                return 1;
 180        }
 181
 182        if (!strcmp("orig_cpu", str)) {
 183                ftrace_dump_on_oops = DUMP_ORIG;
 184                return 1;
 185        }
 186
 187        return 0;
 188}
 189__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 190
 191static int __init stop_trace_on_warning(char *str)
 192{
 193        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 194                __disable_trace_on_warning = 1;
 195        return 1;
 196}
 197__setup("traceoff_on_warning", stop_trace_on_warning);
 198
 199static int __init boot_alloc_snapshot(char *str)
 200{
 201        allocate_snapshot = true;
 202        /* We also need the main ring buffer expanded */
 203        ring_buffer_expanded = true;
 204        return 1;
 205}
 206__setup("alloc_snapshot", boot_alloc_snapshot);
 207
 208
 209static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 210
 211static int __init set_trace_boot_options(char *str)
 212{
 213        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 214        return 0;
 215}
 216__setup("trace_options=", set_trace_boot_options);
 217
 218static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 219static char *trace_boot_clock __initdata;
 220
 221static int __init set_trace_boot_clock(char *str)
 222{
 223        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 224        trace_boot_clock = trace_boot_clock_buf;
 225        return 0;
 226}
 227__setup("trace_clock=", set_trace_boot_clock);
 228
 229static int __init set_tracepoint_printk(char *str)
 230{
 231        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 232                tracepoint_printk = 1;
 233        return 1;
 234}
 235__setup("tp_printk", set_tracepoint_printk);
 236
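/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */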
 237unsigned long long ns2usecs(cycle_t nsec)
 238{
 239        nsec += 500;
 240        do_div(nsec, 1000);
 241        return nsec;
 242}
 243
 244/* trace_flags holds trace_options default values */
 245#define TRACE_DEFAULT_FLAGS                                             \
 246        (FUNCTION_DEFAULT_FLAGS |                                       \
 247         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 248         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 249         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 250         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
 251
 252/* trace_options that are only supported by global_trace */
 253#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 254               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 255
 256/* trace_flags that are default zero for instances */
 257#define ZEROED_TRACE_FLAGS \
 258        TRACE_ITER_EVENT_FORK
 259
 260/*
 261 * The global_trace is the descriptor that holds the tracing
 262 * buffers for the live tracing. For each CPU, it contains
  263 * a linked list of pages that will store trace entries. The
  264 * page descriptor of the pages in memory is used to hold
  265 * the linked list by linking the lru item in the page descriptor
  266 * to each of the pages in the buffer for that CPU.
 267 *
 268 * For each active CPU there is a data field that holds the
 269 * pages for the buffer for that CPU. Each CPU has the same number
 270 * of pages allocated for its buffer.
 271 */
 272static struct trace_array global_trace = {
 273        .trace_flags = TRACE_DEFAULT_FLAGS,
 274};
 275
 276LIST_HEAD(ftrace_trace_arrays);
 277
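/*
 * trace_array_get - take a reference on a trace_array
 *
 * Looks up @this_tr on the ftrace_trace_arrays list under
 * trace_types_lock and bumps its reference count if found.
 * Returns 0 on success, or -ENODEV if the trace array is not listed.
 */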
 278int trace_array_get(struct trace_array *this_tr)
 279{
 280        struct trace_array *tr;
 281        int ret = -ENODEV;
 282
 283        mutex_lock(&trace_types_lock);
 284        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 285                if (tr == this_tr) {
 286                        tr->ref++;
 287                        ret = 0;
 288                        break;
 289                }
 290        }
 291        mutex_unlock(&trace_types_lock);
 292
 293        return ret;
 294}
 295
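/*
 * Drop a reference previously taken with trace_array_get().
 * The caller must hold trace_types_lock; trace_array_put() is the
 * locking wrapper.
 */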
 296static void __trace_array_put(struct trace_array *this_tr)
 297{
 298        WARN_ON(!this_tr->ref);
 299        this_tr->ref--;
 300}
 301
 302void trace_array_put(struct trace_array *this_tr)
 303{
 304        mutex_lock(&trace_types_lock);
 305        __trace_array_put(this_tr);
 306        mutex_unlock(&trace_types_lock);
 307}
 308
 309int call_filter_check_discard(struct trace_event_call *call, void *rec,
 310                              struct ring_buffer *buffer,
 311                              struct ring_buffer_event *event)
 312{
 313        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 314            !filter_match_preds(call->filter, rec)) {
 315                __trace_event_discard_commit(buffer, event);
 316                return 1;
 317        }
 318
 319        return 0;
 320}
 321
 322void trace_free_pid_list(struct trace_pid_list *pid_list)
 323{
 324        vfree(pid_list->pids);
 325        kfree(pid_list);
 326}
 327
 328/**
 329 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 330 * @filtered_pids: The list of pids to check
 331 * @search_pid: The PID to find in @filtered_pids
 332 *
  333 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 334 */
 335bool
 336trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 337{
 338        /*
 339         * If pid_max changed after filtered_pids was created, we
 340         * by default ignore all pids greater than the previous pid_max.
 341         */
 342        if (search_pid >= filtered_pids->pid_max)
 343                return false;
 344
 345        return test_bit(search_pid, filtered_pids->pids);
 346}
 347
 348/**
 349 * trace_ignore_this_task - should a task be ignored for tracing
 350 * @filtered_pids: The list of pids to check
 351 * @task: The task that should be ignored if not filtered
 352 *
 353 * Checks if @task should be traced or not from @filtered_pids.
 354 * Returns true if @task should *NOT* be traced.
 355 * Returns false if @task should be traced.
 356 */
 357bool
 358trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
 359{
 360        /*
  361         * If filtered_pids does not exist, return false:
  362         * all pids are then good to trace.
 363         */
 364        if (!filtered_pids)
 365                return false;
 366
 367        return !trace_find_filtered_pid(filtered_pids, task->pid);
 368}
 369
 370/**
 371 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
 372 * @pid_list: The list to modify
 373 * @self: The current task for fork or NULL for exit
 374 * @task: The task to add or remove
 375 *
  376 * When adding a task, if @self is defined, the task is only added if @self
 377 * is also included in @pid_list. This happens on fork and tasks should
 378 * only be added when the parent is listed. If @self is NULL, then the
 379 * @task pid will be removed from the list, which would happen on exit
 380 * of a task.
 381 */
 382void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 383                                  struct task_struct *self,
 384                                  struct task_struct *task)
 385{
 386        if (!pid_list)
 387                return;
 388
 389        /* For forks, we only add if the forking task is listed */
 390        if (self) {
 391                if (!trace_find_filtered_pid(pid_list, self->pid))
 392                        return;
 393        }
 394
 395        /* Sorry, but we don't support pid_max changing after setting */
 396        if (task->pid >= pid_list->pid_max)
 397                return;
 398
 399        /* "self" is set for forks, and NULL for exits */
 400        if (self)
 401                set_bit(task->pid, pid_list->pids);
 402        else
 403                clear_bit(task->pid, pid_list->pids);
 404}
 405
 406/**
 407 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 408 * @pid_list: The pid list to show
 409 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 410 * @pos: The position of the file
 411 *
 412 * This is used by the seq_file "next" operation to iterate the pids
 413 * listed in a trace_pid_list structure.
 414 *
 415 * Returns the pid+1 as we want to display pid of zero, but NULL would
 416 * stop the iteration.
 417 */
 418void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 419{
 420        unsigned long pid = (unsigned long)v;
 421
 422        (*pos)++;
 423
  424        /* pid already is +1 of the actual previous bit */
 425        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 426
 427        /* Return pid + 1 to allow zero to be represented */
 428        if (pid < pid_list->pid_max)
 429                return (void *)(pid + 1);
 430
 431        return NULL;
 432}
 433
 434/**
 435 * trace_pid_start - Used for seq_file to start reading pid lists
 436 * @pid_list: The pid list to show
 437 * @pos: The position of the file
 438 *
 439 * This is used by seq_file "start" operation to start the iteration
 440 * of listing pids.
 441 *
 442 * Returns the pid+1 as we want to display pid of zero, but NULL would
 443 * stop the iteration.
 444 */
 445void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 446{
 447        unsigned long pid;
 448        loff_t l = 0;
 449
 450        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 451        if (pid >= pid_list->pid_max)
 452                return NULL;
 453
 454        /* Return pid + 1 so that zero can be the exit value */
 455        for (pid++; pid && l < *pos;
 456             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 457                ;
 458        return (void *)pid;
 459}
 460
 461/**
 462 * trace_pid_show - show the current pid in seq_file processing
 463 * @m: The seq_file structure to write into
 464 * @v: A void pointer of the pid (+1) value to display
 465 *
 466 * Can be directly used by seq_file operations to display the current
 467 * pid value.
 468 */
 469int trace_pid_show(struct seq_file *m, void *v)
 470{
 471        unsigned long pid = (unsigned long)v - 1;
 472
 473        seq_printf(m, "%lu\n", pid);
 474        return 0;
 475}
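/*
 * A minimal sketch of how the helpers above are meant to be wired into
 * seq_file operations (the my_pid_* names are hypothetical):
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_start,   -- calls trace_pid_start()
 *		.next	= my_pid_next,    -- calls trace_pid_next()
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */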
 476
 477/* 128 should be much more than enough */
 478#define PID_BUF_SIZE            127
 479
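/*
 * Parse a whitespace-separated list of pids from @ubuf and build a new
 * trace_pid_list in *new_pid_list. If @filtered_pids is given, its bits
 * are copied first, so the parsed pids are added to the existing set.
 * Returns the number of bytes consumed, or a negative errno on failure.
 */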
 480int trace_pid_write(struct trace_pid_list *filtered_pids,
 481                    struct trace_pid_list **new_pid_list,
 482                    const char __user *ubuf, size_t cnt)
 483{
 484        struct trace_pid_list *pid_list;
 485        struct trace_parser parser;
 486        unsigned long val;
 487        int nr_pids = 0;
 488        ssize_t read = 0;
 489        ssize_t ret = 0;
 490        loff_t pos;
 491        pid_t pid;
 492
 493        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 494                return -ENOMEM;
 495
 496        /*
  497         * Always create a new array. The write is an all-or-nothing
  498         * operation: a new array is built whenever the user adds new
  499         * pids, and if the operation fails, the current list is
  500         * not modified.
 501         */
 502        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 503        if (!pid_list)
 504                return -ENOMEM;
 505
 506        pid_list->pid_max = READ_ONCE(pid_max);
 507
 508        /* Only truncating will shrink pid_max */
 509        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 510                pid_list->pid_max = filtered_pids->pid_max;
 511
 512        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 513        if (!pid_list->pids) {
 514                kfree(pid_list);
 515                return -ENOMEM;
 516        }
 517
 518        if (filtered_pids) {
 519                /* copy the current bits to the new max */
 520                for_each_set_bit(pid, filtered_pids->pids,
 521                                 filtered_pids->pid_max) {
 522                        set_bit(pid, pid_list->pids);
 523                        nr_pids++;
 524                }
 525        }
 526
 527        while (cnt > 0) {
 528
 529                pos = 0;
 530
 531                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 532                if (ret < 0 || !trace_parser_loaded(&parser))
 533                        break;
 534
 535                read += ret;
 536                ubuf += ret;
 537                cnt -= ret;
 538
 539                parser.buffer[parser.idx] = 0;
 540
 541                ret = -EINVAL;
 542                if (kstrtoul(parser.buffer, 0, &val))
 543                        break;
 544                if (val >= pid_list->pid_max)
 545                        break;
 546
 547                pid = (pid_t)val;
 548
 549                set_bit(pid, pid_list->pids);
 550                nr_pids++;
 551
 552                trace_parser_clear(&parser);
 553                ret = 0;
 554        }
 555        trace_parser_put(&parser);
 556
 557        if (ret < 0) {
 558                trace_free_pid_list(pid_list);
 559                return ret;
 560        }
 561
 562        if (!nr_pids) {
 563                /* Cleared the list of pids */
 564                trace_free_pid_list(pid_list);
 565                read = ret;
 566                pid_list = NULL;
 567        }
 568
 569        *new_pid_list = pid_list;
 570
 571        return read;
 572}
 573
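/*
 * Current (normalized) timestamp of @buf for @cpu. Falls back to
 * trace_clock_local() early in boot, before the buffer is allocated.
 */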
 574static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 575{
 576        u64 ts;
 577
 578        /* Early boot up does not have a buffer yet */
 579        if (!buf->buffer)
 580                return trace_clock_local();
 581
 582        ts = ring_buffer_time_stamp(buf->buffer, cpu);
 583        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 584
 585        return ts;
 586}
 587
 588cycle_t ftrace_now(int cpu)
 589{
 590        return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
 591}
 592
 593/**
 594 * tracing_is_enabled - Show if global_trace has been disabled
 595 *
 596 * Shows if the global trace has been enabled or not. It uses the
 597 * mirror flag "buffer_disabled" to be used in fast paths such as for
 598 * the irqsoff tracer. But it may be inaccurate due to races. If you
 599 * need to know the accurate state, use tracing_is_on() which is a little
 600 * slower, but accurate.
 601 */
 602int tracing_is_enabled(void)
 603{
 604        /*
 605         * For quick access (irqsoff uses this in fast path), just
 606         * return the mirror variable of the state of the ring buffer.
 607         * It's a little racy, but we don't really care.
 608         */
 609        smp_rmb();
 610        return !global_trace.buffer_disabled;
 611}
 612
 613/*
 614 * trace_buf_size is the size in bytes that is allocated
 615 * for a buffer. Note, the number of bytes is always rounded
 616 * to page size.
 617 *
  618 * This number is purposely set to a low value of 16384 entries.
  619 * If a dump on oops happens, it is much appreciated not to have
  620 * to wait for all that output. Anyway, this is both boot time
  621 * and run time configurable.
 622 */
 623#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 624
 625static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 626
 627/* trace_types holds a link list of available tracers. */
 628static struct tracer            *trace_types __read_mostly;
 629
 630/*
 631 * trace_types_lock is used to protect the trace_types list.
 632 */
 633DEFINE_MUTEX(trace_types_lock);
 634
 635/*
  636 * Serialize access to the ring buffer.
  637 *
  638 * The ring buffer serializes readers, but that is only low-level protection.
  639 * The validity of the events (returned by ring_buffer_peek(), etc.)
  640 * is not protected by the ring buffer.
  641 *
  642 * The content of events may become garbage if we allow another process to
  643 * consume these events concurrently:
  644 *   A) the page of the consumed events may become a normal page
  645 *      (not a reader page) in the ring buffer, and this page will be rewritten
  646 *      by the event producer.
  647 *   B) the page of the consumed events may become a page for splice_read,
  648 *      and this page will be returned to the system.
  649 *
  650 * These primitives allow multiple processes to access different per-cpu
  651 * ring buffers concurrently.
  652 *
  653 * These primitives don't distinguish read-only and read-consume access.
  654 * Multiple read-only accesses are also serialized.
 655 */
 656
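/*
 * Typical reader-side usage (a sketch only):
 *
 *	trace_access_lock(cpu);
 *	... consume or splice events from that cpu's buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS serializes against all per-cpu readers.
 */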
 657#ifdef CONFIG_SMP
 658static DECLARE_RWSEM(all_cpu_access_lock);
 659static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 660
 661static inline void trace_access_lock(int cpu)
 662{
 663        if (cpu == RING_BUFFER_ALL_CPUS) {
 664                /* gain it for accessing the whole ring buffer. */
 665                down_write(&all_cpu_access_lock);
 666        } else {
 667                /* gain it for accessing a cpu ring buffer. */
 668
 669                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 670                down_read(&all_cpu_access_lock);
 671
 672                /* Secondly block other access to this @cpu ring buffer. */
 673                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 674        }
 675}
 676
 677static inline void trace_access_unlock(int cpu)
 678{
 679        if (cpu == RING_BUFFER_ALL_CPUS) {
 680                up_write(&all_cpu_access_lock);
 681        } else {
 682                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 683                up_read(&all_cpu_access_lock);
 684        }
 685}
 686
 687static inline void trace_access_lock_init(void)
 688{
 689        int cpu;
 690
 691        for_each_possible_cpu(cpu)
 692                mutex_init(&per_cpu(cpu_access_lock, cpu));
 693}
 694
 695#else
 696
 697static DEFINE_MUTEX(access_lock);
 698
 699static inline void trace_access_lock(int cpu)
 700{
 701        (void)cpu;
 702        mutex_lock(&access_lock);
 703}
 704
 705static inline void trace_access_unlock(int cpu)
 706{
 707        (void)cpu;
 708        mutex_unlock(&access_lock);
 709}
 710
 711static inline void trace_access_lock_init(void)
 712{
 713}
 714
 715#endif
 716
 717#ifdef CONFIG_STACKTRACE
 718static void __ftrace_trace_stack(struct ring_buffer *buffer,
 719                                 unsigned long flags,
 720                                 int skip, int pc, struct pt_regs *regs);
 721static inline void ftrace_trace_stack(struct trace_array *tr,
 722                                      struct ring_buffer *buffer,
 723                                      unsigned long flags,
 724                                      int skip, int pc, struct pt_regs *regs);
 725
 726#else
 727static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
 728                                        unsigned long flags,
 729                                        int skip, int pc, struct pt_regs *regs)
 730{
 731}
 732static inline void ftrace_trace_stack(struct trace_array *tr,
 733                                      struct ring_buffer *buffer,
 734                                      unsigned long flags,
 735                                      int skip, int pc, struct pt_regs *regs)
 736{
 737}
 738
 739#endif
 740
 741static void tracer_tracing_on(struct trace_array *tr)
 742{
 743        if (tr->trace_buffer.buffer)
 744                ring_buffer_record_on(tr->trace_buffer.buffer);
 745        /*
 746         * This flag is looked at when buffers haven't been allocated
 747         * yet, or by some tracers (like irqsoff), that just want to
 748         * know if the ring buffer has been disabled, but it can handle
 749         * races of where it gets disabled but we still do a record.
  750         * races where it gets disabled while we still do a record.
 751         * important to be fast than accurate.
 752         */
 753        tr->buffer_disabled = 0;
 754        /* Make the flag seen by readers */
 755        smp_wmb();
 756}
 757
 758/**
 759 * tracing_on - enable tracing buffers
 760 *
 761 * This function enables tracing buffers that may have been
 762 * disabled with tracing_off.
 763 */
 764void tracing_on(void)
 765{
 766        tracer_tracing_on(&global_trace);
 767}
 768EXPORT_SYMBOL_GPL(tracing_on);
 769
 770/**
 771 * __trace_puts - write a constant string into the trace buffer.
 772 * @ip:    The address of the caller
 773 * @str:   The constant string to write
 774 * @size:  The size of the string.
 775 */
 776int __trace_puts(unsigned long ip, const char *str, int size)
 777{
 778        struct ring_buffer_event *event;
 779        struct ring_buffer *buffer;
 780        struct print_entry *entry;
 781        unsigned long irq_flags;
 782        int alloc;
 783        int pc;
 784
 785        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 786                return 0;
 787
 788        pc = preempt_count();
 789
 790        if (unlikely(tracing_selftest_running || tracing_disabled))
 791                return 0;
 792
 793        alloc = sizeof(*entry) + size + 2; /* possible \n added */
 794
 795        local_save_flags(irq_flags);
 796        buffer = global_trace.trace_buffer.buffer;
 797        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
 798                                          irq_flags, pc);
 799        if (!event)
 800                return 0;
 801
 802        entry = ring_buffer_event_data(event);
 803        entry->ip = ip;
 804
 805        memcpy(&entry->buf, str, size);
 806
 807        /* Add a newline if necessary */
 808        if (entry->buf[size - 1] != '\n') {
 809                entry->buf[size] = '\n';
 810                entry->buf[size + 1] = '\0';
 811        } else
 812                entry->buf[size] = '\0';
 813
 814        __buffer_unlock_commit(buffer, event);
 815        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 816
 817        return size;
 818}
 819EXPORT_SYMBOL_GPL(__trace_puts);
 820
 821/**
 822 * __trace_bputs - write the pointer to a constant string into trace buffer
 823 * @ip:    The address of the caller
 824 * @str:   The constant string to write to the buffer to
 825 */
 826int __trace_bputs(unsigned long ip, const char *str)
 827{
 828        struct ring_buffer_event *event;
 829        struct ring_buffer *buffer;
 830        struct bputs_entry *entry;
 831        unsigned long irq_flags;
 832        int size = sizeof(struct bputs_entry);
 833        int pc;
 834
 835        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 836                return 0;
 837
 838        pc = preempt_count();
 839
 840        if (unlikely(tracing_selftest_running || tracing_disabled))
 841                return 0;
 842
 843        local_save_flags(irq_flags);
 844        buffer = global_trace.trace_buffer.buffer;
 845        event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
 846                                          irq_flags, pc);
 847        if (!event)
 848                return 0;
 849
 850        entry = ring_buffer_event_data(event);
 851        entry->ip                       = ip;
 852        entry->str                      = str;
 853
 854        __buffer_unlock_commit(buffer, event);
 855        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 856
 857        return 1;
 858}
 859EXPORT_SYMBOL_GPL(__trace_bputs);
 860
 861#ifdef CONFIG_TRACER_SNAPSHOT
 862/**
  863 * tracing_snapshot - take a snapshot of the current buffer.
 864 *
 865 * This causes a swap between the snapshot buffer and the current live
 866 * tracing buffer. You can use this to take snapshots of the live
 867 * trace when some condition is triggered, but continue to trace.
 868 *
 869 * Note, make sure to allocate the snapshot with either
 870 * a tracing_snapshot_alloc(), or by doing it manually
 871 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 872 *
  873 * If the snapshot buffer is not allocated, this will stop tracing,
  874 * basically making a permanent snapshot.
 875 */
 876void tracing_snapshot(void)
 877{
 878        struct trace_array *tr = &global_trace;
 879        struct tracer *tracer = tr->current_trace;
 880        unsigned long flags;
 881
 882        if (in_nmi()) {
 883                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
 884                internal_trace_puts("*** snapshot is being ignored        ***\n");
 885                return;
 886        }
 887
 888        if (!tr->allocated_snapshot) {
 889                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
 890                internal_trace_puts("*** stopping trace here!   ***\n");
 891                tracing_off();
 892                return;
 893        }
 894
 895        /* Note, snapshot can not be used when the tracer uses it */
 896        if (tracer->use_max_tr) {
 897                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
 898                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
 899                return;
 900        }
 901
 902        local_irq_save(flags);
 903        update_max_tr(tr, current, smp_processor_id());
 904        local_irq_restore(flags);
 905}
 906EXPORT_SYMBOL_GPL(tracing_snapshot);
 907
 908static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
 909                                        struct trace_buffer *size_buf, int cpu_id);
 910static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
 911
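/*
 * Allocate the spare (max/snapshot) buffer for @tr if it is not already
 * allocated, sizing it to match the main trace buffer.
 */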
 912static int alloc_snapshot(struct trace_array *tr)
 913{
 914        int ret;
 915
 916        if (!tr->allocated_snapshot) {
 917
 918                /* allocate spare buffer */
 919                ret = resize_buffer_duplicate_size(&tr->max_buffer,
 920                                   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
 921                if (ret < 0)
 922                        return ret;
 923
 924                tr->allocated_snapshot = true;
 925        }
 926
 927        return 0;
 928}
 929
 930static void free_snapshot(struct trace_array *tr)
 931{
 932        /*
  933         * We don't free the ring buffer; instead, we resize it because
  934         * the max_tr ring buffer has some state (e.g. ring->clock) and
  935         * we want to preserve it.
 936         */
 937        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
 938        set_buffer_entries(&tr->max_buffer, 1);
 939        tracing_reset_online_cpus(&tr->max_buffer);
 940        tr->allocated_snapshot = false;
 941}
 942
 943/**
 944 * tracing_alloc_snapshot - allocate snapshot buffer.
 945 *
 946 * This only allocates the snapshot buffer if it isn't already
 947 * allocated - it doesn't also take a snapshot.
 948 *
 949 * This is meant to be used in cases where the snapshot buffer needs
 950 * to be set up for events that can't sleep but need to be able to
 951 * trigger a snapshot.
 952 */
 953int tracing_alloc_snapshot(void)
 954{
 955        struct trace_array *tr = &global_trace;
 956        int ret;
 957
 958        ret = alloc_snapshot(tr);
 959        WARN_ON(ret < 0);
 960
 961        return ret;
 962}
 963EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
 964
 965/**
  966 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
  967 *
  968 * This is similar to tracing_snapshot(), but it will allocate the
 969 * snapshot buffer if it isn't already allocated. Use this only
 970 * where it is safe to sleep, as the allocation may sleep.
 971 *
 972 * This causes a swap between the snapshot buffer and the current live
 973 * tracing buffer. You can use this to take snapshots of the live
 974 * trace when some condition is triggered, but continue to trace.
 975 */
 976void tracing_snapshot_alloc(void)
 977{
 978        int ret;
 979
 980        ret = tracing_alloc_snapshot();
 981        if (ret < 0)
 982                return;
 983
 984        tracing_snapshot();
 985}
 986EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
 987#else
 988void tracing_snapshot(void)
 989{
 990        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
 991}
 992EXPORT_SYMBOL_GPL(tracing_snapshot);
 993int tracing_alloc_snapshot(void)
 994{
 995        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
 996        return -ENODEV;
 997}
 998EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
 999void tracing_snapshot_alloc(void)
1000{
1001        /* Give warning */
1002        tracing_snapshot();
1003}
1004EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005#endif /* CONFIG_TRACER_SNAPSHOT */
1006
1007static void tracer_tracing_off(struct trace_array *tr)
1008{
1009        if (tr->trace_buffer.buffer)
1010                ring_buffer_record_off(tr->trace_buffer.buffer);
1011        /*
1012         * This flag is looked at when buffers haven't been allocated
1013         * yet, or by some tracers (like irqsoff), that just want to
1014         * know if the ring buffer has been disabled, but it can handle
 1015         * races where it gets disabled while we still do a record.
1016         * As the check is in the fast path of the tracers, it is more
1017         * important to be fast than accurate.
1018         */
1019        tr->buffer_disabled = 1;
1020        /* Make the flag seen by readers */
1021        smp_wmb();
1022}
1023
1024/**
1025 * tracing_off - turn off tracing buffers
1026 *
1027 * This function stops the tracing buffers from recording data.
1028 * It does not disable any overhead the tracers themselves may
1029 * be causing. This function simply causes all recording to
1030 * the ring buffers to fail.
1031 */
1032void tracing_off(void)
1033{
1034        tracer_tracing_off(&global_trace);
1035}
1036EXPORT_SYMBOL_GPL(tracing_off);
1037
1038void disable_trace_on_warning(void)
1039{
1040        if (__disable_trace_on_warning)
1041                tracing_off();
1042}
1043
1044/**
 1045 * tracer_tracing_is_on - show the real state of the ring buffer
 1046 * @tr: the trace array to check
 1047 *
 1048 * Shows the real state of the ring buffer: whether it is enabled or not.
1049 */
1050static int tracer_tracing_is_on(struct trace_array *tr)
1051{
1052        if (tr->trace_buffer.buffer)
1053                return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054        return !tr->buffer_disabled;
1055}
1056
1057/**
1058 * tracing_is_on - show state of ring buffers enabled
1059 */
1060int tracing_is_on(void)
1061{
1062        return tracer_tracing_is_on(&global_trace);
1063}
1064EXPORT_SYMBOL_GPL(tracing_is_on);
1065
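/*
 * Parse the "trace_buf_size=" boot parameter. memparse() accepts an
 * optional K/M/G suffix; the result is the requested buffer size in bytes.
 */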
1066static int __init set_buf_size(char *str)
1067{
1068        unsigned long buf_size;
1069
1070        if (!str)
1071                return 0;
1072        buf_size = memparse(str, &str);
1073        /* nr_entries can not be zero */
1074        if (buf_size == 0)
1075                return 0;
1076        trace_buf_size = buf_size;
1077        return 1;
1078}
1079__setup("trace_buf_size=", set_buf_size);
1080
1081static int __init set_tracing_thresh(char *str)
1082{
1083        unsigned long threshold;
1084        int ret;
1085
1086        if (!str)
1087                return 0;
1088        ret = kstrtoul(str, 0, &threshold);
1089        if (ret < 0)
1090                return 0;
1091        tracing_thresh = threshold * 1000;
1092        return 1;
1093}
1094__setup("tracing_thresh=", set_tracing_thresh);
1095
1096unsigned long nsecs_to_usecs(unsigned long nsecs)
1097{
1098        return nsecs / 1000;
1099}
1100
1101/*
1102 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105 * of strings in the order that the enums were defined.
1106 */
1107#undef C
1108#define C(a, b) b
1109
 1110/* These must match the bit positions in trace_iterator_flags */
1111static const char *trace_options[] = {
1112        TRACE_FLAGS
1113        NULL
1114};
1115
1116static struct {
1117        u64 (*func)(void);
1118        const char *name;
1119        int in_ns;              /* is this clock in nanoseconds? */
1120} trace_clocks[] = {
1121        { trace_clock_local,            "local",        1 },
1122        { trace_clock_global,           "global",       1 },
1123        { trace_clock_counter,          "counter",      0 },
1124        { trace_clock_jiffies,          "uptime",       0 },
1125        { trace_clock,                  "perf",         1 },
1126        { ktime_get_mono_fast_ns,       "mono",         1 },
1127        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1128        ARCH_TRACE_CLOCKS
1129};
1130
1131/*
1132 * trace_parser_get_init - gets the buffer for trace parser
1133 */
1134int trace_parser_get_init(struct trace_parser *parser, int size)
1135{
1136        memset(parser, 0, sizeof(*parser));
1137
1138        parser->buffer = kmalloc(size, GFP_KERNEL);
1139        if (!parser->buffer)
1140                return 1;
1141
1142        parser->size = size;
1143        return 0;
1144}
1145
1146/*
1147 * trace_parser_put - frees the buffer for trace parser
1148 */
1149void trace_parser_put(struct trace_parser *parser)
1150{
1151        kfree(parser->buffer);
1152}
1153
1154/*
 1155 * trace_get_user - reads the user input string separated by space
1156 * (matched by isspace(ch))
1157 *
1158 * For each string found the 'struct trace_parser' is updated,
1159 * and the function returns.
1160 *
1161 * Returns number of bytes read.
1162 *
1163 * See kernel/trace/trace.h for 'struct trace_parser' details.
1164 */
1165int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1166        size_t cnt, loff_t *ppos)
1167{
1168        char ch;
1169        size_t read = 0;
1170        ssize_t ret;
1171
1172        if (!*ppos)
1173                trace_parser_clear(parser);
1174
1175        ret = get_user(ch, ubuf++);
1176        if (ret)
1177                goto out;
1178
1179        read++;
1180        cnt--;
1181
1182        /*
 1183         * If the parser did not finish with the last write,
 1184         * continue reading the user input without skipping spaces.
1185         */
1186        if (!parser->cont) {
1187                /* skip white space */
1188                while (cnt && isspace(ch)) {
1189                        ret = get_user(ch, ubuf++);
1190                        if (ret)
1191                                goto out;
1192                        read++;
1193                        cnt--;
1194                }
1195
1196                /* only spaces were written */
1197                if (isspace(ch)) {
1198                        *ppos += read;
1199                        ret = read;
1200                        goto out;
1201                }
1202
1203                parser->idx = 0;
1204        }
1205
1206        /* read the non-space input */
1207        while (cnt && !isspace(ch)) {
1208                if (parser->idx < parser->size - 1)
1209                        parser->buffer[parser->idx++] = ch;
1210                else {
1211                        ret = -EINVAL;
1212                        goto out;
1213                }
1214                ret = get_user(ch, ubuf++);
1215                if (ret)
1216                        goto out;
1217                read++;
1218                cnt--;
1219        }
1220
1221        /* We either got finished input or we have to wait for another call. */
1222        if (isspace(ch)) {
1223                parser->buffer[parser->idx] = 0;
1224                parser->cont = false;
1225        } else if (parser->idx < parser->size - 1) {
1226                parser->cont = true;
1227                parser->buffer[parser->idx++] = ch;
1228        } else {
1229                ret = -EINVAL;
1230                goto out;
1231        }
1232
1233        *ppos += read;
1234        ret = read;
1235
1236out:
1237        return ret;
1238}
1239
1240/* TODO add a seq_buf_to_buffer() */
1241static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1242{
1243        int len;
1244
1245        if (trace_seq_used(s) <= s->seq.readpos)
1246                return -EBUSY;
1247
1248        len = trace_seq_used(s) - s->seq.readpos;
1249        if (cnt > len)
1250                cnt = len;
1251        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1252
1253        s->seq.readpos += cnt;
1254        return cnt;
1255}
1256
1257unsigned long __read_mostly     tracing_thresh;
1258
1259#ifdef CONFIG_TRACER_MAX_TRACE
1260/*
1261 * Copy the new maximum trace into the separate maximum-trace
1262 * structure. (this way the maximum trace is permanently saved,
1263 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1264 */
1265static void
1266__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1267{
1268        struct trace_buffer *trace_buf = &tr->trace_buffer;
1269        struct trace_buffer *max_buf = &tr->max_buffer;
1270        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1271        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1272
1273        max_buf->cpu = cpu;
1274        max_buf->time_start = data->preempt_timestamp;
1275
1276        max_data->saved_latency = tr->max_latency;
1277        max_data->critical_start = data->critical_start;
1278        max_data->critical_end = data->critical_end;
1279
1280        memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1281        max_data->pid = tsk->pid;
1282        /*
1283         * If tsk == current, then use current_uid(), as that does not use
1284         * RCU. The irq tracer can be called out of RCU scope.
1285         */
1286        if (tsk == current)
1287                max_data->uid = current_uid();
1288        else
1289                max_data->uid = task_uid(tsk);
1290
1291        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1292        max_data->policy = tsk->policy;
1293        max_data->rt_priority = tsk->rt_priority;
1294
 1295        /* record this task's comm */
1296        tracing_record_cmdline(tsk);
1297}
1298
1299/**
1300 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1301 * @tr: tracer
1302 * @tsk: the task with the latency
1303 * @cpu: The cpu that initiated the trace.
1304 *
1305 * Flip the buffers between the @tr and the max_tr and record information
1306 * about which task was the cause of this latency.
1307 */
1308void
1309update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310{
1311        struct ring_buffer *buf;
1312
1313        if (tr->stop_count)
1314                return;
1315
1316        WARN_ON_ONCE(!irqs_disabled());
1317
1318        if (!tr->allocated_snapshot) {
1319                /* Only the nop tracer should hit this when disabling */
1320                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1321                return;
1322        }
1323
1324        arch_spin_lock(&tr->max_lock);
1325
1326        buf = tr->trace_buffer.buffer;
1327        tr->trace_buffer.buffer = tr->max_buffer.buffer;
1328        tr->max_buffer.buffer = buf;
1329
1330        __update_max_tr(tr, tsk, cpu);
1331        arch_spin_unlock(&tr->max_lock);
1332}
1333
1334/**
1335 * update_max_tr_single - only copy one trace over, and reset the rest
 1336 * @tr: tracer
 1337 * @tsk: task with the latency
 1338 * @cpu: the cpu of the buffer to copy.
1339 *
1340 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1341 */
1342void
1343update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1344{
1345        int ret;
1346
1347        if (tr->stop_count)
1348                return;
1349
1350        WARN_ON_ONCE(!irqs_disabled());
1351        if (!tr->allocated_snapshot) {
1352                /* Only the nop tracer should hit this when disabling */
1353                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1354                return;
1355        }
1356
1357        arch_spin_lock(&tr->max_lock);
1358
1359        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1360
1361        if (ret == -EBUSY) {
1362                /*
1363                 * We failed to swap the buffer due to a commit taking
1364                 * place on this CPU. We fail to record, but we reset
1365                 * the max trace buffer (no one writes directly to it)
1366                 * and flag that it failed.
1367                 */
1368                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1369                        "Failed to swap buffers due to commit in progress\n");
1370        }
1371
1372        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1373
1374        __update_max_tr(tr, tsk, cpu);
1375        arch_spin_unlock(&tr->max_lock);
1376}
1377#endif /* CONFIG_TRACER_MAX_TRACE */
1378
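/*
 * Block until data is available in the per-cpu buffer @iter reads from,
 * or, if @full, until a full page of data is available. Non-consuming
 * (static) buffer iterators return immediately instead of waiting.
 */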
1379static int wait_on_pipe(struct trace_iterator *iter, bool full)
1380{
1381        /* Iterators are static, they should be filled or empty */
1382        if (trace_buffer_iter(iter, iter->cpu_file))
1383                return 0;
1384
1385        return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1386                                full);
1387}
1388
1389#ifdef CONFIG_FTRACE_STARTUP_TEST
1390static int run_tracer_selftest(struct tracer *type)
1391{
1392        struct trace_array *tr = &global_trace;
1393        struct tracer *saved_tracer = tr->current_trace;
1394        int ret;
1395
1396        if (!type->selftest || tracing_selftest_disabled)
1397                return 0;
1398
1399        /*
1400         * Run a selftest on this tracer.
1401         * Here we reset the trace buffer, and set the current
1402         * tracer to be this tracer. The tracer can then run some
1403         * internal tracing to verify that everything is in order.
1404         * If we fail, we do not register this tracer.
1405         */
1406        tracing_reset_online_cpus(&tr->trace_buffer);
1407
1408        tr->current_trace = type;
1409
1410#ifdef CONFIG_TRACER_MAX_TRACE
1411        if (type->use_max_tr) {
1412                /* If we expanded the buffers, make sure the max is expanded too */
1413                if (ring_buffer_expanded)
1414                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1415                                           RING_BUFFER_ALL_CPUS);
1416                tr->allocated_snapshot = true;
1417        }
1418#endif
1419
1420        /* the test is responsible for initializing and enabling */
1421        pr_info("Testing tracer %s: ", type->name);
1422        ret = type->selftest(type, tr);
1423        /* the test is responsible for resetting too */
1424        tr->current_trace = saved_tracer;
1425        if (ret) {
1426                printk(KERN_CONT "FAILED!\n");
1427                /* Add the warning after printing 'FAILED' */
1428                WARN_ON(1);
1429                return -1;
1430        }
1431        /* Only reset on passing, to avoid touching corrupted buffers */
1432        tracing_reset_online_cpus(&tr->trace_buffer);
1433
1434#ifdef CONFIG_TRACER_MAX_TRACE
1435        if (type->use_max_tr) {
1436                tr->allocated_snapshot = false;
1437
1438                /* Shrink the max buffer again */
1439                if (ring_buffer_expanded)
1440                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1441                                           RING_BUFFER_ALL_CPUS);
1442        }
1443#endif
1444
1445        printk(KERN_CONT "PASSED\n");
1446        return 0;
1447}
1448#else
1449static inline int run_tracer_selftest(struct tracer *type)
1450{
1451        return 0;
1452}
1453#endif /* CONFIG_FTRACE_STARTUP_TEST */
1454
1455static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1456
1457static void __init apply_trace_boot_options(void);
1458
1459/**
1460 * register_tracer - register a tracer with the ftrace system.
 1461 * @type: the plugin for the tracer
1462 *
1463 * Register a new plugin tracer.
1464 */
1465int __init register_tracer(struct tracer *type)
1466{
1467        struct tracer *t;
1468        int ret = 0;
1469
1470        if (!type->name) {
1471                pr_info("Tracer must have a name\n");
1472                return -1;
1473        }
1474
1475        if (strlen(type->name) >= MAX_TRACER_SIZE) {
1476                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1477                return -1;
1478        }
1479
1480        mutex_lock(&trace_types_lock);
1481
1482        tracing_selftest_running = true;
1483
1484        for (t = trace_types; t; t = t->next) {
1485                if (strcmp(type->name, t->name) == 0) {
1486                        /* already found */
1487                        pr_info("Tracer %s already registered\n",
1488                                type->name);
1489                        ret = -1;
1490                        goto out;
1491                }
1492        }
1493
1494        if (!type->set_flag)
1495                type->set_flag = &dummy_set_flag;
1496        if (!type->flags) {
 1497                /* allocate a dummy tracer_flags */
1498                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1499                if (!type->flags) {
1500                        ret = -ENOMEM;
1501                        goto out;
1502                }
1503                type->flags->val = 0;
1504                type->flags->opts = dummy_tracer_opt;
1505        } else
1506                if (!type->flags->opts)
1507                        type->flags->opts = dummy_tracer_opt;
1508
1509        /* store the tracer for __set_tracer_option */
1510        type->flags->trace = type;
1511
1512        ret = run_tracer_selftest(type);
1513        if (ret < 0)
1514                goto out;
1515
1516        type->next = trace_types;
1517        trace_types = type;
1518        add_tracer_options(&global_trace, type);
1519
1520 out:
1521        tracing_selftest_running = false;
1522        mutex_unlock(&trace_types_lock);
1523
1524        if (ret || !default_bootup_tracer)
1525                goto out_unlock;
1526
1527        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1528                goto out_unlock;
1529
1530        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1531        /* Do we want this tracer to start on bootup? */
1532        tracing_set_tracer(&global_trace, type->name);
1533        default_bootup_tracer = NULL;
1534
1535        apply_trace_boot_options();
1536
 1537        /* disable other selftests, since this will break them. */
1538        tracing_selftest_disabled = true;
1539#ifdef CONFIG_FTRACE_STARTUP_TEST
1540        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1541               type->name);
1542#endif
1543
1544 out_unlock:
1545        return ret;
1546}
1547
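/*
 * Reset the ring buffer of a single CPU in @buf. Recording is disabled
 * and all pending commits are allowed to finish before the reset.
 */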
1548void tracing_reset(struct trace_buffer *buf, int cpu)
1549{
1550        struct ring_buffer *buffer = buf->buffer;
1551
1552        if (!buffer)
1553                return;
1554
1555        ring_buffer_record_disable(buffer);
1556
1557        /* Make sure all commits have finished */
1558        synchronize_sched();
1559        ring_buffer_reset_cpu(buffer, cpu);
1560
1561        ring_buffer_record_enable(buffer);
1562}
1563
1564void tracing_reset_online_cpus(struct trace_buffer *buf)
1565{
1566        struct ring_buffer *buffer = buf->buffer;
1567        int cpu;
1568
1569        if (!buffer)
1570                return;
1571
1572        ring_buffer_record_disable(buffer);
1573
1574        /* Make sure all commits have finished */
1575        synchronize_sched();
1576
1577        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578
1579        for_each_online_cpu(cpu)
1580                ring_buffer_reset_cpu(buffer, cpu);
1581
1582        ring_buffer_record_enable(buffer);
1583}
1584
1585/* Must have trace_types_lock held */
1586void tracing_reset_all_online_cpus(void)
1587{
1588        struct trace_array *tr;
1589
1590        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1591                tracing_reset_online_cpus(&tr->trace_buffer);
1592#ifdef CONFIG_TRACER_MAX_TRACE
1593                tracing_reset_online_cpus(&tr->max_buffer);
1594#endif
1595        }
1596}
1597
1598#define SAVED_CMDLINES_DEFAULT 128
1599#define NO_CMDLINE_MAP UINT_MAX
1600static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1601struct saved_cmdlines_buffer {
1602        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1603        unsigned *map_cmdline_to_pid;
1604        unsigned cmdline_num;
1605        int cmdline_idx;
1606        char *saved_cmdlines;
1607};
1608static struct saved_cmdlines_buffer *savedcmd;
1609
 1610/* temporarily disable recording */
1611static atomic_t trace_record_cmdline_disabled __read_mostly;
1612
1613static inline char *get_saved_cmdlines(int idx)
1614{
1615        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1616}
1617
1618static inline void set_cmdline(int idx, const char *cmdline)
1619{
1620        memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1621}
1622
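/*
 * Allocate the cmdline-saving arrays for @val entries and initialize
 * both pid<->cmdline maps to NO_CMDLINE_MAP.
 */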
1623static int allocate_cmdlines_buffer(unsigned int val,
1624                                    struct saved_cmdlines_buffer *s)
1625{
1626        s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1627                                        GFP_KERNEL);
1628        if (!s->map_cmdline_to_pid)
1629                return -ENOMEM;
1630
1631        s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1632        if (!s->saved_cmdlines) {
1633                kfree(s->map_cmdline_to_pid);
1634                return -ENOMEM;
1635        }
1636
1637        s->cmdline_idx = 0;
1638        s->cmdline_num = val;
1639        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1640               sizeof(s->map_pid_to_cmdline));
1641        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1642               val * sizeof(*s->map_cmdline_to_pid));
1643
1644        return 0;
1645}
1646
1647static int trace_create_savedcmd(void)
1648{
1649        int ret;
1650
1651        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1652        if (!savedcmd)
1653                return -ENOMEM;
1654
1655        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1656        if (ret < 0) {
1657                kfree(savedcmd);
1658                savedcmd = NULL;
1659                return -ENOMEM;
1660        }
1661
1662        return 0;
1663}
1664
1665int is_tracing_stopped(void)
1666{
1667        return global_trace.stop_count;
1668}
1669
1670/**
1671 * tracing_start - quick start of the tracer
1672 *
1673 * If tracing is enabled but was stopped by tracing_stop,
1674 * this will start the tracer back up.
1675 */
1676void tracing_start(void)
1677{
1678        struct ring_buffer *buffer;
1679        unsigned long flags;
1680
1681        if (tracing_disabled)
1682                return;
1683
1684        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1685        if (--global_trace.stop_count) {
1686                if (global_trace.stop_count < 0) {
1687                        /* Someone screwed up their debugging */
1688                        WARN_ON_ONCE(1);
1689                        global_trace.stop_count = 0;
1690                }
1691                goto out;
1692        }
1693
1694        /* Prevent the buffers from switching */
1695        arch_spin_lock(&global_trace.max_lock);
1696
1697        buffer = global_trace.trace_buffer.buffer;
1698        if (buffer)
1699                ring_buffer_record_enable(buffer);
1700
1701#ifdef CONFIG_TRACER_MAX_TRACE
1702        buffer = global_trace.max_buffer.buffer;
1703        if (buffer)
1704                ring_buffer_record_enable(buffer);
1705#endif
1706
1707        arch_spin_unlock(&global_trace.max_lock);
1708
1709 out:
1710        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1711}
1712
1713static void tracing_start_tr(struct trace_array *tr)
1714{
1715        struct ring_buffer *buffer;
1716        unsigned long flags;
1717
1718        if (tracing_disabled)
1719                return;
1720
1721        /* If global, we need to also start the max tracer */
1722        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1723                return tracing_start();
1724
1725        raw_spin_lock_irqsave(&tr->start_lock, flags);
1726
1727        if (--tr->stop_count) {
1728                if (tr->stop_count < 0) {
1729                        /* Someone screwed up their debugging */
1730                        WARN_ON_ONCE(1);
1731                        tr->stop_count = 0;
1732                }
1733                goto out;
1734        }
1735
1736        buffer = tr->trace_buffer.buffer;
1737        if (buffer)
1738                ring_buffer_record_enable(buffer);
1739
1740 out:
1741        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1742}
1743
1744/**
1745 * tracing_stop - quick stop of the tracer
1746 *
1747 * Light weight way to stop tracing. Use in conjunction with
1748 * tracing_start.
1749 */
1750void tracing_stop(void)
1751{
1752        struct ring_buffer *buffer;
1753        unsigned long flags;
1754
1755        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1756        if (global_trace.stop_count++)
1757                goto out;
1758
1759        /* Prevent the buffers from switching */
1760        arch_spin_lock(&global_trace.max_lock);
1761
1762        buffer = global_trace.trace_buffer.buffer;
1763        if (buffer)
1764                ring_buffer_record_disable(buffer);
1765
1766#ifdef CONFIG_TRACER_MAX_TRACE
1767        buffer = global_trace.max_buffer.buffer;
1768        if (buffer)
1769                ring_buffer_record_disable(buffer);
1770#endif
1771
1772        arch_spin_unlock(&global_trace.max_lock);
1773
1774 out:
1775        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1776}
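    /*
     * A minimal usage sketch (editorial illustration, not kernel code): the
     * stop/start pair nests via stop_count, so independent callers may
     * safely bracket their own critical regions:
     *
     *	tracing_stop();		// 0 -> 1, recording disabled
     *	tracing_stop();		// 1 -> 2, still disabled
     *	tracing_start();	// 2 -> 1, still disabled
     *	tracing_start();	// 1 -> 0, recording enabled again
     */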
1777
1778static void tracing_stop_tr(struct trace_array *tr)
1779{
1780        struct ring_buffer *buffer;
1781        unsigned long flags;
1782
1783        /* If global, we need to also stop the max tracer */
1784        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1785                return tracing_stop();
1786
1787        raw_spin_lock_irqsave(&tr->start_lock, flags);
1788        if (tr->stop_count++)
1789                goto out;
1790
1791        buffer = tr->trace_buffer.buffer;
1792        if (buffer)
1793                ring_buffer_record_disable(buffer);
1794
1795 out:
1796        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1797}
1798
1799void trace_stop_cmdline_recording(void);
1800
1801static int trace_save_cmdline(struct task_struct *tsk)
1802{
1803        unsigned pid, idx;
1804
1805        if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1806                return 0;
1807
1808        /*
1809         * It's not the end of the world if we don't get
1810         * the lock, but we also don't want to spin
1811         * nor do we want to disable interrupts,
1812         * so if we miss here, then better luck next time.
1813         */
1814        if (!arch_spin_trylock(&trace_cmdline_lock))
1815                return 0;
1816
1817        idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1818        if (idx == NO_CMDLINE_MAP) {
1819                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1820
1821                /*
1822                 * Check whether the cmdline buffer at idx has a pid
1823                 * mapped. We are going to overwrite that entry so we
1824                 * need to clear the map_pid_to_cmdline. Otherwise we
1825                 * would read the new comm for the old pid.
1826                 */
1827                pid = savedcmd->map_cmdline_to_pid[idx];
1828                if (pid != NO_CMDLINE_MAP)
1829                        savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1830
1831                savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1832                savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1833
1834                savedcmd->cmdline_idx = idx;
1835        }
1836
1837        set_cmdline(idx, tsk->comm);
1838
1839        arch_spin_unlock(&trace_cmdline_lock);
1840
1841        return 1;
1842}
1843
1844static void __trace_find_cmdline(int pid, char comm[])
1845{
1846        unsigned map;
1847
1848        if (!pid) {
1849                strcpy(comm, "<idle>");
1850                return;
1851        }
1852
1853        if (WARN_ON_ONCE(pid < 0)) {
1854                strcpy(comm, "<XXX>");
1855                return;
1856        }
1857
1858        if (pid > PID_MAX_DEFAULT) {
1859                strcpy(comm, "<...>");
1860                return;
1861        }
1862
1863        map = savedcmd->map_pid_to_cmdline[pid];
1864        if (map != NO_CMDLINE_MAP)
1865                strcpy(comm, get_saved_cmdlines(map));
1866        else
1867                strcpy(comm, "<...>");
1868}
1869
1870void trace_find_cmdline(int pid, char comm[])
1871{
1872        preempt_disable();
1873        arch_spin_lock(&trace_cmdline_lock);
1874
1875        __trace_find_cmdline(pid, comm);
1876
1877        arch_spin_unlock(&trace_cmdline_lock);
1878        preempt_enable();
1879}
1880
1881void tracing_record_cmdline(struct task_struct *tsk)
1882{
1883        if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1884                return;
1885
1886        if (!__this_cpu_read(trace_cmdline_save))
1887                return;
1888
1889        if (trace_save_cmdline(tsk))
1890                __this_cpu_write(trace_cmdline_save, false);
1891}
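    /*
     * Note the handshake with __buffer_unlock_commit() below: committing an
     * event sets the per-cpu trace_cmdline_save flag, and the next call
     * here clears it again once the comm has been cached, which throttles
     * comm saving to at most once per committed event on each CPU.
     */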
1892
1893void
1894tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1895                             int pc)
1896{
1897        struct task_struct *tsk = current;
1898
1899        entry->preempt_count            = pc & 0xff;
1900        entry->pid                      = (tsk) ? tsk->pid : 0;
1901        entry->flags =
1902#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1903                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1904#else
1905                TRACE_FLAG_IRQS_NOSUPPORT |
1906#endif
1907                ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1908                ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1909                ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1910                (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1911                (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1912}
1913EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1914
1915static __always_inline void
1916trace_event_setup(struct ring_buffer_event *event,
1917                  int type, unsigned long flags, int pc)
1918{
1919        struct trace_entry *ent = ring_buffer_event_data(event);
1920
1921        tracing_generic_entry_update(ent, flags, pc);
1922        ent->type = type;
1923}
1924
1925struct ring_buffer_event *
1926trace_buffer_lock_reserve(struct ring_buffer *buffer,
1927                          int type,
1928                          unsigned long len,
1929                          unsigned long flags, int pc)
1930{
1931        struct ring_buffer_event *event;
1932
1933        event = ring_buffer_lock_reserve(buffer, len);
1934        if (event != NULL)
1935                trace_event_setup(event, type, flags, pc);
1936
1937        return event;
1938}
1939
1940DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1941DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1942static int trace_buffered_event_ref;
1943
1944/**
1945 * trace_buffered_event_enable - enable buffering events
1946 *
1947 * When events are being filtered, it is quicker to use a temporary
1948 * buffer to write the event data into if there's a likely chance
1949 * that it will not be committed. Discarding an event from the ring
1950 * buffer is not as fast as committing one, and is much slower than
1951 * copying the whole event in and committing it in one shot.
1952 *
1953 * When an event is to be filtered, per cpu buffers are allocated to
1954 * write the event data into; if the event is then filtered and
1955 * discarded it is simply dropped, otherwise the entire data is
1956 * committed in one shot.
1957 */
1958void trace_buffered_event_enable(void)
1959{
1960        struct ring_buffer_event *event;
1961        struct page *page;
1962        int cpu;
1963
1964        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1965
1966        if (trace_buffered_event_ref++)
1967                return;
1968
1969        for_each_tracing_cpu(cpu) {
1970                page = alloc_pages_node(cpu_to_node(cpu),
1971                                        GFP_KERNEL | __GFP_NORETRY, 0);
1972                if (!page)
1973                        goto failed;
1974
1975                event = page_address(page);
1976                memset(event, 0, sizeof(*event));
1977
1978                per_cpu(trace_buffered_event, cpu) = event;
1979
1980                preempt_disable();
1981                if (cpu == smp_processor_id() &&
1982                    this_cpu_read(trace_buffered_event) !=
1983                    per_cpu(trace_buffered_event, cpu))
1984                        WARN_ON_ONCE(1);
1985                preempt_enable();
1986        }
1987
1988        return;
1989 failed:
1990        trace_buffered_event_disable();
1991}
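    /*
     * A minimal pairing sketch (editorial illustration, not taken from the
     * filter code itself): users bracket the lifetime of a filter with the
     * enable/disable pair while holding event_mutex, which both helpers
     * assert with WARN_ON_ONCE(!mutex_is_locked(&event_mutex)):
     *
     *	mutex_lock(&event_mutex);
     *	trace_buffered_event_enable();
     *	...				// filter in place, events buffered
     *	trace_buffered_event_disable();
     *	mutex_unlock(&event_mutex);
     */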
1992
1993static void enable_trace_buffered_event(void *data)
1994{
1995        /* Probably not needed, but do it anyway */
1996        smp_rmb();
1997        this_cpu_dec(trace_buffered_event_cnt);
1998}
1999
2000static void disable_trace_buffered_event(void *data)
2001{
2002        this_cpu_inc(trace_buffered_event_cnt);
2003}
2004
2005/**
2006 * trace_buffered_event_disable - disable buffering events
2007 *
2008 * When a filter is removed, it is faster to not use the buffered
2009 * events, and to commit directly into the ring buffer. Free up
2010 * the temp buffers when there are no more users. This requires
2011 * special synchronization with current events.
2012 */
2013void trace_buffered_event_disable(void)
2014{
2015        int cpu;
2016
2017        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2018
2019        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2020                return;
2021
2022        if (--trace_buffered_event_ref)
2023                return;
2024
2025        preempt_disable();
2026        /* For each CPU, set the buffer as used. */
2027        smp_call_function_many(tracing_buffer_mask,
2028                               disable_trace_buffered_event, NULL, 1);
2029        preempt_enable();
2030
2031        /* Wait for all current users to finish */
2032        synchronize_sched();
2033
2034        for_each_tracing_cpu(cpu) {
2035                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2036                per_cpu(trace_buffered_event, cpu) = NULL;
2037        }
2038        /*
2039         * Make sure trace_buffered_event is NULL before clearing
2040         * trace_buffered_event_cnt.
2041         */
2042        smp_wmb();
2043
2044        preempt_disable();
2045        /* Do the work on each cpu */
2046        smp_call_function_many(tracing_buffer_mask,
2047                               enable_trace_buffered_event, NULL, 1);
2048        preempt_enable();
2049}
2050
2051void
2052__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2053{
2054        __this_cpu_write(trace_cmdline_save, true);
2055
2056        /* If this is the temp buffer, we need to commit fully */
2057        if (this_cpu_read(trace_buffered_event) == event) {
2058                /* Length is in event->array[0] */
2059                ring_buffer_write(buffer, event->array[0], &event->array[1]);
2060                /* Release the temp buffer */
2061                this_cpu_dec(trace_buffered_event_cnt);
2062        } else
2063                ring_buffer_unlock_commit(buffer, event);
2064}
2065
2066static struct ring_buffer *temp_buffer;
2067
2068struct ring_buffer_event *
2069trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2070                          struct trace_event_file *trace_file,
2071                          int type, unsigned long len,
2072                          unsigned long flags, int pc)
2073{
2074        struct ring_buffer_event *entry;
2075        int val;
2076
2077        *current_rb = trace_file->tr->trace_buffer.buffer;
2078
2079        if ((trace_file->flags &
2080             (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2081            (entry = this_cpu_read(trace_buffered_event))) {
2082                /* Try to use the per cpu buffer first */
2083                val = this_cpu_inc_return(trace_buffered_event_cnt);
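                    /*
                     * The counter doubles as an "in use" marker: only the
                     * first (non-nested) user on this CPU may claim the
                     * single per-cpu event; anything nested falls through
                     * to the regular ring buffer reservation below.
                     */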
2084                if (val == 1) {
2085                        trace_event_setup(entry, type, flags, pc);
2086                        entry->array[0] = len;
2087                        return entry;
2088                }
2089                this_cpu_dec(trace_buffered_event_cnt);
2090        }
2091
2092        entry = trace_buffer_lock_reserve(*current_rb,
2093                                         type, len, flags, pc);
2094        /*
2095         * If tracing is off, but we have triggers enabled,
2096         * we still need to look at the event data. Use the temp_buffer
2097         * to store the trace event for the trigger to use. It's recursion
2098         * safe and will not be recorded anywhere.
2099         */
2100        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2101                *current_rb = temp_buffer;
2102                entry = trace_buffer_lock_reserve(*current_rb,
2103                                                  type, len, flags, pc);
2104        }
2105        return entry;
2106}
2107EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2108
2109void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2110                                     struct ring_buffer *buffer,
2111                                     struct ring_buffer_event *event,
2112                                     unsigned long flags, int pc,
2113                                     struct pt_regs *regs)
2114{
2115        __buffer_unlock_commit(buffer, event);
2116
2117        /*
2118         * If regs is not set, then skip the following callers:
2119         *   trace_buffer_unlock_commit_regs
2120         *   event_trigger_unlock_commit
2121         *   trace_event_buffer_commit
2122         *   trace_event_raw_event_sched_switch
2123         * Note, we can still get here via blktrace, wakeup tracer
2124         * and mmiotrace, but that's ok if they lose a function or
2125         * two. They are not that meaningful.
2126         */
2127        ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2128        ftrace_trace_userstack(buffer, flags, pc);
2129}
2130
2131void
2132trace_function(struct trace_array *tr,
2133               unsigned long ip, unsigned long parent_ip, unsigned long flags,
2134               int pc)
2135{
2136        struct trace_event_call *call = &event_function;
2137        struct ring_buffer *buffer = tr->trace_buffer.buffer;
2138        struct ring_buffer_event *event;
2139        struct ftrace_entry *entry;
2140
2141        event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2142                                          flags, pc);
2143        if (!event)
2144                return;
2145        entry   = ring_buffer_event_data(event);
2146        entry->ip                       = ip;
2147        entry->parent_ip                = parent_ip;
2148
2149        if (!call_filter_check_discard(call, entry, buffer, event))
2150                __buffer_unlock_commit(buffer, event);
2151}
2152
2153#ifdef CONFIG_STACKTRACE
2154
2155#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2156struct ftrace_stack {
2157        unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2158};
2159
2160static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2161static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2162
2163static void __ftrace_trace_stack(struct ring_buffer *buffer,
2164                                 unsigned long flags,
2165                                 int skip, int pc, struct pt_regs *regs)
2166{
2167        struct trace_event_call *call = &event_kernel_stack;
2168        struct ring_buffer_event *event;
2169        struct stack_entry *entry;
2170        struct stack_trace trace;
2171        int use_stack;
2172        int size = FTRACE_STACK_ENTRIES;
2173
2174        trace.nr_entries        = 0;
2175        trace.skip              = skip;
2176
2177        /*
2178         * Add two, for this function and the call to save_stack_trace()
2179         * If regs is set, then these functions will not be in the way.
2180         */
2181        if (!regs)
2182                trace.skip += 2;
2183
2184        /*
2185         * Since events can happen in NMIs there's no safe way to
2186         * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2187         * or NMI comes in, it will just have to use the default
2188         * FTRACE_STACK_SIZE.
2189         */
2190        preempt_disable_notrace();
2191
2192        use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2193        /*
2194         * We don't need any atomic variables, just a barrier.
2195         * If an interrupt comes in, we don't care, because it would
2196         * have exited and put the counter back to what we want.
2197         * We just need a barrier to keep gcc from moving things
2198         * around.
2199         */
2200        barrier();
2201        if (use_stack == 1) {
2202                trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2203                trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2204
2205                if (regs)
2206                        save_stack_trace_regs(regs, &trace);
2207                else
2208                        save_stack_trace(&trace);
2209
2210                if (trace.nr_entries > size)
2211                        size = trace.nr_entries;
2212        } else
2213                /* From now on, use_stack is a boolean */
2214                use_stack = 0;
2215
2216        size *= sizeof(unsigned long);
2217
2218        event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2219                                          sizeof(*entry) + size, flags, pc);
2220        if (!event)
2221                goto out;
2222        entry = ring_buffer_event_data(event);
2223
2224        memset(&entry->caller, 0, size);
2225
2226        if (use_stack)
2227                memcpy(&entry->caller, trace.entries,
2228                       trace.nr_entries * sizeof(unsigned long));
2229        else {
2230                trace.max_entries       = FTRACE_STACK_ENTRIES;
2231                trace.entries           = entry->caller;
2232                if (regs)
2233                        save_stack_trace_regs(regs, &trace);
2234                else
2235                        save_stack_trace(&trace);
2236        }
2237
2238        entry->size = trace.nr_entries;
2239
2240        if (!call_filter_check_discard(call, entry, buffer, event))
2241                __buffer_unlock_commit(buffer, event);
2242
2243 out:
2244        /* Again, don't let gcc optimize things here */
2245        barrier();
2246        __this_cpu_dec(ftrace_stack_reserve);
2247        preempt_enable_notrace();
2248
2249}
2250
2251static inline void ftrace_trace_stack(struct trace_array *tr,
2252                                      struct ring_buffer *buffer,
2253                                      unsigned long flags,
2254                                      int skip, int pc, struct pt_regs *regs)
2255{
2256        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2257                return;
2258
2259        __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2260}
2261
2262void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2263                   int pc)
2264{
2265        __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2266}
2267
2268/**
2269 * trace_dump_stack - record a stack back trace in the trace buffer
2270 * @skip: Number of functions to skip (helper handlers)
2271 */
2272void trace_dump_stack(int skip)
2273{
2274        unsigned long flags;
2275
2276        if (tracing_disabled || tracing_selftest_running)
2277                return;
2278
2279        local_save_flags(flags);
2280
2281        /*
2282         * Skip 3 more, which seems to get us to the caller of
2283         * this function.
2284         */
2285        skip += 3;
2286        __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2287                             flags, skip, preempt_count(), NULL);
2288}
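    /*
     * Illustrative use (editorial sketch, not part of this file): dropping
     * a call at a point of interest records the kernel stack into the
     * trace buffer, e.g.
     *
     *	trace_dump_stack(0);	// backtrace of the current caller
     */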
2289
2290static DEFINE_PER_CPU(int, user_stack_count);
2291
2292void
2293ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2294{
2295        struct trace_event_call *call = &event_user_stack;
2296        struct ring_buffer_event *event;
2297        struct userstack_entry *entry;
2298        struct stack_trace trace;
2299
2300        if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2301                return;
2302
2303        /*
2304         * NMIs cannot handle page faults, even with fixups.
2305         * Saving the user stack can (and often does) fault.
2306         */
2307        if (unlikely(in_nmi()))
2308                return;
2309
2310        /*
2311         * prevent recursion, since the user stack tracing may
2312         * trigger other kernel events.
2313         */
2314        preempt_disable();
2315        if (__this_cpu_read(user_stack_count))
2316                goto out;
2317
2318        __this_cpu_inc(user_stack_count);
2319
2320        event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2321                                          sizeof(*entry), flags, pc);
2322        if (!event)
2323                goto out_drop_count;
2324        entry   = ring_buffer_event_data(event);
2325
2326        entry->tgid             = current->tgid;
2327        memset(&entry->caller, 0, sizeof(entry->caller));
2328
2329        trace.nr_entries        = 0;
2330        trace.max_entries       = FTRACE_STACK_ENTRIES;
2331        trace.skip              = 0;
2332        trace.entries           = entry->caller;
2333
2334        save_stack_trace_user(&trace);
2335        if (!call_filter_check_discard(call, entry, buffer, event))
2336                __buffer_unlock_commit(buffer, event);
2337
2338 out_drop_count:
2339        __this_cpu_dec(user_stack_count);
2340 out:
2341        preempt_enable();
2342}
2343
2344#ifdef UNUSED
2345static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2346{
2347        ftrace_trace_userstack(tr, flags, preempt_count());
2348}
2349#endif /* UNUSED */
2350
2351#endif /* CONFIG_STACKTRACE */
2352
2353/* created for use with alloc_percpu */
2354struct trace_buffer_struct {
2355        int nesting;
2356        char buffer[4][TRACE_BUF_SIZE];
2357};
2358
2359static struct trace_buffer_struct *trace_percpu_buffer;
2360
2361/*
2362 * This allows for lockless recording.  If we're nested too deeply, then
2363 * this returns NULL.
2364 */
2365static char *get_trace_buf(void)
2366{
2367        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2368
2369        if (!buffer || buffer->nesting >= 4)
2370                return NULL;
2371
2372        return &buffer->buffer[buffer->nesting++][0];
2373}
2374
2375static void put_trace_buf(void)
2376{
2377        this_cpu_dec(trace_percpu_buffer->nesting);
2378}
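    /*
     * The nesting depth of four used above is enough to cover the contexts
     * that normally stack on one CPU (process, softirq, hardirq, NMI); if
     * nesting ever goes deeper, get_trace_buf() returns NULL and the
     * caller simply drops the message.
     */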
2379
2380static int alloc_percpu_trace_buffer(void)
2381{
2382        struct trace_buffer_struct *buffers;
2383
2384        buffers = alloc_percpu(struct trace_buffer_struct);
2385        if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2386                return -ENOMEM;
2387
2388        trace_percpu_buffer = buffers;
2389        return 0;
2390}
2391
2392static int buffers_allocated;
2393
2394void trace_printk_init_buffers(void)
2395{
2396        if (buffers_allocated)
2397                return;
2398
2399        if (alloc_percpu_trace_buffer())
2400                return;
2401
2402        /* trace_printk() is for debug use only. Don't use it in production. */
2403
2404        pr_warn("\n");
2405        pr_warn("**********************************************************\n");
2406        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2407        pr_warn("**                                                      **\n");
2408        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2409        pr_warn("**                                                      **\n");
2410        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2411        pr_warn("** unsafe for production use.                           **\n");
2412        pr_warn("**                                                      **\n");
2413        pr_warn("** If you see this message and you are not debugging    **\n");
2414        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2415        pr_warn("**                                                      **\n");
2416        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417        pr_warn("**********************************************************\n");
2418
2419        /* Expand the buffers to set size */
2420        tracing_update_buffers();
2421
2422        buffers_allocated = 1;
2423
2424        /*
2425         * trace_printk_init_buffers() can be called by modules.
2426         * If that happens, then we need to start cmdline recording
2427         * directly here. If the global_trace.buffer is already
2428         * allocated here, then this was called by module code.
2429         */
2430        if (global_trace.trace_buffer.buffer)
2431                tracing_start_cmdline_record();
2432}
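    /*
     * Illustrative only (editorial sketch with made-up variables): any
     * debug site that calls
     *
     *	trace_printk("ptr=%p state=%d\n", ptr, state);
     *
     * pulls these buffers in automatically, and the output is read back
     * from the 'trace' file in tracefs.
     */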
2433
2434void trace_printk_start_comm(void)
2435{
2436        /* Start tracing comms if trace printk is set */
2437        if (!buffers_allocated)
2438                return;
2439        tracing_start_cmdline_record();
2440}
2441
2442static void trace_printk_start_stop_comm(int enabled)
2443{
2444        if (!buffers_allocated)
2445                return;
2446
2447        if (enabled)
2448                tracing_start_cmdline_record();
2449        else
2450                tracing_stop_cmdline_record();
2451}
2452
2453/**
2454 * trace_vbprintk - write binary msg to tracing buffer
2455 *
2456 */
2457int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2458{
2459        struct trace_event_call *call = &event_bprint;
2460        struct ring_buffer_event *event;
2461        struct ring_buffer *buffer;
2462        struct trace_array *tr = &global_trace;
2463        struct bprint_entry *entry;
2464        unsigned long flags;
2465        char *tbuffer;
2466        int len = 0, size, pc;
2467
2468        if (unlikely(tracing_selftest_running || tracing_disabled))
2469                return 0;
2470
2471        /* Don't pollute graph traces with trace_vprintk internals */
2472        pause_graph_tracing();
2473
2474        pc = preempt_count();
2475        preempt_disable_notrace();
2476
2477        tbuffer = get_trace_buf();
2478        if (!tbuffer) {
2479                len = 0;
2480                goto out_nobuffer;
2481        }
2482
2483        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2484
2485        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2486                goto out;
2487
2488        local_save_flags(flags);
2489        size = sizeof(*entry) + sizeof(u32) * len;
2490        buffer = tr->trace_buffer.buffer;
2491        event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2492                                          flags, pc);
2493        if (!event)
2494                goto out;
2495        entry = ring_buffer_event_data(event);
2496        entry->ip                       = ip;
2497        entry->fmt                      = fmt;
2498
2499        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2500        if (!call_filter_check_discard(call, entry, buffer, event)) {
2501                __buffer_unlock_commit(buffer, event);
2502                ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2503        }
2504
2505out:
2506        put_trace_buf();
2507
2508out_nobuffer:
2509        preempt_enable_notrace();
2510        unpause_graph_tracing();
2511
2512        return len;
2513}
2514EXPORT_SYMBOL_GPL(trace_vbprintk);
2515
2516static int
2517__trace_array_vprintk(struct ring_buffer *buffer,
2518                      unsigned long ip, const char *fmt, va_list args)
2519{
2520        struct trace_event_call *call = &event_print;
2521        struct ring_buffer_event *event;
2522        int len = 0, size, pc;
2523        struct print_entry *entry;
2524        unsigned long flags;
2525        char *tbuffer;
2526
2527        if (tracing_disabled || tracing_selftest_running)
2528                return 0;
2529
2530        /* Don't pollute graph traces with trace_vprintk internals */
2531        pause_graph_tracing();
2532
2533        pc = preempt_count();
2534        preempt_disable_notrace();
2535
2537        tbuffer = get_trace_buf();
2538        if (!tbuffer) {
2539                len = 0;
2540                goto out_nobuffer;
2541        }
2542
2543        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2544
2545        local_save_flags(flags);
2546        size = sizeof(*entry) + len + 1;
2547        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2548                                          flags, pc);
2549        if (!event)
2550                goto out;
2551        entry = ring_buffer_event_data(event);
2552        entry->ip = ip;
2553
2554        memcpy(&entry->buf, tbuffer, len + 1);
2555        if (!call_filter_check_discard(call, entry, buffer, event)) {
2556                __buffer_unlock_commit(buffer, event);
2557                ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2558        }
2559
2560out:
2561        put_trace_buf();
2562
2563out_nobuffer:
2564        preempt_enable_notrace();
2565        unpause_graph_tracing();
2566
2567        return len;
2568}
2569
2570int trace_array_vprintk(struct trace_array *tr,
2571                        unsigned long ip, const char *fmt, va_list args)
2572{
2573        return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2574}
2575
2576int trace_array_printk(struct trace_array *tr,
2577                       unsigned long ip, const char *fmt, ...)
2578{
2579        int ret;
2580        va_list ap;
2581
2582        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2583                return 0;
2584
2585        va_start(ap, fmt);
2586        ret = trace_array_vprintk(tr, ip, fmt, ap);
2587        va_end(ap);
2588        return ret;
2589}
2590
2591int trace_array_printk_buf(struct ring_buffer *buffer,
2592                           unsigned long ip, const char *fmt, ...)
2593{
2594        int ret;
2595        va_list ap;
2596
2597        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2598                return 0;
2599
2600        va_start(ap, fmt);
2601        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2602        va_end(ap);
2603        return ret;
2604}
2605
2606int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2607{
2608        return trace_array_vprintk(&global_trace, ip, fmt, args);
2609}
2610EXPORT_SYMBOL_GPL(trace_vprintk);
2611
2612static void trace_iterator_increment(struct trace_iterator *iter)
2613{
2614        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2615
2616        iter->idx++;
2617        if (buf_iter)
2618                ring_buffer_read(buf_iter, NULL);
2619}
2620
2621static struct trace_entry *
2622peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2623                unsigned long *lost_events)
2624{
2625        struct ring_buffer_event *event;
2626        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2627
2628        if (buf_iter)
2629                event = ring_buffer_iter_peek(buf_iter, ts);
2630        else
2631                event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2632                                         lost_events);
2633
2634        if (event) {
2635                iter->ent_size = ring_buffer_event_length(event);
2636                return ring_buffer_event_data(event);
2637        }
2638        iter->ent_size = 0;
2639        return NULL;
2640}
2641
2642static struct trace_entry *
2643__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2644                  unsigned long *missing_events, u64 *ent_ts)
2645{
2646        struct ring_buffer *buffer = iter->trace_buffer->buffer;
2647        struct trace_entry *ent, *next = NULL;
2648        unsigned long lost_events = 0, next_lost = 0;
2649        int cpu_file = iter->cpu_file;
2650        u64 next_ts = 0, ts;
2651        int next_cpu = -1;
2652        int next_size = 0;
2653        int cpu;
2654
2655        /*
2656         * If we are in a per_cpu trace file, don't bother iterating over
2657         * all cpus; just peek at that one directly.
2658         */
2659        if (cpu_file > RING_BUFFER_ALL_CPUS) {
2660                if (ring_buffer_empty_cpu(buffer, cpu_file))
2661                        return NULL;
2662                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2663                if (ent_cpu)
2664                        *ent_cpu = cpu_file;
2665
2666                return ent;
2667        }
2668
2669        for_each_tracing_cpu(cpu) {
2670
2671                if (ring_buffer_empty_cpu(buffer, cpu))
2672                        continue;
2673
2674                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2675
2676                /*
2677                 * Pick the entry with the smallest timestamp:
2678                 */
2679                if (ent && (!next || ts < next_ts)) {
2680                        next = ent;
2681                        next_cpu = cpu;
2682                        next_ts = ts;
2683                        next_lost = lost_events;
2684                        next_size = iter->ent_size;
2685                }
2686        }
2687
2688        iter->ent_size = next_size;
2689
2690        if (ent_cpu)
2691                *ent_cpu = next_cpu;
2692
2693        if (ent_ts)
2694                *ent_ts = next_ts;
2695
2696        if (missing_events)
2697                *missing_events = next_lost;
2698
2699        return next;
2700}
2701
2702/* Find the next real entry, without updating the iterator itself */
2703struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2704                                          int *ent_cpu, u64 *ent_ts)
2705{
2706        return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2707}
2708
2709/* Find the next real entry, and increment the iterator to the next entry */
2710void *trace_find_next_entry_inc(struct trace_iterator *iter)
2711{
2712        iter->ent = __find_next_entry(iter, &iter->cpu,
2713                                      &iter->lost_events, &iter->ts);
2714
2715        if (iter->ent)
2716                trace_iterator_increment(iter);
2717
2718        return iter->ent ? iter : NULL;
2719}
2720
2721static void trace_consume(struct trace_iterator *iter)
2722{
2723        ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2724                            &iter->lost_events);
2725}
2726
2727static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2728{
2729        struct trace_iterator *iter = m->private;
2730        int i = (int)*pos;
2731        void *ent;
2732
2733        WARN_ON_ONCE(iter->leftover);
2734
2735        (*pos)++;
2736
2737        /* can't go backwards */
2738        if (iter->idx > i)
2739                return NULL;
2740
2741        if (iter->idx < 0)
2742                ent = trace_find_next_entry_inc(iter);
2743        else
2744                ent = iter;
2745
2746        while (ent && iter->idx < i)
2747                ent = trace_find_next_entry_inc(iter);
2748
2749        iter->pos = *pos;
2750
2751        return ent;
2752}
2753
2754void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2755{
2756        struct ring_buffer_event *event;
2757        struct ring_buffer_iter *buf_iter;
2758        unsigned long entries = 0;
2759        u64 ts;
2760
2761        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2762
2763        buf_iter = trace_buffer_iter(iter, cpu);
2764        if (!buf_iter)
2765                return;
2766
2767        ring_buffer_iter_reset(buf_iter);
2768
2769        /*
2770         * With the max latency tracers, it can happen that a reset
2771         * never took place on a cpu. This is evidenced by the
2772         * timestamp being before the start of the buffer.
2773         */
2774        while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2775                if (ts >= iter->trace_buffer->time_start)
2776                        break;
2777                entries++;
2778                ring_buffer_read(buf_iter, NULL);
2779        }
2780
2781        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2782}
2783
2784/*
2785 * The current tracer is copied to avoid holding a global lock
2786 * all around.
2787 */
2788static void *s_start(struct seq_file *m, loff_t *pos)
2789{
2790        struct trace_iterator *iter = m->private;
2791        struct trace_array *tr = iter->tr;
2792        int cpu_file = iter->cpu_file;
2793        void *p = NULL;
2794        loff_t l = 0;
2795        int cpu;
2796
2797        /*
2798         * copy the tracer to avoid using a global lock all around.
2799         * iter->trace is a copy of current_trace, the pointer to the
2800         * name may be used instead of a strcmp(), as iter->trace->name
2801         * will point to the same string as current_trace->name.
2802         */
2803        mutex_lock(&trace_types_lock);
2804        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2805                *iter->trace = *tr->current_trace;
2806        mutex_unlock(&trace_types_lock);
2807
2808#ifdef CONFIG_TRACER_MAX_TRACE
2809        if (iter->snapshot && iter->trace->use_max_tr)
2810                return ERR_PTR(-EBUSY);
2811#endif
2812
2813        if (!iter->snapshot)
2814                atomic_inc(&trace_record_cmdline_disabled);
2815
2816        if (*pos != iter->pos) {
2817                iter->ent = NULL;
2818                iter->cpu = 0;
2819                iter->idx = -1;
2820
2821                if (cpu_file == RING_BUFFER_ALL_CPUS) {
2822                        for_each_tracing_cpu(cpu)
2823                                tracing_iter_reset(iter, cpu);
2824                } else
2825                        tracing_iter_reset(iter, cpu_file);
2826
2827                iter->leftover = 0;
2828                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2829                        ;
2830
2831        } else {
2832                /*
2833                 * If we overflowed the seq_file before, then we want
2834                 * to just reuse the trace_seq buffer again.
2835                 */
2836                if (iter->leftover)
2837                        p = iter;
2838                else {
2839                        l = *pos - 1;
2840                        p = s_next(m, p, &l);
2841                }
2842        }
2843
2844        trace_event_read_lock();
2845        trace_access_lock(cpu_file);
2846        return p;
2847}
2848
2849static void s_stop(struct seq_file *m, void *p)
2850{
2851        struct trace_iterator *iter = m->private;
2852
2853#ifdef CONFIG_TRACER_MAX_TRACE
2854        if (iter->snapshot && iter->trace->use_max_tr)
2855                return;
2856#endif
2857
2858        if (!iter->snapshot)
2859                atomic_dec(&trace_record_cmdline_disabled);
2860
2861        trace_access_unlock(iter->cpu_file);
2862        trace_event_read_unlock();
2863}
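    /*
     * s_start/s_next/s_show/s_stop implement the seq_file protocol for the
     * 'trace' file; roughly, they are wired up as below (the real table is
     * defined further down in this file):
     *
     *	static const struct seq_operations tracer_seq_ops = {
     *		.start	= s_start,
     *		.next	= s_next,
     *		.stop	= s_stop,
     *		.show	= s_show,
     *	};
     */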
2864
2865static void
2866get_total_entries(struct trace_buffer *buf,
2867                  unsigned long *total, unsigned long *entries)
2868{
2869        unsigned long count;
2870        int cpu;
2871
2872        *total = 0;
2873        *entries = 0;
2874
2875        for_each_tracing_cpu(cpu) {
2876                count = ring_buffer_entries_cpu(buf->buffer, cpu);
2877                /*
2878                 * If this buffer has skipped entries, then we hold all
2879                 * entries for the trace and we need to ignore the
2880                 * ones before the time stamp.
2881                 */
2882                if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2883                        count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2884                        /* total is the same as the entries */
2885                        *total += count;
2886                } else
2887                        *total += count +
2888                                ring_buffer_overrun_cpu(buf->buffer, cpu);
2889                *entries += count;
2890        }
2891}
2892
2893static void print_lat_help_header(struct seq_file *m)
2894{
2895        seq_puts(m, "#                  _------=> CPU#            \n"
2896                    "#                 / _-----=> irqs-off        \n"
2897                    "#                | / _----=> need-resched    \n"
2898                    "#                || / _---=> hardirq/softirq \n"
2899                    "#                ||| / _--=> preempt-depth   \n"
2900                    "#                |||| /     delay            \n"
2901                    "#  cmd     pid   ||||| time  |   caller      \n"
2902                    "#     \\   /      |||||  \\    |   /         \n");
2903}
2904
2905static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2906{
2907        unsigned long total;
2908        unsigned long entries;
2909
2910        get_total_entries(buf, &total, &entries);
2911        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2912                   entries, total, num_online_cpus());
2913        seq_puts(m, "#\n");
2914}
2915
2916static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2917{
2918        print_event_info(buf, m);
2919        seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2920                    "#              | |       |          |         |\n");
2921}
2922
2923static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2924{
2925        print_event_info(buf, m);
2926        seq_puts(m, "#                              _-----=> irqs-off\n"
2927                    "#                             / _----=> need-resched\n"
2928                    "#                            | / _---=> hardirq/softirq\n"
2929                    "#                            || / _--=> preempt-depth\n"
2930                    "#                            ||| /     delay\n"
2931                    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2932                    "#              | |       |   ||||       |         |\n");
2933}
2934
2935void
2936print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2937{
2938        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2939        struct trace_buffer *buf = iter->trace_buffer;
2940        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2941        struct tracer *type = iter->trace;
2942        unsigned long entries;
2943        unsigned long total;
2944        const char *name = "preemption";
2945
2946        name = type->name;
2947
2948        get_total_entries(buf, &total, &entries);
2949
2950        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2951                   name, UTS_RELEASE);
2952        seq_puts(m, "# -----------------------------------"
2953                 "---------------------------------\n");
2954        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2955                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2956                   nsecs_to_usecs(data->saved_latency),
2957                   entries,
2958                   total,
2959                   buf->cpu,
2960#if defined(CONFIG_PREEMPT_NONE)
2961                   "server",
2962#elif defined(CONFIG_PREEMPT_VOLUNTARY)
2963                   "desktop",
2964#elif defined(CONFIG_PREEMPT)
2965                   "preempt",
2966#else
2967                   "unknown",
2968#endif
2969                   /* These are reserved for later use */
2970                   0, 0, 0, 0);
2971#ifdef CONFIG_SMP
2972        seq_printf(m, " #P:%d)\n", num_online_cpus());
2973#else
2974        seq_puts(m, ")\n");
2975#endif
2976        seq_puts(m, "#    -----------------\n");
2977        seq_printf(m, "#    | task: %.16s-%d "
2978                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2979                   data->comm, data->pid,
2980                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2981                   data->policy, data->rt_priority);
2982        seq_puts(m, "#    -----------------\n");
2983
2984        if (data->critical_start) {
2985                seq_puts(m, "#  => started at: ");
2986                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2987                trace_print_seq(m, &iter->seq);
2988                seq_puts(m, "\n#  => ended at:   ");
2989                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2990                trace_print_seq(m, &iter->seq);
2991                seq_puts(m, "\n#\n");
2992        }
2993
2994        seq_puts(m, "#\n");
2995}
2996
2997static void test_cpu_buff_start(struct trace_iterator *iter)
2998{
2999        struct trace_seq *s = &iter->seq;
3000        struct trace_array *tr = iter->tr;
3001
3002        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3003                return;
3004
3005        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3006                return;
3007
3008        if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3009                return;
3010
3011        if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3012                return;
3013
3014        if (iter->started)
3015                cpumask_set_cpu(iter->cpu, iter->started);
3016
3017        /* Don't print started cpu buffer for the first entry of the trace */
3018        if (iter->idx > 1)
3019                trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3020                                iter->cpu);
3021}
3022
3023static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3024{
3025        struct trace_array *tr = iter->tr;
3026        struct trace_seq *s = &iter->seq;
3027        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3028        struct trace_entry *entry;
3029        struct trace_event *event;
3030
3031        entry = iter->ent;
3032
3033        test_cpu_buff_start(iter);
3034
3035        event = ftrace_find_event(entry->type);
3036
3037        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3038                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3039                        trace_print_lat_context(iter);
3040                else
3041                        trace_print_context(iter);
3042        }
3043
3044        if (trace_seq_has_overflowed(s))
3045                return TRACE_TYPE_PARTIAL_LINE;
3046
3047        if (event)
3048                return event->funcs->trace(iter, sym_flags, event);
3049
3050        trace_seq_printf(s, "Unknown type %d\n", entry->type);
3051
3052        return trace_handle_return(s);
3053}
3054
3055static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3056{
3057        struct trace_array *tr = iter->tr;
3058        struct trace_seq *s = &iter->seq;
3059        struct trace_entry *entry;
3060        struct trace_event *event;
3061
3062        entry = iter->ent;
3063
3064        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3065                trace_seq_printf(s, "%d %d %llu ",
3066                                 entry->pid, iter->cpu, iter->ts);
3067
3068        if (trace_seq_has_overflowed(s))
3069                return TRACE_TYPE_PARTIAL_LINE;
3070
3071        event = ftrace_find_event(entry->type);
3072        if (event)
3073                return event->funcs->raw(iter, 0, event);
3074
3075        trace_seq_printf(s, "%d ?\n", entry->type);
3076
3077        return trace_handle_return(s);
3078}
3079
3080static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3081{
3082        struct trace_array *tr = iter->tr;
3083        struct trace_seq *s = &iter->seq;
3084        unsigned char newline = '\n';
3085        struct trace_entry *entry;
3086        struct trace_event *event;
3087
3088        entry = iter->ent;
3089
3090        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3091                SEQ_PUT_HEX_FIELD(s, entry->pid);
3092                SEQ_PUT_HEX_FIELD(s, iter->cpu);
3093                SEQ_PUT_HEX_FIELD(s, iter->ts);
3094                if (trace_seq_has_overflowed(s))
3095                        return TRACE_TYPE_PARTIAL_LINE;
3096        }
3097
3098        event = ftrace_find_event(entry->type);
3099        if (event) {
3100                enum print_line_t ret = event->funcs->hex(iter, 0, event);
3101                if (ret != TRACE_TYPE_HANDLED)
3102                        return ret;
3103        }
3104
3105        SEQ_PUT_FIELD(s, newline);
3106
3107        return trace_handle_return(s);
3108}
3109
3110static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3111{
3112        struct trace_array *tr = iter->tr;
3113        struct trace_seq *s = &iter->seq;
3114        struct trace_entry *entry;
3115        struct trace_event *event;
3116
3117        entry = iter->ent;
3118
3119        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3120                SEQ_PUT_FIELD(s, entry->pid);
3121                SEQ_PUT_FIELD(s, iter->cpu);
3122                SEQ_PUT_FIELD(s, iter->ts);
3123                if (trace_seq_has_overflowed(s))
3124                        return TRACE_TYPE_PARTIAL_LINE;
3125        }
3126
3127        event = ftrace_find_event(entry->type);
3128        return event ? event->funcs->binary(iter, 0, event) :
3129                TRACE_TYPE_HANDLED;
3130}
3131
3132int trace_empty(struct trace_iterator *iter)
3133{
3134        struct ring_buffer_iter *buf_iter;
3135        int cpu;
3136
3137        /* If we are looking at one CPU buffer, only check that one */
3138        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3139                cpu = iter->cpu_file;
3140                buf_iter = trace_buffer_iter(iter, cpu);
3141                if (buf_iter) {
3142                        if (!ring_buffer_iter_empty(buf_iter))
3143                                return 0;
3144                } else {
3145                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3146                                return 0;
3147                }
3148                return 1;
3149        }
3150
3151        for_each_tracing_cpu(cpu) {
3152                buf_iter = trace_buffer_iter(iter, cpu);
3153                if (buf_iter) {
3154                        if (!ring_buffer_iter_empty(buf_iter))
3155                                return 0;
3156                } else {
3157                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3158                                return 0;
3159                }
3160        }
3161
3162        return 1;
3163}
3164
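    /*
     * After reporting any lost events, print_trace_line() resolves the
     * output format in priority order: the tracer's own ->print_line hook
     * wins, then the printk msg-only shortcuts, then the bin/hex/raw
     * iterator flags, and finally the default human-readable format.
     */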
3165/*  Called with trace_event_read_lock() held. */
3166enum print_line_t print_trace_line(struct trace_iterator *iter)
3167{
3168        struct trace_array *tr = iter->tr;
3169        unsigned long trace_flags = tr->trace_flags;
3170        enum print_line_t ret;
3171
3172        if (iter->lost_events) {
3173                trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3174                                 iter->cpu, iter->lost_events);
3175                if (trace_seq_has_overflowed(&iter->seq))
3176                        return TRACE_TYPE_PARTIAL_LINE;
3177        }
3178
3179        if (iter->trace && iter->trace->print_line) {
3180                ret = iter->trace->print_line(iter);
3181                if (ret != TRACE_TYPE_UNHANDLED)
3182                        return ret;
3183        }
3184
3185        if (iter->ent->type == TRACE_BPUTS &&
3186                        trace_flags & TRACE_ITER_PRINTK &&
3187                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3188                return trace_print_bputs_msg_only(iter);
3189
3190        if (iter->ent->type == TRACE_BPRINT &&
3191                        trace_flags & TRACE_ITER_PRINTK &&
3192                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3193                return trace_print_bprintk_msg_only(iter);
3194
3195        if (iter->ent->type == TRACE_PRINT &&
3196                        trace_flags & TRACE_ITER_PRINTK &&
3197                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3198                return trace_print_printk_msg_only(iter);
3199
3200        if (trace_flags & TRACE_ITER_BIN)
3201                return print_bin_fmt(iter);
3202
3203        if (trace_flags & TRACE_ITER_HEX)
3204                return print_hex_fmt(iter);
3205
3206        if (trace_flags & TRACE_ITER_RAW)
3207                return print_raw_fmt(iter);
3208
3209        return print_trace_fmt(iter);
3210}
3211
3212void trace_latency_header(struct seq_file *m)
3213{
3214        struct trace_iterator *iter = m->private;
3215        struct trace_array *tr = iter->tr;
3216
3217        /* print nothing if the buffers are empty */
3218        if (trace_empty(iter))
3219                return;
3220
3221        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3222                print_trace_header(m, iter);
3223
3224        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3225                print_lat_help_header(m);
3226}
3227
3228void trace_default_header(struct seq_file *m)
3229{
3230        struct trace_iterator *iter = m->private;
3231        struct trace_array *tr = iter->tr;
3232        unsigned long trace_flags = tr->trace_flags;
3233
3234        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3235                return;
3236
3237        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3238                /* print nothing if the buffers are empty */
3239                if (trace_empty(iter))
3240                        return;
3241                print_trace_header(m, iter);
3242                if (!(trace_flags & TRACE_ITER_VERBOSE))
3243                        print_lat_help_header(m);
3244        } else {
3245                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3246                        if (trace_flags & TRACE_ITER_IRQ_INFO)
3247                                print_func_help_header_irq(iter->trace_buffer, m);
3248                        else
3249                                print_func_help_header(iter->trace_buffer, m);
3250                }
3251        }
3252}
3253
3254static void test_ftrace_alive(struct seq_file *m)
3255{
3256        if (!ftrace_is_dead())
3257                return;
3258        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3259                    "#          MAY BE MISSING FUNCTION EVENTS\n");
3260}
3261
3262#ifdef CONFIG_TRACER_MAX_TRACE
3263static void show_snapshot_main_help(struct seq_file *m)
3264{
3265        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3266                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3267                    "#                      Takes a snapshot of the main buffer.\n"
3268                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3269                    "#                      (Doesn't have to be '2'; works with any number that\n"
3270                    "#                       is not a '0' or '1')\n");
3271}
3272
3273static void show_snapshot_percpu_help(struct seq_file *m)
3274{
3275        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3276#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3277        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3278                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3279#else
3280        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3281                    "#                     Must use main snapshot file to allocate.\n");
3282#endif
3283        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3284                    "#                      (Doesn't have to be '2'; works with any number that\n"
3285                    "#                       is not a '0' or '1')\n");
3286}
3287
3288static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3289{
3290        if (iter->tr->allocated_snapshot)
3291                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3292        else
3293                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3294
3295        seq_puts(m, "# Snapshot commands:\n");
3296        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3297                show_snapshot_main_help(m);
3298        else
3299                show_snapshot_percpu_help(m);
3300}
3301#else
3302/* Should never be called */
3303static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3304#endif
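/*
 * Worked example for the snapshot interface documented in the help
 * text above (a sketch; the tracefs mount point is assumed to be
 * /sys/kernel/tracing or /sys/kernel/debug/tracing):
 *
 *	# cd /sys/kernel/tracing
 *	# echo 1 > snapshot		(allocate and take a snapshot)
 *	# cat snapshot			(read the frozen copy)
 *	# echo 2 > snapshot		(clear it, keep the allocation)
 *	# echo 0 > snapshot		(free the snapshot buffer)
 */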
3305
3306static int s_show(struct seq_file *m, void *v)
3307{
3308        struct trace_iterator *iter = v;
3309        int ret;
3310
3311        if (iter->ent == NULL) {
3312                if (iter->tr) {
3313                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
3314                        seq_puts(m, "#\n");
3315                        test_ftrace_alive(m);
3316                }
3317                if (iter->snapshot && trace_empty(iter))
3318                        print_snapshot_help(m, iter);
3319                else if (iter->trace && iter->trace->print_header)
3320                        iter->trace->print_header(m);
3321                else
3322                        trace_default_header(m);
3323
3324        } else if (iter->leftover) {
3325                /*
3326                 * If we filled the seq_file buffer earlier, we
3327                 * want to just show it now.
3328                 */
3329                ret = trace_print_seq(m, &iter->seq);
3330
3331                /* ret should this time be zero, but you never know */
3332                iter->leftover = ret;
3333
3334        } else {
3335                print_trace_line(iter);
3336                ret = trace_print_seq(m, &iter->seq);
3337                /*
3338                 * If we overflow the seq_file buffer, then it will
3339                 * ask us for this data again at start up.
3340                 * Use that instead.
3341                 *  ret is 0 if seq_file write succeeded.
3342                 *        -1 otherwise.
3343                 */
3344                iter->leftover = ret;
3345        }
3346
3347        return 0;
3348}
3349
3350/*
3351 * Should be used after trace_array_get(), trace_types_lock
3352 * ensures that i_cdev was already initialized.
3353 */
3354static inline int tracing_get_cpu(struct inode *inode)
3355{
3356        if (inode->i_cdev) /* See trace_create_cpu_file() */
3357                return (long)inode->i_cdev - 1;
3358        return RING_BUFFER_ALL_CPUS;
3359}
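/*
 * Illustrative counterpart to the decoding above (a sketch, not code
 * from this file): the "+1" bias lets a NULL i_cdev mean "all CPUs".
 * A per-cpu file creator along the lines of trace_create_cpu_file(),
 * referenced in the comment above, is assumed to store the cpu as:
 *
 *	d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 *
 * so tracing_get_cpu() recovers either that cpu or
 * RING_BUFFER_ALL_CPUS when nothing was stored.
 */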
3360
3361static const struct seq_operations tracer_seq_ops = {
3362        .start          = s_start,
3363        .next           = s_next,
3364        .stop           = s_stop,
3365        .show           = s_show,
3366};
3367
3368static struct trace_iterator *
3369__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3370{
3371        struct trace_array *tr = inode->i_private;
3372        struct trace_iterator *iter;
3373        int cpu;
3374
3375        if (tracing_disabled)
3376                return ERR_PTR(-ENODEV);
3377
3378        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3379        if (!iter)
3380                return ERR_PTR(-ENOMEM);
3381
3382        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3383                                    GFP_KERNEL);
3384        if (!iter->buffer_iter)
3385                goto release;
3386
3387        /*
3388         * We make a copy of the current tracer to avoid concurrent
3389         * changes on it while we are reading.
3390         */
3391        mutex_lock(&trace_types_lock);
3392        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3393        if (!iter->trace)
3394                goto fail;
3395
3396        *iter->trace = *tr->current_trace;
3397
3398        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3399                goto fail;
3400
3401        iter->tr = tr;
3402
3403#ifdef CONFIG_TRACER_MAX_TRACE
3404        /* Currently only the top directory has a snapshot */
3405        if (tr->current_trace->print_max || snapshot)
3406                iter->trace_buffer = &tr->max_buffer;
3407        else
3408#endif
3409                iter->trace_buffer = &tr->trace_buffer;
3410        iter->snapshot = snapshot;
3411        iter->pos = -1;
3412        iter->cpu_file = tracing_get_cpu(inode);
3413        mutex_init(&iter->mutex);
3414
3415        /* Notify the tracer early; before we stop tracing. */
3416        if (iter->trace && iter->trace->open)
3417                iter->trace->open(iter);
3418
3419        /* Annotate start of buffers if we had overruns */
3420        if (ring_buffer_overruns(iter->trace_buffer->buffer))
3421                iter->iter_flags |= TRACE_FILE_ANNOTATE;
3422
3423        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3424        if (trace_clocks[tr->clock_id].in_ns)
3425                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3426
3427        /* stop the trace while dumping if we are not opening "snapshot" */
3428        if (!iter->snapshot)
3429                tracing_stop_tr(tr);
3430
3431        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3432                for_each_tracing_cpu(cpu) {
3433                        iter->buffer_iter[cpu] =
3434                                ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3435                }
3436                ring_buffer_read_prepare_sync();
3437                for_each_tracing_cpu(cpu) {
3438                        ring_buffer_read_start(iter->buffer_iter[cpu]);
3439                        tracing_iter_reset(iter, cpu);
3440                }
3441        } else {
3442                cpu = iter->cpu_file;
3443                iter->buffer_iter[cpu] =
3444                        ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3445                ring_buffer_read_prepare_sync();
3446                ring_buffer_read_start(iter->buffer_iter[cpu]);
3447                tracing_iter_reset(iter, cpu);
3448        }
3449
3450        mutex_unlock(&trace_types_lock);
3451
3452        return iter;
3453
3454 fail:
3455        mutex_unlock(&trace_types_lock);
3456        kfree(iter->trace);
3457        kfree(iter->buffer_iter);
3458release:
3459        seq_release_private(inode, file);
3460        return ERR_PTR(-ENOMEM);
3461}
3462
3463int tracing_open_generic(struct inode *inode, struct file *filp)
3464{
3465        if (tracing_disabled)
3466                return -ENODEV;
3467
3468        filp->private_data = inode->i_private;
3469        return 0;
3470}
3471
3472bool tracing_is_disabled(void)
3473{
3474        return tracing_disabled ? true : false;
3475}
3476
3477/*
3478 * Open and update trace_array ref count.
3479 * Must have the current trace_array passed to it.
3480 */
3481static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3482{
3483        struct trace_array *tr = inode->i_private;
3484
3485        if (tracing_disabled)
3486                return -ENODEV;
3487
3488        if (trace_array_get(tr) < 0)
3489                return -ENODEV;
3490
3491        filp->private_data = inode->i_private;
3492
3493        return 0;
3494}
3495
3496static int tracing_release(struct inode *inode, struct file *file)
3497{
3498        struct trace_array *tr = inode->i_private;
3499        struct seq_file *m = file->private_data;
3500        struct trace_iterator *iter;
3501        int cpu;
3502
3503        if (!(file->f_mode & FMODE_READ)) {
3504                trace_array_put(tr);
3505                return 0;
3506        }
3507
3508        /* Writes do not use seq_file */
3509        iter = m->private;
3510        mutex_lock(&trace_types_lock);
3511
3512        for_each_tracing_cpu(cpu) {
3513                if (iter->buffer_iter[cpu])
3514                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
3515        }
3516
3517        if (iter->trace && iter->trace->close)
3518                iter->trace->close(iter);
3519
3520        if (!iter->snapshot)
3521                /* reenable tracing if it was previously enabled */
3522                tracing_start_tr(tr);
3523
3524        __trace_array_put(tr);
3525
3526        mutex_unlock(&trace_types_lock);
3527
3528        mutex_destroy(&iter->mutex);
3529        free_cpumask_var(iter->started);
3530        kfree(iter->trace);
3531        kfree(iter->buffer_iter);
3532        seq_release_private(inode, file);
3533
3534        return 0;
3535}
3536
3537static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3538{
3539        struct trace_array *tr = inode->i_private;
3540
3541        trace_array_put(tr);
3542        return 0;
3543}
3544
3545static int tracing_single_release_tr(struct inode *inode, struct file *file)
3546{
3547        struct trace_array *tr = inode->i_private;
3548
3549        trace_array_put(tr);
3550
3551        return single_release(inode, file);
3552}
3553
3554static int tracing_open(struct inode *inode, struct file *file)
3555{
3556        struct trace_array *tr = inode->i_private;
3557        struct trace_iterator *iter;
3558        int ret = 0;
3559
3560        if (trace_array_get(tr) < 0)
3561                return -ENODEV;
3562
3563        /* If this file was open for write, then erase contents */
3564        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3565                int cpu = tracing_get_cpu(inode);
3566
3567                if (cpu == RING_BUFFER_ALL_CPUS)
3568                        tracing_reset_online_cpus(&tr->trace_buffer);
3569                else
3570                        tracing_reset(&tr->trace_buffer, cpu);
3571        }
3572
3573        if (file->f_mode & FMODE_READ) {
3574                iter = __tracing_open(inode, file, false);
3575                if (IS_ERR(iter))
3576                        ret = PTR_ERR(iter);
3577                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3578                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
3579        }
3580
3581        if (ret < 0)
3582                trace_array_put(tr);
3583
3584        return ret;
3585}
3586
3587/*
3588 * Some tracers are not suitable for instance buffers.
3589 * A tracer is always available for the global array (toplevel),
3590 * and for an instance only if it explicitly allows it.
3591 */
3592static bool
3593trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3594{
3595        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3596}
3597
3598/* Find the next tracer that this trace array may use */
3599static struct tracer *
3600get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3601{
3602        while (t && !trace_ok_for_array(t, tr))
3603                t = t->next;
3604
3605        return t;
3606}
3607
3608static void *
3609t_next(struct seq_file *m, void *v, loff_t *pos)
3610{
3611        struct trace_array *tr = m->private;
3612        struct tracer *t = v;
3613
3614        (*pos)++;
3615
3616        if (t)
3617                t = get_tracer_for_array(tr, t->next);
3618
3619        return t;
3620}
3621
3622static void *t_start(struct seq_file *m, loff_t *pos)
3623{
3624        struct trace_array *tr = m->private;
3625        struct tracer *t;
3626        loff_t l = 0;
3627
3628        mutex_lock(&trace_types_lock);
3629
3630        t = get_tracer_for_array(tr, trace_types);
3631        for (; t && l < *pos; t = t_next(m, t, &l))
3632                ;
3633
3634        return t;
3635}
3636
3637static void t_stop(struct seq_file *m, void *p)
3638{
3639        mutex_unlock(&trace_types_lock);
3640}
3641
3642static int t_show(struct seq_file *m, void *v)
3643{
3644        struct tracer *t = v;
3645
3646        if (!t)
3647                return 0;
3648
3649        seq_puts(m, t->name);
3650        if (t->next)
3651                seq_putc(m, ' ');
3652        else
3653                seq_putc(m, '\n');
3654
3655        return 0;
3656}
3657
3658static const struct seq_operations show_traces_seq_ops = {
3659        .start          = t_start,
3660        .next           = t_next,
3661        .stop           = t_stop,
3662        .show           = t_show,
3663};
3664
3665static int show_traces_open(struct inode *inode, struct file *file)
3666{
3667        struct trace_array *tr = inode->i_private;
3668        struct seq_file *m;
3669        int ret;
3670
3671        if (tracing_disabled)
3672                return -ENODEV;
3673
3674        ret = seq_open(file, &show_traces_seq_ops);
3675        if (ret)
3676                return ret;
3677
3678        m = file->private_data;
3679        m->private = tr;
3680
3681        return 0;
3682}
3683
3684static ssize_t
3685tracing_write_stub(struct file *filp, const char __user *ubuf,
3686                   size_t count, loff_t *ppos)
3687{
3688        return count;
3689}
3690
3691loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3692{
3693        int ret;
3694
3695        if (file->f_mode & FMODE_READ)
3696                ret = seq_lseek(file, offset, whence);
3697        else
3698                file->f_pos = ret = 0;
3699
3700        return ret;
3701}
3702
3703static const struct file_operations tracing_fops = {
3704        .open           = tracing_open,
3705        .read           = seq_read,
3706        .write          = tracing_write_stub,
3707        .llseek         = tracing_lseek,
3708        .release        = tracing_release,
3709};
3710
3711static const struct file_operations show_traces_fops = {
3712        .open           = show_traces_open,
3713        .read           = seq_read,
3714        .release        = seq_release,
3715        .llseek         = seq_lseek,
3716};
3717
3718/*
3719 * The tracer itself will not take this lock, but still we want
3720 * to provide a consistent cpumask to user-space:
3721 */
3722static DEFINE_MUTEX(tracing_cpumask_update_lock);
3723
3724/*
3725 * Temporary storage for the character representation of the
3726 * CPU bitmask (and one more byte for the newline):
3727 */
3728static char mask_str[NR_CPUS + 1];
3729
3730static ssize_t
3731tracing_cpumask_read(struct file *filp, char __user *ubuf,
3732                     size_t count, loff_t *ppos)
3733{
3734        struct trace_array *tr = file_inode(filp)->i_private;
3735        int len;
3736
3737        mutex_lock(&tracing_cpumask_update_lock);
3738
3739        len = snprintf(mask_str, sizeof(mask_str), "%*pb\n",
3740                       cpumask_pr_args(tr->tracing_cpumask));
3741        if (len >= count) {
3742                count = -EINVAL;
3743                goto out_err;
3744        }
3745        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3746
3747out_err:
3748        mutex_unlock(&tracing_cpumask_update_lock);
3749
3750        return count;
3751}
3752
3753static ssize_t
3754tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3755                      size_t count, loff_t *ppos)
3756{
3757        struct trace_array *tr = file_inode(filp)->i_private;
3758        cpumask_var_t tracing_cpumask_new;
3759        int err, cpu;
3760
3761        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3762                return -ENOMEM;
3763
3764        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3765        if (err)
3766                goto err_unlock;
3767
3768        mutex_lock(&tracing_cpumask_update_lock);
3769
3770        local_irq_disable();
3771        arch_spin_lock(&tr->max_lock);
3772        for_each_tracing_cpu(cpu) {
3773                /*
3774                 * Increase/decrease the disabled counter if we are
3775                 * about to flip a bit in the cpumask:
3776                 */
3777                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3778                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3779                        atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3780                        ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3781                }
3782                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3783                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3784                        atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3785                        ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3786                }
3787        }
3788        arch_spin_unlock(&tr->max_lock);
3789        local_irq_enable();
3790
3791        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3792
3793        mutex_unlock(&tracing_cpumask_update_lock);
3794        free_cpumask_var(tracing_cpumask_new);
3795
3796        return count;
3797
3798err_unlock:
3799        free_cpumask_var(tracing_cpumask_new);
3800
3801        return err;
3802}
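/*
 * Example of the tracing_cpumask format handled above (a sketch):
 * cpumask_parse_user() takes a hex mask and "%*pb" prints one back,
 * so on an 8-CPU machine:
 *
 *	# echo 0f > tracing_cpumask	(trace only CPUs 0-3)
 *	# cat tracing_cpumask
 *	0f
 *
 * CPUs whose bit is cleared get their per-cpu "disabled" count bumped
 * and ring buffer recording switched off, as done in the loop above.
 */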
3803
3804static const struct file_operations tracing_cpumask_fops = {
3805        .open           = tracing_open_generic_tr,
3806        .read           = tracing_cpumask_read,
3807        .write          = tracing_cpumask_write,
3808        .release        = tracing_release_generic_tr,
3809        .llseek         = generic_file_llseek,
3810};
3811
3812static int tracing_trace_options_show(struct seq_file *m, void *v)
3813{
3814        struct tracer_opt *trace_opts;
3815        struct trace_array *tr = m->private;
3816        u32 tracer_flags;
3817        int i;
3818
3819        mutex_lock(&trace_types_lock);
3820        tracer_flags = tr->current_trace->flags->val;
3821        trace_opts = tr->current_trace->flags->opts;
3822
3823        for (i = 0; trace_options[i]; i++) {
3824                if (tr->trace_flags & (1 << i))
3825                        seq_printf(m, "%s\n", trace_options[i]);
3826                else
3827                        seq_printf(m, "no%s\n", trace_options[i]);
3828        }
3829
3830        for (i = 0; trace_opts[i].name; i++) {
3831                if (tracer_flags & trace_opts[i].bit)
3832                        seq_printf(m, "%s\n", trace_opts[i].name);
3833                else
3834                        seq_printf(m, "no%s\n", trace_opts[i].name);
3835        }
3836        mutex_unlock(&trace_types_lock);
3837
3838        return 0;
3839}
3840
3841static int __set_tracer_option(struct trace_array *tr,
3842                               struct tracer_flags *tracer_flags,
3843                               struct tracer_opt *opts, int neg)
3844{
3845        struct tracer *trace = tracer_flags->trace;
3846        int ret;
3847
3848        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3849        if (ret)
3850                return ret;
3851
3852        if (neg)
3853                tracer_flags->val &= ~opts->bit;
3854        else
3855                tracer_flags->val |= opts->bit;
3856        return 0;
3857}
3858
3859/* Try to assign a tracer specific option */
3860static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3861{
3862        struct tracer *trace = tr->current_trace;
3863        struct tracer_flags *tracer_flags = trace->flags;
3864        struct tracer_opt *opts = NULL;
3865        int i;
3866
3867        for (i = 0; tracer_flags->opts[i].name; i++) {
3868                opts = &tracer_flags->opts[i];
3869
3870                if (strcmp(cmp, opts->name) == 0)
3871                        return __set_tracer_option(tr, trace->flags, opts, neg);
3872        }
3873
3874        return -EINVAL;
3875}
3876
3877/* Some tracers require overwrite to stay enabled */
3878int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3879{
3880        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3881                return -1;
3882
3883        return 0;
3884}
3885
3886int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3887{
3888        /* do nothing if flag is already set */
3889        if (!!(tr->trace_flags & mask) == !!enabled)
3890                return 0;
3891
3892        /* Give the tracer a chance to approve the change */
3893        if (tr->current_trace->flag_changed)
3894                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3895                        return -EINVAL;
3896
3897        if (enabled)
3898                tr->trace_flags |= mask;
3899        else
3900                tr->trace_flags &= ~mask;
3901
3902        if (mask == TRACE_ITER_RECORD_CMD)
3903                trace_event_enable_cmd_record(enabled);
3904
3905        if (mask == TRACE_ITER_EVENT_FORK)
3906                trace_event_follow_fork(tr, enabled);
3907
3908        if (mask == TRACE_ITER_OVERWRITE) {
3909                ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3910#ifdef CONFIG_TRACER_MAX_TRACE
3911                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3912#endif
3913        }
3914
3915        if (mask == TRACE_ITER_PRINTK) {
3916                trace_printk_start_stop_comm(enabled);
3917                trace_printk_control(enabled);
3918        }
3919
3920        return 0;
3921}
3922
3923static int trace_set_options(struct trace_array *tr, char *option)
3924{
3925        char *cmp;
3926        int neg = 0;
3927        int ret = -ENODEV;
3928        int i;
3929        size_t orig_len = strlen(option);
3930
3931        cmp = strstrip(option);
3932
3933        if (strncmp(cmp, "no", 2) == 0) {
3934                neg = 1;
3935                cmp += 2;
3936        }
3937
3938        mutex_lock(&trace_types_lock);
3939
3940        for (i = 0; trace_options[i]; i++) {
3941                if (strcmp(cmp, trace_options[i]) == 0) {
3942                        ret = set_tracer_flag(tr, 1 << i, !neg);
3943                        break;
3944                }
3945        }
3946
3947        /* If no option could be set, test the specific tracer options */
3948        if (!trace_options[i])
3949                ret = set_tracer_option(tr, cmp, neg);
3950
3951        mutex_unlock(&trace_types_lock);
3952
3953        /*
3954         * If the first trailing whitespace is replaced with '\0' by strstrip,
3955         * turn it back into a space.
3956         */
3957        if (orig_len > strlen(option))
3958                option[strlen(option)] = ' ';
3959
3960        return ret;
3961}
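/*
 * Example writes that exercise trace_set_options() (a sketch).  Core
 * flags in trace_options[] are matched first; anything else falls
 * through to set_tracer_option() for the current tracer's private
 * flags, e.g. (assuming function_graph is the current tracer):
 *
 *	# echo noprint-parent > trace_options
 *	# echo sym-offset > trace_options
 *	# echo nofuncgraph-duration > trace_options
 */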
3962
3963static void __init apply_trace_boot_options(void)
3964{
3965        char *buf = trace_boot_options_buf;
3966        char *option;
3967
3968        while (true) {
3969                option = strsep(&buf, ",");
3970
3971                if (!option)
3972                        break;
3973
3974                if (*option)
3975                        trace_set_options(&global_trace, option);
3976
3977                /* Put back the comma to allow this to be called again */
3978                if (buf)
3979                        *(buf - 1) = ',';
3980        }
3981}
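/*
 * Boot-time equivalent of the above (a sketch): trace_boot_options_buf
 * is assumed to be filled from a "trace_options=" kernel parameter, so
 * a command line such as
 *
 *	trace_options=sym-offset,noprint-parent
 *
 * is split on ',' by strsep() and each piece is passed to
 * trace_set_options() exactly as a write to the trace_options file
 * would be.
 */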
3982
3983static ssize_t
3984tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3985                        size_t cnt, loff_t *ppos)
3986{
3987        struct seq_file *m = filp->private_data;
3988        struct trace_array *tr = m->private;
3989        char buf[64];
3990        int ret;
3991
3992        if (cnt >= sizeof(buf))
3993                return -EINVAL;
3994
3995        if (copy_from_user(buf, ubuf, cnt))
3996                return -EFAULT;
3997
3998        buf[cnt] = 0;
3999
4000        ret = trace_set_options(tr, buf);
4001        if (ret < 0)
4002                return ret;
4003
4004        *ppos += cnt;
4005
4006        return cnt;
4007}
4008
4009static int tracing_trace_options_open(struct inode *inode, struct file *file)
4010{
4011        struct trace_array *tr = inode->i_private;
4012        int ret;
4013
4014        if (tracing_disabled)
4015                return -ENODEV;
4016
4017        if (trace_array_get(tr) < 0)
4018                return -ENODEV;
4019
4020        ret = single_open(file, tracing_trace_options_show, inode->i_private);
4021        if (ret < 0)
4022                trace_array_put(tr);
4023
4024        return ret;
4025}
4026
4027static const struct file_operations tracing_iter_fops = {
4028        .open           = tracing_trace_options_open,
4029        .read           = seq_read,
4030        .llseek         = seq_lseek,
4031        .release        = tracing_single_release_tr,
4032        .write          = tracing_trace_options_write,
4033};
4034
4035static const char readme_msg[] =
4036        "tracing mini-HOWTO:\n\n"
4037        "# echo 0 > tracing_on : quick way to disable tracing\n"
4038        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4039        " Important files:\n"
4040        "  trace\t\t\t- The static contents of the buffer\n"
4041        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4042        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4043        "  current_tracer\t- function and latency tracers\n"
4044        "  available_tracers\t- list of configured tracers for current_tracer\n"
4045        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4046        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4047        "  trace_clock\t\t- change the clock used to order events\n"
4048        "       local:   Per cpu clock but may not be synced across CPUs\n"
4049        "      global:   Synced across CPUs but slows tracing down.\n"
4050        "     counter:   Not a clock, but just an increment\n"
4051        "      uptime:   Jiffy counter from time of boot\n"
4052        "        perf:   Same clock that perf events use\n"
4053#ifdef CONFIG_X86_64
4054        "     x86-tsc:   TSC cycle counter\n"
4055#endif
4056        "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4057        "  tracing_cpumask\t- Limit which CPUs to trace\n"
4058        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4059        "\t\t\t  Remove sub-buffer with rmdir\n"
4060        "  trace_options\t\t- Set format or modify how tracing happens\n"
4061        "\t\t\t  Disable an option by prefixing 'no' to the\n"
4062        "\t\t\t  option name\n"
4063        "  saved_cmdlines_size\t- echo the number of comm-pid entries to cache in here\n"
4064#ifdef CONFIG_DYNAMIC_FTRACE
4065        "\n  available_filter_functions - list of functions that can be filtered on\n"
4066        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4067        "\t\t\t  functions\n"
4068        "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4069        "\t     modules: Can select a group via module\n"
4070        "\t      Format: :mod:<module-name>\n"
4071        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4072        "\t    triggers: a command to perform when function is hit\n"
4073        "\t      Format: <function>:<trigger>[:count]\n"
4074        "\t     trigger: traceon, traceoff\n"
4075        "\t\t      enable_event:<system>:<event>\n"
4076        "\t\t      disable_event:<system>:<event>\n"
4077#ifdef CONFIG_STACKTRACE
4078        "\t\t      stacktrace\n"
4079#endif
4080#ifdef CONFIG_TRACER_SNAPSHOT
4081        "\t\t      snapshot\n"
4082#endif
4083        "\t\t      dump\n"
4084        "\t\t      cpudump\n"
4085        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4086        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4087        "\t     The first one will disable tracing every time do_fault is hit\n"
4088        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4089        "\t       The first time do_trap is hit and it disables tracing, the\n"
4090        "\t       counter will decrement to 2. If tracing is already disabled,\n"
4091        "\t       the counter will not decrement. It only decrements when the\n"
4092        "\t       trigger did work\n"
4093        "\t     To remove trigger without count:\n"
4094        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4095        "\t     To remove trigger with a count:\n"
4096        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4097        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4098        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4099        "\t    modules: Can select a group via module command :mod:\n"
4100        "\t    Does not accept triggers\n"
4101#endif /* CONFIG_DYNAMIC_FTRACE */
4102#ifdef CONFIG_FUNCTION_TRACER
4103        "  set_ftrace_pid\t- Write pid(s) here to function trace only those pids\n"
4104        "\t\t    (function)\n"
4105#endif
4106#ifdef CONFIG_FUNCTION_GRAPH_TRACER
4107        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4108        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4109        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4110#endif
4111#ifdef CONFIG_TRACER_SNAPSHOT
4112        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4113        "\t\t\t  snapshot buffer. Read the contents for more\n"
4114        "\t\t\t  information\n"
4115#endif
4116#ifdef CONFIG_STACK_TRACER
4117        "  stack_trace\t\t- Shows the max stack trace when active\n"
4118        "  stack_max_size\t- Shows current max stack size that was traced\n"
4119        "\t\t\t  Write into this file to reset the max size (trigger a\n"
4120        "\t\t\t  new trace)\n"
4121#ifdef CONFIG_DYNAMIC_FTRACE
4122        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4123        "\t\t\t  traces\n"
4124#endif
4125#endif /* CONFIG_STACK_TRACER */
4126        "  events/\t\t- Directory containing all trace event subsystems:\n"
4127        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4128        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4129        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4130        "\t\t\t  events\n"
4131        "      filter\t\t- If set, only events passing filter are traced\n"
4132        "  events/<system>/<event>/\t- Directory containing control files for\n"
4133        "\t\t\t  <event>:\n"
4134        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4135        "      filter\t\t- If set, only events passing filter are traced\n"
4136        "      trigger\t\t- If set, a command to perform when event is hit\n"
4137        "\t    Format: <trigger>[:count][if <filter>]\n"
4138        "\t   trigger: traceon, traceoff\n"
4139        "\t            enable_event:<system>:<event>\n"
4140        "\t            disable_event:<system>:<event>\n"
4141#ifdef CONFIG_HIST_TRIGGERS
4142        "\t            enable_hist:<system>:<event>\n"
4143        "\t            disable_hist:<system>:<event>\n"
4144#endif
4145#ifdef CONFIG_STACKTRACE
4146        "\t\t    stacktrace\n"
4147#endif
4148#ifdef CONFIG_TRACER_SNAPSHOT
4149        "\t\t    snapshot\n"
4150#endif
4151#ifdef CONFIG_HIST_TRIGGERS
4152        "\t\t    hist (see below)\n"
4153#endif
4154        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4155        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4156        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4157        "\t                  events/block/block_unplug/trigger\n"
4158        "\t   The first disables tracing every time block_unplug is hit.\n"
4159        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4160        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4161        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4162        "\t   Like function triggers, the counter is only decremented if it\n"
4163        "\t    enabled or disabled tracing.\n"
4164        "\t   To remove a trigger without a count:\n"
4165        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4166        "\t   To remove a trigger with a count:\n"
4167        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4168        "\t   Filters can be ignored when removing a trigger.\n"
4169#ifdef CONFIG_HIST_TRIGGERS
4170        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4171        "\t    Format: hist:keys=<field1[,field2,...]>\n"
4172        "\t            [:values=<field1[,field2,...]>]\n"
4173        "\t            [:sort=<field1[,field2,...]>]\n"
4174        "\t            [:size=#entries]\n"
4175        "\t            [:pause][:continue][:clear]\n"
4176        "\t            [:name=histname1]\n"
4177        "\t            [if <filter>]\n\n"
4178        "\t    When a matching event is hit, an entry is added to a hash\n"
4179        "\t    table using the key(s) and value(s) named, and the value of a\n"
4180        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4181        "\t    correspond to fields in the event's format description.  Keys\n"
4182        "\t    can be any field, or the special string 'stacktrace'.\n"
4183        "\t    Compound keys consisting of up to two fields can be specified\n"
4184        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4185        "\t    fields.  Sort keys consisting of up to two fields can be\n"
4186        "\t    specified using the 'sort' keyword.  The sort direction can\n"
4187        "\t    be modified by appending '.descending' or '.ascending' to a\n"
4188        "\t    sort field.  The 'size' parameter can be used to specify more\n"
4189        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4190        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4191        "\t    its histogram data will be shared with other triggers of the\n"
4192        "\t    same name, and trigger hits will update this common data.\n\n"
4193        "\t    Reading the 'hist' file for the event will dump the hash\n"
4194        "\t    table in its entirety to stdout.  If there are multiple hist\n"
4195        "\t    triggers attached to an event, there will be a table for each\n"
4196        "\t    trigger in the output.  The table displayed for a named\n"
4197        "\t    trigger will be the same as any other instance having the\n"
4198        "\t    same name.  The default format used to display a given field\n"
4199        "\t    can be modified by appending any of the following modifiers\n"
4200        "\t    to the field name, as applicable:\n\n"
4201        "\t            .hex        display a number as a hex value\n"
4202        "\t            .sym        display an address as a symbol\n"
4203        "\t            .sym-offset display an address as a symbol and offset\n"
4204        "\t            .execname   display a common_pid as a program name\n"
4205        "\t            .syscall    display a syscall id as a syscall name\n"
4206        "\t            .log2       display log2 value rather than raw number\n\n"
4207        "\t    The 'pause' parameter can be used to pause an existing hist\n"
4208        "\t    trigger or to start a hist trigger but not log any events\n"
4209        "\t    until told to do so.  'continue' can be used to start or\n"
4210        "\t    restart a paused hist trigger.\n\n"
4211        "\t    The 'clear' parameter will clear the contents of a running\n"
4212        "\t    hist trigger and leave its current paused/active state\n"
4213        "\t    unchanged.\n\n"
4214        "\t    The enable_hist and disable_hist triggers can be used to\n"
4215        "\t    have one event conditionally start and stop another event's\n"
4216        "\t    already-attached hist trigger.  The syntax is analogous to\n"
4217        "\t    the enable_event and disable_event triggers.\n"
4218#endif
4219;
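/*
 * Minimal end-to-end session using the knobs documented in the
 * mini-HOWTO above (a sketch; paths are relative to the tracefs
 * mount point, commonly /sys/kernel/tracing):
 *
 *	# echo 0 > tracing_on			(quiesce)
 *	# echo function > current_tracer	(pick a tracer)
 *	# echo 'vfs_*' > set_ftrace_filter	(limit traced functions)
 *	# echo 1 > tracing_on			(start)
 *	# cat trace_pipe			(consuming read)
 *	# echo > trace				(clear the buffer)
 */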
4220
4221static ssize_t
4222tracing_readme_read(struct file *filp, char __user *ubuf,
4223                       size_t cnt, loff_t *ppos)
4224{
4225        return simple_read_from_buffer(ubuf, cnt, ppos,
4226                                        readme_msg, strlen(readme_msg));
4227}
4228
4229static const struct file_operations tracing_readme_fops = {
4230        .open           = tracing_open_generic,
4231        .read           = tracing_readme_read,
4232        .llseek         = generic_file_llseek,
4233};
4234
4235static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4236{
4237        unsigned int *ptr = v;
4238
4239        if (*pos || m->count)
4240                ptr++;
4241
4242        (*pos)++;
4243
4244        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4245             ptr++) {
4246                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4247                        continue;
4248
4249                return ptr;
4250        }
4251
4252        return NULL;
4253}
4254
4255static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4256{
4257        void *v;
4258        loff_t l = 0;
4259
4260        preempt_disable();
4261        arch_spin_lock(&trace_cmdline_lock);
4262
4263        v = &savedcmd->map_cmdline_to_pid[0];
4264        while (l <= *pos) {
4265                v = saved_cmdlines_next(m, v, &l);
4266                if (!v)
4267                        return NULL;
4268        }
4269
4270        return v;
4271}
4272
4273static void saved_cmdlines_stop(struct seq_file *m, void *v)
4274{
4275        arch_spin_unlock(&trace_cmdline_lock);
4276        preempt_enable();
4277}
4278
4279static int saved_cmdlines_show(struct seq_file *m, void *v)
4280{
4281        char buf[TASK_COMM_LEN];
4282        unsigned int *pid = v;
4283
4284        __trace_find_cmdline(*pid, buf);
4285        seq_printf(m, "%d %s\n", *pid, buf);
4286        return 0;
4287}
4288
4289static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4290        .start          = saved_cmdlines_start,
4291        .next           = saved_cmdlines_next,
4292        .stop           = saved_cmdlines_stop,
4293        .show           = saved_cmdlines_show,
4294};
4295
4296static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4297{
4298        if (tracing_disabled)
4299                return -ENODEV;
4300
4301        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4302}
4303
4304static const struct file_operations tracing_saved_cmdlines_fops = {
4305        .open           = tracing_saved_cmdlines_open,
4306        .read           = seq_read,
4307        .llseek         = seq_lseek,
4308        .release        = seq_release,
4309};
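/*
 * saved_cmdlines_show() above emits one "<pid> <comm>" pair per line,
 * so reading the file looks roughly like this (illustrative values):
 *
 *	# cat saved_cmdlines
 *	1024 sshd
 *	1391 bash
 *	2107 kworker/0:1
 */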
4310
4311static ssize_t
4312tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4313                                 size_t cnt, loff_t *ppos)
4314{
4315        char buf[64];
4316        int r;
4317
4318        arch_spin_lock(&trace_cmdline_lock);
4319        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4320        arch_spin_unlock(&trace_cmdline_lock);
4321
4322        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4323}
4324
4325static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4326{
4327        kfree(s->saved_cmdlines);
4328        kfree(s->map_cmdline_to_pid);
4329        kfree(s);
4330}
4331
4332static int tracing_resize_saved_cmdlines(unsigned int val)
4333{
4334        struct saved_cmdlines_buffer *s, *savedcmd_temp;
4335
4336        s = kmalloc(sizeof(*s), GFP_KERNEL);
4337        if (!s)
4338                return -ENOMEM;
4339
4340        if (allocate_cmdlines_buffer(val, s) < 0) {
4341                kfree(s);
4342                return -ENOMEM;
4343        }
4344
4345        arch_spin_lock(&trace_cmdline_lock);
4346        savedcmd_temp = savedcmd;
4347        savedcmd = s;
4348        arch_spin_unlock(&trace_cmdline_lock);
4349        free_saved_cmdlines_buffer(savedcmd_temp);
4350
4351        return 0;
4352}
4353
4354static ssize_t
4355tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4356                                  size_t cnt, loff_t *ppos)
4357{
4358        unsigned long val;
4359        int ret;
4360
4361        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4362        if (ret)
4363                return ret;
4364
4365        /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
4366        if (!val || val > PID_MAX_DEFAULT)
4367                return -EINVAL;
4368
4369        ret = tracing_resize_saved_cmdlines((unsigned int)val);
4370        if (ret < 0)
4371                return ret;
4372
4373        *ppos += cnt;
4374
4375        return cnt;
4376}
4377
4378static const struct file_operations tracing_saved_cmdlines_size_fops = {
4379        .open           = tracing_open_generic,
4380        .read           = tracing_saved_cmdlines_size_read,
4381        .write          = tracing_saved_cmdlines_size_write,
4382};
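/*
 * Example for the saved_cmdlines_size file handled above (a sketch;
 * the number read back is illustrative):
 *
 *	# cat saved_cmdlines_size
 *	128
 *	# echo 1024 > saved_cmdlines_size	(grow the comm cache)
 *
 * Values must be between 1 and PID_MAX_DEFAULT; anything else is
 * rejected with -EINVAL by the write handler above.
 */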
4383
4384#ifdef CONFIG_TRACE_ENUM_MAP_FILE
4385static union trace_enum_map_item *
4386update_enum_map(union trace_enum_map_item *ptr)
4387{
4388        if (!ptr->map.enum_string) {
4389                if (ptr->tail.next) {
4390                        ptr = ptr->tail.next;
4391                        /* Set ptr to the next real item (skip head) */
4392                        ptr++;
4393                } else
4394                        return NULL;
4395        }
4396        return ptr;
4397}
4398
4399static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4400{
4401        union trace_enum_map_item *ptr = v;
4402
4403        /*
4404         * Paranoid! If ptr points to end, we don't want to increment past it.
4405         * This really should never happen.
4406         */
4407        ptr = update_enum_map(ptr);
4408        if (WARN_ON_ONCE(!ptr))
4409                return NULL;
4410
4411        ptr++;
4412
4413        (*pos)++;
4414
4415        ptr = update_enum_map(ptr);
4416
4417        return ptr;
4418}
4419
4420static void *enum_map_start(struct seq_file *m, loff_t *pos)
4421{
4422        union trace_enum_map_item *v;
4423        loff_t l = 0;
4424
4425        mutex_lock(&trace_enum_mutex);
4426
4427        v = trace_enum_maps;
4428        if (v)
4429                v++;
4430
4431        while (v && l < *pos) {
4432                v = enum_map_next(m, v, &l);
4433        }
4434
4435        return v;
4436}
4437
4438static void enum_map_stop(struct seq_file *m, void *v)
4439{
4440        mutex_unlock(&trace_enum_mutex);
4441}
4442
4443static int enum_map_show(struct seq_file *m, void *v)
4444{
4445        union trace_enum_map_item *ptr = v;
4446
4447        seq_printf(m, "%s %ld (%s)\n",
4448                   ptr->map.enum_string, ptr->map.enum_value,
4449                   ptr->map.system);
4450
4451        return 0;
4452}
4453
4454static const struct seq_operations tracing_enum_map_seq_ops = {
4455        .start          = enum_map_start,
4456        .next           = enum_map_next,
4457        .stop           = enum_map_stop,
4458        .show           = enum_map_show,
4459};
4460
4461static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4462{
4463        if (tracing_disabled)
4464                return -ENODEV;
4465
4466        return seq_open(filp, &tracing_enum_map_seq_ops);
4467}
4468
4469static const struct file_operations tracing_enum_map_fops = {
4470        .open           = tracing_enum_map_open,
4471        .read           = seq_read,
4472        .llseek         = seq_lseek,
4473        .release        = seq_release,
4474};
4475
4476static inline union trace_enum_map_item *
4477trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4478{
4479        /* Return tail of array given the head */
4480        return ptr + ptr->head.length + 1;
4481}
4482
4483static void
4484trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4485                           int len)
4486{
4487        struct trace_enum_map **stop;
4488        struct trace_enum_map **map;
4489        union trace_enum_map_item *map_array;
4490        union trace_enum_map_item *ptr;
4491
4492        stop = start + len;
4493
4494        /*
4495         * The trace_enum_maps contains the map plus a head and tail item,
4496         * where the head holds the module and length of array, and the
4497         * tail holds a pointer to the next list.
4498         */
4499        map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4500        if (!map_array) {
4501                pr_warn("Unable to allocate trace enum mapping\n");
4502                return;
4503        }
4504
4505        mutex_lock(&trace_enum_mutex);
4506
4507        if (!trace_enum_maps)
4508                trace_enum_maps = map_array;
4509        else {
4510                ptr = trace_enum_maps;
4511                for (;;) {
4512                        ptr = trace_enum_jmp_to_tail(ptr);
4513                        if (!ptr->tail.next)
4514                                break;
4515                        ptr = ptr->tail.next;
4516
4517                }
4518                ptr->tail.next = map_array;
4519        }
4520        map_array->head.mod = mod;
4521        map_array->head.length = len;
4522        map_array++;
4523
4524        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4525                map_array->map = **map;
4526                map_array++;
4527        }
4528        memset(map_array, 0, sizeof(*map_array));
4529
4530        mutex_unlock(&trace_enum_mutex);
4531}
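/*
 * Layout built by trace_insert_enum_map_file() above for len == 2
 * (a sketch of the len + 2 element array it allocates):
 *
 *	map_array[0]	head	.mod = mod, .length = 2
 *	map_array[1]	map	copy of *start[0]
 *	map_array[2]	map	copy of *start[1]
 *	map_array[3]	tail	zeroed terminator; .tail.next is later
 *				pointed at the next module's array
 *
 * trace_enum_jmp_to_tail() steps from a head to its tail using
 * head.length, which is how the chain is walked above.
 */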
4532
4533static void trace_create_enum_file(struct dentry *d_tracer)
4534{
4535        trace_create_file("enum_map", 0444, d_tracer,
4536                          NULL, &tracing_enum_map_fops);
4537}
4538
4539#else /* CONFIG_TRACE_ENUM_MAP_FILE */
4540static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4541static inline void trace_insert_enum_map_file(struct module *mod,
4542                              struct trace_enum_map **start, int len) { }
4543#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4544
4545static void trace_insert_enum_map(struct module *mod,
4546                                  struct trace_enum_map **start, int len)
4547{
4548        struct trace_enum_map **map;
4549
4550        if (len <= 0)
4551                return;
4552
4553        map = start;
4554
4555        trace_event_enum_update(map, len);
4556
4557        trace_insert_enum_map_file(mod, start, len);
4558}
4559
4560static ssize_t
4561tracing_set_trace_read(struct file *filp, char __user *ubuf,
4562                       size_t cnt, loff_t *ppos)
4563{
4564        struct trace_array *tr = filp->private_data;
4565        char buf[MAX_TRACER_SIZE+2];
4566        int r;
4567
4568        mutex_lock(&trace_types_lock);
4569        r = sprintf(buf, "%s\n", tr->current_trace->name);
4570        mutex_unlock(&trace_types_lock);
4571
4572        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4573}
4574
4575int tracer_init(struct tracer *t, struct trace_array *tr)
4576{
4577        tracing_reset_online_cpus(&tr->trace_buffer);
4578        return t->init(tr);
4579}
4580
4581static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4582{
4583        int cpu;
4584
4585        for_each_tracing_cpu(cpu)
4586                per_cpu_ptr(buf->data, cpu)->entries = val;
4587}
4588
4589#ifdef CONFIG_TRACER_MAX_TRACE
4590/* resize @trace_buf's buffer to the size of @size_buf's entries */
4591static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4592                                        struct trace_buffer *size_buf, int cpu_id)
4593{
4594        int cpu, ret = 0;
4595
4596        if (cpu_id == RING_BUFFER_ALL_CPUS) {
4597                for_each_tracing_cpu(cpu) {
4598                        ret = ring_buffer_resize(trace_buf->buffer,
4599                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4600                        if (ret < 0)
4601                                break;
4602                        per_cpu_ptr(trace_buf->data, cpu)->entries =
4603                                per_cpu_ptr(size_buf->data, cpu)->entries;
4604                }
4605        } else {
4606                ret = ring_buffer_resize(trace_buf->buffer,
4607                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4608                if (ret == 0)
4609                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4610                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
4611        }
4612
4613        return ret;
4614}
4615#endif /* CONFIG_TRACER_MAX_TRACE */
4616
4617static int __tracing_resize_ring_buffer(struct trace_array *tr,
4618                                        unsigned long size, int cpu)
4619{
4620        int ret;
4621
4622        /*
4623         * If kernel or user changes the size of the ring buffer
4624         * we use the size that was given, and we can forget about
4625         * expanding it later.
4626         */
4627        ring_buffer_expanded = true;
4628
4629        /* May be called before buffers are initialized */
4630        if (!tr->trace_buffer.buffer)
4631                return 0;
4632
4633        ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4634        if (ret < 0)
4635                return ret;
4636
4637#ifdef CONFIG_TRACER_MAX_TRACE
4638        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4639            !tr->current_trace->use_max_tr)
4640                goto out;
4641
4642        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4643        if (ret < 0) {
4644                int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4645                                                     &tr->trace_buffer, cpu);
4646                if (r < 0) {
4647                        /*
4648                         * AARGH! We are left with different
4649                         * size max buffer!!!!
4650                         * The max buffer is our "snapshot" buffer.
4651                         * When a tracer needs a snapshot (one of the
4652                         * latency tracers), it swaps the max buffer
4653                         * with the saved snapshot. We succeeded in
4654                         * updating the size of the main buffer, but failed to
4655                         * update the size of the max buffer. But when we tried
4656                         * to reset the main buffer to the original size, we
4657                         * failed there too. This is very unlikely to
4658                         * happen, but if it does, warn and kill all
4659                         * tracing.
4660                         */
4661                        WARN_ON(1);
4662                        tracing_disabled = 1;
4663                }
4664                return ret;
4665        }
4666
4667        if (cpu == RING_BUFFER_ALL_CPUS)
4668                set_buffer_entries(&tr->max_buffer, size);
4669        else
4670                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4671
4672 out:
4673#endif /* CONFIG_TRACER_MAX_TRACE */
4674
4675        if (cpu == RING_BUFFER_ALL_CPUS)
4676                set_buffer_entries(&tr->trace_buffer, size);
4677        else
4678                per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4679
4680        return ret;
4681}
4682
4683static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4684                                          unsigned long size, int cpu_id)
4685{
4686        int ret = size;
4687
4688        mutex_lock(&trace_types_lock);
4689
4690        if (cpu_id != RING_BUFFER_ALL_CPUS) {
4691                /* make sure this cpu is enabled in the mask */
4692                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4693                        ret = -EINVAL;
4694                        goto out;
4695                }
4696        }
4697
4698        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4699        if (ret < 0)
4700                ret = -ENOMEM;
4701
4702out:
4703        mutex_unlock(&trace_types_lock);
4704
4705        return ret;
4706}
4707
4708
4709/**
4710 * tracing_update_buffers - used by tracing facility to expand ring buffers
4711 *
4712 * To save memory when tracing is never used on a system that has it
4713 * configured in, the ring buffers are set to a minimum size. But once
4714 * a user starts to use the tracing facility, they need to grow
4715 * to their default size.
4716 *
4717 * This function is to be called when a tracer is about to be used.
4718 */
4719int tracing_update_buffers(void)
4720{
4721        int ret = 0;
4722
4723        mutex_lock(&trace_types_lock);
4724        if (!ring_buffer_expanded)
4725                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4726                                                RING_BUFFER_ALL_CPUS);
4727        mutex_unlock(&trace_types_lock);
4728
4729        return ret;
4730}
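/*
 * Typical caller pattern for tracing_update_buffers() (a sketch of how
 * an enable path is expected to use it, per the comment above):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	(the ring buffers are now expanded to their default size)
 */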
4731
4732struct trace_option_dentry;
4733
4734static void
4735create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4736
4737/*
4738 * Used to clear out the tracer before deletion of an instance.
4739 * Must have trace_types_lock held.
4740 */
4741static void tracing_set_nop(struct trace_array *tr)
4742{
4743        if (tr->current_trace == &nop_trace)
4744                return;
4745
4746        tr->current_trace->enabled--;
4747
4748        if (tr->current_trace->reset)
4749                tr->current_trace->reset(tr);
4750
4751        tr->current_trace = &nop_trace;
4752}
4753
4754static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4755{
4756        /* Only enable if the directory has been created already. */
4757        if (!tr->dir)
4758                return;
4759
4760        create_trace_option_files(tr, t);
4761}
4762
4763static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4764{
4765        struct tracer *t;
4766#ifdef CONFIG_TRACER_MAX_TRACE
4767        bool had_max_tr;
4768#endif
4769        int ret = 0;
4770
4771        mutex_lock(&trace_types_lock);
4772
4773        if (!ring_buffer_expanded) {
4774                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4775                                                RING_BUFFER_ALL_CPUS);
4776                if (ret < 0)
4777                        goto out;
4778                ret = 0;
4779        }
4780
4781        for (t = trace_types; t; t = t->next) {
4782                if (strcmp(t->name, buf) == 0)
4783                        break;
4784        }
4785        if (!t) {
4786                ret = -EINVAL;
4787                goto out;
4788        }
4789        if (t == tr->current_trace)
4790                goto out;
4791
4792        /* Some tracers are only allowed for the top level buffer */
4793        if (!trace_ok_for_array(t, tr)) {
4794                ret = -EINVAL;
4795                goto out;
4796        }
4797
4798        /* If trace pipe files are being read, we can't change the tracer */
4799        if (tr->current_trace->ref) {
4800                ret = -EBUSY;
4801                goto out;
4802        }
4803
4804        trace_branch_disable();
4805
4806        tr->current_trace->enabled--;
4807
4808        if (tr->current_trace->reset)
4809                tr->current_trace->reset(tr);
4810
4811        /* Current trace needs to be nop_trace before synchronize_sched */
4812        tr->current_trace = &nop_trace;
4813
4814#ifdef CONFIG_TRACER_MAX_TRACE
4815        had_max_tr = tr->allocated_snapshot;
4816
4817        if (had_max_tr && !t->use_max_tr) {
4818                /*
4819                 * We need to make sure that the update_max_tr sees that
4820                 * current_trace changed to nop_trace to keep it from
4821                 * swapping the buffers after we resize it.
4822                 * The update_max_tr is called with interrupts disabled,
4823                 * so a synchronize_sched() is sufficient.
4824                 */
4825                synchronize_sched();
4826                free_snapshot(tr);
4827        }
4828#endif
4829
4830#ifdef CONFIG_TRACER_MAX_TRACE
4831        if (t->use_max_tr && !had_max_tr) {
4832                ret = alloc_snapshot(tr);
4833                if (ret < 0)
4834                        goto out;
4835        }
4836#endif
4837
4838        if (t->init) {
4839                ret = tracer_init(t, tr);
4840                if (ret)
4841                        goto out;
4842        }
4843
4844        tr->current_trace = t;
4845        tr->current_trace->enabled++;
4846        trace_branch_enable(tr);
4847 out:
4848        mutex_unlock(&trace_types_lock);
4849
4850        return ret;
4851}
4852
4853static ssize_t
4854tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4855                        size_t cnt, loff_t *ppos)
4856{
4857        struct trace_array *tr = filp->private_data;
4858        char buf[MAX_TRACER_SIZE+1];
4859        int i;
4860        size_t ret;
4861        int err;
4862
4863        ret = cnt;
4864
4865        if (cnt > MAX_TRACER_SIZE)
4866                cnt = MAX_TRACER_SIZE;
4867
4868        if (copy_from_user(buf, ubuf, cnt))
4869                return -EFAULT;
4870
4871        buf[cnt] = 0;
4872
4873        /* strip trailing whitespace. */
4874        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4875                buf[i] = 0;
4876
4877        err = tracing_set_tracer(tr, buf);
4878        if (err)
4879                return err;
4880
4881        *ppos += ret;
4882
4883        return ret;
4884}
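
/*
 * Editorial illustration (not kernel code): tracing_set_trace_write() is the
 * handler that runs when userspace writes a tracer name into the tracefs
 * "current_tracer" file. A minimal userspace sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing; the trailing newline from a shell echo is
 * stripped by the handler above, and error handling is trimmed for brevity.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int example_set_tracer(const char *name)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		n = write(fd, name, strlen(name));
 *		close(fd);
 *		return n < 0 ? -1 : 0;
 *	}
 */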
4885
4886static ssize_t
4887tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4888                   size_t cnt, loff_t *ppos)
4889{
4890        char buf[64];
4891        int r;
4892
4893        r = snprintf(buf, sizeof(buf), "%ld\n",
4894                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4895        if (r > sizeof(buf))
4896                r = sizeof(buf);
4897        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4898}
4899
4900static ssize_t
4901tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4902                    size_t cnt, loff_t *ppos)
4903{
4904        unsigned long val;
4905        int ret;
4906
4907        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4908        if (ret)
4909                return ret;
4910
4911        *ptr = val * 1000;
4912
4913        return cnt;
4914}
4915
4916static ssize_t
4917tracing_thresh_read(struct file *filp, char __user *ubuf,
4918                    size_t cnt, loff_t *ppos)
4919{
4920        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4921}
4922
4923static ssize_t
4924tracing_thresh_write(struct file *filp, const char __user *ubuf,
4925                     size_t cnt, loff_t *ppos)
4926{
4927        struct trace_array *tr = filp->private_data;
4928        int ret;
4929
4930        mutex_lock(&trace_types_lock);
4931        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4932        if (ret < 0)
4933                goto out;
4934
4935        if (tr->current_trace->update_thresh) {
4936                ret = tr->current_trace->update_thresh(tr);
4937                if (ret < 0)
4938                        goto out;
4939        }
4940
4941        ret = cnt;
4942out:
4943        mutex_unlock(&trace_types_lock);
4944
4945        return ret;
4946}
4947
4948#ifdef CONFIG_TRACER_MAX_TRACE
4949
4950static ssize_t
4951tracing_max_lat_read(struct file *filp, char __user *ubuf,
4952                     size_t cnt, loff_t *ppos)
4953{
4954        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4955}
4956
4957static ssize_t
4958tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4959                      size_t cnt, loff_t *ppos)
4960{
4961        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4962}
4963
4964#endif
4965
4966static int tracing_open_pipe(struct inode *inode, struct file *filp)
4967{
4968        struct trace_array *tr = inode->i_private;
4969        struct trace_iterator *iter;
4970        int ret = 0;
4971
4972        if (tracing_disabled)
4973                return -ENODEV;
4974
4975        if (trace_array_get(tr) < 0)
4976                return -ENODEV;
4977
4978        mutex_lock(&trace_types_lock);
4979
4980        /* create a buffer to store the information to pass to userspace */
4981        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4982        if (!iter) {
4983                ret = -ENOMEM;
4984                __trace_array_put(tr);
4985                goto out;
4986        }
4987
4988        trace_seq_init(&iter->seq);
4989        iter->trace = tr->current_trace;
4990
4991        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4992                ret = -ENOMEM;
4993                goto fail;
4994        }
4995
4996        /* trace pipe does not show start of buffer */
4997        cpumask_setall(iter->started);
4998
4999        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5000                iter->iter_flags |= TRACE_FILE_LAT_FMT;
5001
5002        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5003        if (trace_clocks[tr->clock_id].in_ns)
5004                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5005
5006        iter->tr = tr;
5007        iter->trace_buffer = &tr->trace_buffer;
5008        iter->cpu_file = tracing_get_cpu(inode);
5009        mutex_init(&iter->mutex);
5010        filp->private_data = iter;
5011
5012        if (iter->trace->pipe_open)
5013                iter->trace->pipe_open(iter);
5014
5015        nonseekable_open(inode, filp);
5016
5017        tr->current_trace->ref++;
5018out:
5019        mutex_unlock(&trace_types_lock);
5020        return ret;
5021
5022fail:
5023        kfree(iter->trace);
5024        kfree(iter);
5025        __trace_array_put(tr);
5026        mutex_unlock(&trace_types_lock);
5027        return ret;
5028}
5029
5030static int tracing_release_pipe(struct inode *inode, struct file *file)
5031{
5032        struct trace_iterator *iter = file->private_data;
5033        struct trace_array *tr = inode->i_private;
5034
5035        mutex_lock(&trace_types_lock);
5036
5037        tr->current_trace->ref--;
5038
5039        if (iter->trace->pipe_close)
5040                iter->trace->pipe_close(iter);
5041
5042        mutex_unlock(&trace_types_lock);
5043
5044        free_cpumask_var(iter->started);
5045        mutex_destroy(&iter->mutex);
5046        kfree(iter);
5047
5048        trace_array_put(tr);
5049
5050        return 0;
5051}
5052
5053static unsigned int
5054trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5055{
5056        struct trace_array *tr = iter->tr;
5057
5058        /* Iterators are static; they are either filled or empty */
5059        if (trace_buffer_iter(iter, iter->cpu_file))
5060                return POLLIN | POLLRDNORM;
5061
5062        if (tr->trace_flags & TRACE_ITER_BLOCK)
5063                /*
5064                 * Always select as readable when in blocking mode
5065                 */
5066                return POLLIN | POLLRDNORM;
5067        else
5068                return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5069                                             filp, poll_table);
5070}
5071
5072static unsigned int
5073tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5074{
5075        struct trace_iterator *iter = filp->private_data;
5076
5077        return trace_poll(iter, filp, poll_table);
5078}
5079
5080/* Must be called with iter->mutex held. */
5081static int tracing_wait_pipe(struct file *filp)
5082{
5083        struct trace_iterator *iter = filp->private_data;
5084        int ret;
5085
5086        while (trace_empty(iter)) {
5087
5088                if ((filp->f_flags & O_NONBLOCK)) {
5089                        return -EAGAIN;
5090                }
5091
5092                /*
5093                 * We only return an EOF once something has been read and
5094                 * tracing has been disabled. If tracing is disabled but we
5095                 * have never read anything, we keep blocking. This allows a
5096                 * user to cat this file and then enable tracing; after we
5097                 * have read something, disabling tracing again gives an EOF.
5098                 *
5099                 * iter->pos will be 0 if we haven't read anything.
5100                 */
5101                if (!tracing_is_on() && iter->pos)
5102                        break;
5103
5104                mutex_unlock(&iter->mutex);
5105
5106                ret = wait_on_pipe(iter, false);
5107
5108                mutex_lock(&iter->mutex);
5109
5110                if (ret)
5111                        return ret;
5112        }
5113
5114        return 1;
5115}
5116
5117/*
5118 * Consumer reader.
5119 */
5120static ssize_t
5121tracing_read_pipe(struct file *filp, char __user *ubuf,
5122                  size_t cnt, loff_t *ppos)
5123{
5124        struct trace_iterator *iter = filp->private_data;
5125        ssize_t sret;
5126
5127        /*
5128         * Avoid more than one consumer on a single file descriptor
5129         * This is just a matter of trace coherency; the ring buffer itself
5130         * is protected.
5131         */
5132        mutex_lock(&iter->mutex);
5133
5134        /* return any leftover data */
5135        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5136        if (sret != -EBUSY)
5137                goto out;
5138
5139        trace_seq_init(&iter->seq);
5140
5141        if (iter->trace->read) {
5142                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5143                if (sret)
5144                        goto out;
5145        }
5146
5147waitagain:
5148        sret = tracing_wait_pipe(filp);
5149        if (sret <= 0)
5150                goto out;
5151
5152        /* stop when tracing is finished */
5153        if (trace_empty(iter)) {
5154                sret = 0;
5155                goto out;
5156        }
5157
5158        if (cnt >= PAGE_SIZE)
5159                cnt = PAGE_SIZE - 1;
5160
5161        /* reset all but tr, trace, and overruns */
5162        memset(&iter->seq, 0,
5163               sizeof(struct trace_iterator) -
5164               offsetof(struct trace_iterator, seq));
5165        cpumask_clear(iter->started);
5166        iter->pos = -1;
5167
5168        trace_event_read_lock();
5169        trace_access_lock(iter->cpu_file);
5170        while (trace_find_next_entry_inc(iter) != NULL) {
5171                enum print_line_t ret;
5172                int save_len = iter->seq.seq.len;
5173
5174                ret = print_trace_line(iter);
5175                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5176                        /* don't print partial lines */
5177                        iter->seq.seq.len = save_len;
5178                        break;
5179                }
5180                if (ret != TRACE_TYPE_NO_CONSUME)
5181                        trace_consume(iter);
5182
5183                if (trace_seq_used(&iter->seq) >= cnt)
5184                        break;
5185
5186                /*
5187                 * Setting the full flag means we reached the trace_seq buffer
5188                 * size and should have left via the partial-output condition
5189                 * above. One of the trace_seq_* functions is not being used properly.
5190                 */
5191                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5192                          iter->ent->type);
5193        }
5194        trace_access_unlock(iter->cpu_file);
5195        trace_event_read_unlock();
5196
5197        /* Now copy what we have to the user */
5198        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5199        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5200                trace_seq_init(&iter->seq);
5201
5202        /*
5203         * If there was nothing to send to user, in spite of consuming trace
5204         * entries, go back to wait for more entries.
5205         */
5206        if (sret == -EBUSY)
5207                goto waitagain;
5208
5209out:
5210        mutex_unlock(&iter->mutex);
5211
5212        return sret;
5213}
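
/*
 * Editorial illustration (not kernel code): a minimal userspace consumer of
 * the "trace_pipe" file, matching the semantics of tracing_wait_pipe() and
 * tracing_read_pipe() above: reads block while the buffer is empty (or fail
 * with EAGAIN under O_NONBLOCK), entries are consumed as they are returned,
 * and EOF is seen only once something has been read and tracing is turned
 * off. Assumes tracefs is mounted at /sys/kernel/tracing.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int example_consume_trace_pipe(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */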
5214
5215static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5216                                     unsigned int idx)
5217{
5218        __free_page(spd->pages[idx]);
5219}
5220
5221static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5222        .can_merge              = 0,
5223        .confirm                = generic_pipe_buf_confirm,
5224        .release                = generic_pipe_buf_release,
5225        .steal                  = generic_pipe_buf_steal,
5226        .get                    = generic_pipe_buf_get,
5227};
5228
5229static size_t
5230tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5231{
5232        size_t count;
5233        int save_len;
5234        int ret;
5235
5236        /* Seq buffer is page-sized, exactly what we need. */
5237        for (;;) {
5238                save_len = iter->seq.seq.len;
5239                ret = print_trace_line(iter);
5240
5241                if (trace_seq_has_overflowed(&iter->seq)) {
5242                        iter->seq.seq.len = save_len;
5243                        break;
5244                }
5245
5246                /*
5247                 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5248                 * should only be returned if iter->seq overflowed, which is
5249                 * handled just above. But check it anyway to be safe.
5250                 */
5251                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5252                        iter->seq.seq.len = save_len;
5253                        break;
5254                }
5255
5256                count = trace_seq_used(&iter->seq) - save_len;
5257                if (rem < count) {
5258                        rem = 0;
5259                        iter->seq.seq.len = save_len;
5260                        break;
5261                }
5262
5263                if (ret != TRACE_TYPE_NO_CONSUME)
5264                        trace_consume(iter);
5265                rem -= count;
5266                if (!trace_find_next_entry_inc(iter))   {
5267                        rem = 0;
5268                        iter->ent = NULL;
5269                        break;
5270                }
5271        }
5272
5273        return rem;
5274}
5275
5276static ssize_t tracing_splice_read_pipe(struct file *filp,
5277                                        loff_t *ppos,
5278                                        struct pipe_inode_info *pipe,
5279                                        size_t len,
5280                                        unsigned int flags)
5281{
5282        struct page *pages_def[PIPE_DEF_BUFFERS];
5283        struct partial_page partial_def[PIPE_DEF_BUFFERS];
5284        struct trace_iterator *iter = filp->private_data;
5285        struct splice_pipe_desc spd = {
5286                .pages          = pages_def,
5287                .partial        = partial_def,
5288                .nr_pages       = 0, /* This gets updated below. */
5289                .nr_pages_max   = PIPE_DEF_BUFFERS,
5290                .flags          = flags,
5291                .ops            = &tracing_pipe_buf_ops,
5292                .spd_release    = tracing_spd_release_pipe,
5293        };
5294        ssize_t ret;
5295        size_t rem;
5296        unsigned int i;
5297
5298        if (splice_grow_spd(pipe, &spd))
5299                return -ENOMEM;
5300
5301        mutex_lock(&iter->mutex);
5302
5303        if (iter->trace->splice_read) {
5304                ret = iter->trace->splice_read(iter, filp,
5305                                               ppos, pipe, len, flags);
5306                if (ret)
5307                        goto out_err;
5308        }
5309
5310        ret = tracing_wait_pipe(filp);
5311        if (ret <= 0)
5312                goto out_err;
5313
5314        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5315                ret = -EFAULT;
5316                goto out_err;
5317        }
5318
5319        trace_event_read_lock();
5320        trace_access_lock(iter->cpu_file);
5321
5322        /* Fill as many pages as possible. */
5323        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5324                spd.pages[i] = alloc_page(GFP_KERNEL);
5325                if (!spd.pages[i])
5326                        break;
5327
5328                rem = tracing_fill_pipe_page(rem, iter);
5329
5330                /* Copy the data into the page, so we can start over. */
5331                ret = trace_seq_to_buffer(&iter->seq,
5332                                          page_address(spd.pages[i]),
5333                                          trace_seq_used(&iter->seq));
5334                if (ret < 0) {
5335                        __free_page(spd.pages[i]);
5336                        break;
5337                }
5338                spd.partial[i].offset = 0;
5339                spd.partial[i].len = trace_seq_used(&iter->seq);
5340
5341                trace_seq_init(&iter->seq);
5342        }
5343
5344        trace_access_unlock(iter->cpu_file);
5345        trace_event_read_unlock();
5346        mutex_unlock(&iter->mutex);
5347
5348        spd.nr_pages = i;
5349
5350        if (i)
5351                ret = splice_to_pipe(pipe, &spd);
5352        else
5353                ret = 0;
5354out:
5355        splice_shrink_spd(&spd);
5356        return ret;
5357
5358out_err:
5359        mutex_unlock(&iter->mutex);
5360        goto out;
5361}
5362
5363static ssize_t
5364tracing_entries_read(struct file *filp, char __user *ubuf,
5365                     size_t cnt, loff_t *ppos)
5366{
5367        struct inode *inode = file_inode(filp);
5368        struct trace_array *tr = inode->i_private;
5369        int cpu = tracing_get_cpu(inode);
5370        char buf[64];
5371        int r = 0;
5372        ssize_t ret;
5373
5374        mutex_lock(&trace_types_lock);
5375
5376        if (cpu == RING_BUFFER_ALL_CPUS) {
5377                int cpu, buf_size_same;
5378                unsigned long size;
5379
5380                size = 0;
5381                buf_size_same = 1;
5382                /* check if all cpu sizes are same */
5383                for_each_tracing_cpu(cpu) {
5384                        /* fill in the size from first enabled cpu */
5385                        if (size == 0)
5386                                size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5387                        if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5388                                buf_size_same = 0;
5389                                break;
5390                        }
5391                }
5392
5393                if (buf_size_same) {
5394                        if (!ring_buffer_expanded)
5395                                r = sprintf(buf, "%lu (expanded: %lu)\n",
5396                                            size >> 10,
5397                                            trace_buf_size >> 10);
5398                        else
5399                                r = sprintf(buf, "%lu\n", size >> 10);
5400                } else
5401                        r = sprintf(buf, "X\n");
5402        } else
5403                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5404
5405        mutex_unlock(&trace_types_lock);
5406
5407        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5408        return ret;
5409}
5410
5411static ssize_t
5412tracing_entries_write(struct file *filp, const char __user *ubuf,
5413                      size_t cnt, loff_t *ppos)
5414{
5415        struct inode *inode = file_inode(filp);
5416        struct trace_array *tr = inode->i_private;
5417        unsigned long val;
5418        int ret;
5419
5420        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5421        if (ret)
5422                return ret;
5423
5424        /* must have at least 1 entry */
5425        if (!val)
5426                return -EINVAL;
5427
5428        /* value is in KB */
5429        val <<= 10;
5430        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5431        if (ret < 0)
5432                return ret;
5433
5434        *ppos += cnt;
5435
5436        return cnt;
5437}
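
/*
 * Editorial note (illustrative, not kernel code): the value written to
 * "buffer_size_kb" is in kilobytes (the handler above shifts it left by 10),
 * and the per_cpu/cpuN copy of the file resizes only that CPU while the
 * top-level file resizes them all. A hedged sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing and that cpu1 exists:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int example_resize_cpu1_to_1mb(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb",
 *			      O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		write(fd, "1024", 4);	// 1024 KB for that CPU's ring buffer
 *		close(fd);
 *		return 0;
 *	}
 */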
5438
5439static ssize_t
5440tracing_total_entries_read(struct file *filp, char __user *ubuf,
5441                                size_t cnt, loff_t *ppos)
5442{
5443        struct trace_array *tr = filp->private_data;
5444        char buf[64];
5445        int r, cpu;
5446        unsigned long size = 0, expanded_size = 0;
5447
5448        mutex_lock(&trace_types_lock);
5449        for_each_tracing_cpu(cpu) {
5450                size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5451                if (!ring_buffer_expanded)
5452                        expanded_size += trace_buf_size >> 10;
5453        }
5454        if (ring_buffer_expanded)
5455                r = sprintf(buf, "%lu\n", size);
5456        else
5457                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5458        mutex_unlock(&trace_types_lock);
5459
5460        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5461}
5462
5463static ssize_t
5464tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5465                          size_t cnt, loff_t *ppos)
5466{
5467        /*
5468         * There is no need to read what the user has written; this function
5469         * exists only so that "echo" does not return an error when it is used.
5470         */
5471
5472        *ppos += cnt;
5473
5474        return cnt;
5475}
5476
5477static int
5478tracing_free_buffer_release(struct inode *inode, struct file *filp)
5479{
5480        struct trace_array *tr = inode->i_private;
5481
5482        /* disable tracing ? */
5483        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5484                tracer_tracing_off(tr);
5485        /* resize the ring buffer to 0 */
5486        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5487
5488        trace_array_put(tr);
5489
5490        return 0;
5491}
5492
5493static ssize_t
5494tracing_mark_write(struct file *filp, const char __user *ubuf,
5495                                        size_t cnt, loff_t *fpos)
5496{
5497        unsigned long addr = (unsigned long)ubuf;
5498        struct trace_array *tr = filp->private_data;
5499        struct ring_buffer_event *event;
5500        struct ring_buffer *buffer;
5501        struct print_entry *entry;
5502        unsigned long irq_flags;
5503        struct page *pages[2];
5504        void *map_page[2];
5505        int nr_pages = 1;
5506        ssize_t written;
5507        int offset;
5508        int size;
5509        int len;
5510        int ret;
5511        int i;
5512
5513        if (tracing_disabled)
5514                return -EINVAL;
5515
5516        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5517                return -EINVAL;
5518
5519        if (cnt > TRACE_BUF_SIZE)
5520                cnt = TRACE_BUF_SIZE;
5521
5522        /*
5523         * Userspace is injecting traces into the kernel trace buffer.
5524         * We want to be as non-intrusive as possible.
5525         * To do so, we do not want to allocate any special buffers
5526         * or take any locks, but instead write the userspace data
5527         * straight into the ring buffer.
5528         *
5529         * First we need to pin the userspace buffer into memory. It most
5530         * likely already is, because the caller just referenced it, but
5531         * there is no guarantee. By using get_user_pages_fast()
5532         * and kmap_atomic/kunmap_atomic() we can get access to the
5533         * pages directly. We then write the data directly into the
5534         * ring buffer.
5535         */
5536        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5537
5538        /* check if the write crosses a page boundary */
5539        if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5540                nr_pages = 2;
5541
5542        offset = addr & (PAGE_SIZE - 1);
5543        addr &= PAGE_MASK;
5544
5545        ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5546        if (ret < nr_pages) {
5547                while (--ret >= 0)
5548                        put_page(pages[ret]);
5549                written = -EFAULT;
5550                goto out;
5551        }
5552
5553        for (i = 0; i < nr_pages; i++)
5554                map_page[i] = kmap_atomic(pages[i]);
5555
5556        local_save_flags(irq_flags);
5557        size = sizeof(*entry) + cnt + 2; /* room for a possible '\n' and '\0' */
5558        buffer = tr->trace_buffer.buffer;
5559        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5560                                          irq_flags, preempt_count());
5561        if (!event) {
5562                /* Ring buffer disabled, return as if not open for write */
5563                written = -EBADF;
5564                goto out_unlock;
5565        }
5566
5567        entry = ring_buffer_event_data(event);
5568        entry->ip = _THIS_IP_;
5569
5570        if (nr_pages == 2) {
5571                len = PAGE_SIZE - offset;
5572                memcpy(&entry->buf, map_page[0] + offset, len);
5573                memcpy(&entry->buf[len], map_page[1], cnt - len);
5574        } else
5575                memcpy(&entry->buf, map_page[0] + offset, cnt);
5576
5577        if (entry->buf[cnt - 1] != '\n') {
5578                entry->buf[cnt] = '\n';
5579                entry->buf[cnt + 1] = '\0';
5580        } else
5581                entry->buf[cnt] = '\0';
5582
5583        __buffer_unlock_commit(buffer, event);
5584
5585        written = cnt;
5586
5587        *fpos += written;
5588
5589 out_unlock:
5590        for (i = nr_pages - 1; i >= 0; i--) {
5591                kunmap_atomic(map_page[i]);
5592                put_page(pages[i]);
5593        }
5594 out:
5595        return written;
5596}
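
/*
 * Editorial illustration (not kernel code): tracing_mark_write() backs the
 * tracefs "trace_marker" file, which lets userspace drop annotations into
 * the ring buffer. A minimal sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing; writes longer than TRACE_BUF_SIZE are truncated and
 * a '\n' is appended by the handler above if one is missing.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void example_trace_mark(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		write(fd, msg, strlen(msg));
 *		close(fd);
 *	}
 */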
5597
5598static int tracing_clock_show(struct seq_file *m, void *v)
5599{
5600        struct trace_array *tr = m->private;
5601        int i;
5602
5603        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5604                seq_printf(m,
5605                        "%s%s%s%s", i ? " " : "",
5606                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5607                        i == tr->clock_id ? "]" : "");
5608        seq_putc(m, '\n');
5609
5610        return 0;
5611}
5612
5613static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5614{
5615        int i;
5616
5617        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5618                if (strcmp(trace_clocks[i].name, clockstr) == 0)
5619                        break;
5620        }
5621        if (i == ARRAY_SIZE(trace_clocks))
5622                return -EINVAL;
5623
5624        mutex_lock(&trace_types_lock);
5625
5626        tr->clock_id = i;
5627
5628        ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5629
5630        /*
5631         * New clock may not be consistent with the previous clock.
5632         * Reset the buffer so that it doesn't have incomparable timestamps.
5633         */
5634        tracing_reset_online_cpus(&tr->trace_buffer);
5635
5636#ifdef CONFIG_TRACER_MAX_TRACE
5637        if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5638                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5639        tracing_reset_online_cpus(&tr->max_buffer);
5640#endif
5641
5642        mutex_unlock(&trace_types_lock);
5643
5644        return 0;
5645}
5646
5647static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5648                                   size_t cnt, loff_t *fpos)
5649{
5650        struct seq_file *m = filp->private_data;
5651        struct trace_array *tr = m->private;
5652        char buf[64];
5653        const char *clockstr;
5654        int ret;
5655
5656        if (cnt >= sizeof(buf))
5657                return -EINVAL;
5658
5659        if (copy_from_user(buf, ubuf, cnt))
5660                return -EFAULT;
5661
5662        buf[cnt] = 0;
5663
5664        clockstr = strstrip(buf);
5665
5666        ret = tracing_set_clock(tr, clockstr);
5667        if (ret)
5668                return ret;
5669
5670        *fpos += cnt;
5671
5672        return cnt;
5673}
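
/*
 * Editorial note (illustrative, not kernel code): reading "trace_clock"
 * lists the available clocks with the current one in brackets (see
 * tracing_clock_show() above), e.g. "[local] global counter ...". Writing
 * one of the listed names switches clocks and resets the buffers, since the
 * old and new timestamps are not comparable. A hedged sketch, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int example_use_global_clock(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		write(fd, "global", 6);
 *		close(fd);
 *		return 0;
 *	}
 */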
5674
5675static int tracing_clock_open(struct inode *inode, struct file *file)
5676{
5677        struct trace_array *tr = inode->i_private;
5678        int ret;
5679
5680        if (tracing_disabled)
5681                return -ENODEV;
5682
5683        if (trace_array_get(tr))
5684                return -ENODEV;
5685
5686        ret = single_open(file, tracing_clock_show, inode->i_private);
5687        if (ret < 0)
5688                trace_array_put(tr);
5689
5690        return ret;
5691}
5692
5693struct ftrace_buffer_info {
5694        struct trace_iterator   iter;
5695        void                    *spare;
5696        unsigned int            read;
5697};
5698
5699#ifdef CONFIG_TRACER_SNAPSHOT
5700static int tracing_snapshot_open(struct inode *inode, struct file *file)
5701{
5702        struct trace_array *tr = inode->i_private;
5703        struct trace_iterator *iter;
5704        struct seq_file *m;
5705        int ret = 0;
5706
5707        if (trace_array_get(tr) < 0)
5708                return -ENODEV;
5709
5710        if (file->f_mode & FMODE_READ) {
5711                iter = __tracing_open(inode, file, true);
5712                if (IS_ERR(iter))
5713                        ret = PTR_ERR(iter);
5714        } else {
5715                /* Writes still need the seq_file to hold the private data */
5716                ret = -ENOMEM;
5717                m = kzalloc(sizeof(*m), GFP_KERNEL);
5718                if (!m)
5719                        goto out;
5720                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5721                if (!iter) {
5722                        kfree(m);
5723                        goto out;
5724                }
5725                ret = 0;
5726
5727                iter->tr = tr;
5728                iter->trace_buffer = &tr->max_buffer;
5729                iter->cpu_file = tracing_get_cpu(inode);
5730                m->private = iter;
5731                file->private_data = m;
5732        }
5733out:
5734        if (ret < 0)
5735                trace_array_put(tr);
5736
5737        return ret;
5738}
5739
5740static ssize_t
5741tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5742                       loff_t *ppos)
5743{
5744        struct seq_file *m = filp->private_data;
5745        struct trace_iterator *iter = m->private;
5746        struct trace_array *tr = iter->tr;
5747        unsigned long val;
5748        int ret;
5749
5750        ret = tracing_update_buffers();
5751        if (ret < 0)
5752                return ret;
5753
5754        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5755        if (ret)
5756                return ret;
5757
5758        mutex_lock(&trace_types_lock);
5759
5760        if (tr->current_trace->use_max_tr) {
5761                ret = -EBUSY;
5762                goto out;
5763        }
5764
5765        switch (val) {
5766        case 0:
5767                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5768                        ret = -EINVAL;
5769                        break;
5770                }
5771                if (tr->allocated_snapshot)
5772                        free_snapshot(tr);
5773                break;
5774        case 1:
5775/* Only allow per-cpu swap if the ring buffer supports it */
5776#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5777                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5778                        ret = -EINVAL;
5779                        break;
5780                }
5781#endif
5782                if (!tr->allocated_snapshot) {
5783                        ret = alloc_snapshot(tr);
5784                        if (ret < 0)
5785                                break;
5786                }
5787                local_irq_disable();
5788                /* Now, we're going to swap */
5789                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5790                        update_max_tr(tr, current, smp_processor_id());
5791                else
5792                        update_max_tr_single(tr, current, iter->cpu_file);
5793                local_irq_enable();
5794                break;
5795        default:
5796                if (tr->allocated_snapshot) {
5797                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5798                                tracing_reset_online_cpus(&tr->max_buffer);
5799                        else
5800                                tracing_reset(&tr->max_buffer, iter->cpu_file);
5801                }
5802                break;
5803        }
5804
5805        if (ret >= 0) {
5806                *ppos += cnt;
5807                ret = cnt;
5808        }
5809out:
5810        mutex_unlock(&trace_types_lock);
5811        return ret;
5812}
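
/*
 * Editorial note (illustrative, not kernel code): the switch above defines
 * the userspace interface of the "snapshot" file: writing 1 allocates the
 * snapshot buffer if needed and swaps it with the live buffer, writing 0
 * frees it, and any other value just clears the snapshot buffer. A minimal
 * sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int example_take_snapshot(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		write(fd, "1", 1);	// "0" would free it, "2" would clear it
 *		close(fd);
 *		return 0;
 *	}
 */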
5813
5814static int tracing_snapshot_release(struct inode *inode, struct file *file)
5815{
5816        struct seq_file *m = file->private_data;
5817        int ret;
5818
5819        ret = tracing_release(inode, file);
5820
5821        if (file->f_mode & FMODE_READ)
5822                return ret;
5823
5824        /* If write only, the seq_file is just a stub */
5825        if (m)
5826                kfree(m->private);
5827        kfree(m);
5828
5829        return 0;
5830}
5831
5832static int tracing_buffers_open(struct inode *inode, struct file *filp);
5833static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5834                                    size_t count, loff_t *ppos);
5835static int tracing_buffers_release(struct inode *inode, struct file *file);
5836static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5837                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5838
5839static int snapshot_raw_open(struct inode *inode, struct file *filp)
5840{
5841        struct ftrace_buffer_info *info;
5842        int ret;
5843
5844        ret = tracing_buffers_open(inode, filp);
5845        if (ret < 0)
5846                return ret;
5847
5848        info = filp->private_data;
5849
5850        if (info->iter.trace->use_max_tr) {
5851                tracing_buffers_release(inode, filp);
5852                return -EBUSY;
5853        }
5854
5855        info->iter.snapshot = true;
5856        info->iter.trace_buffer = &info->iter.tr->max_buffer;
5857
5858        return ret;
5859}
5860
5861#endif /* CONFIG_TRACER_SNAPSHOT */
5862
5863
5864static const struct file_operations tracing_thresh_fops = {
5865        .open           = tracing_open_generic,
5866        .read           = tracing_thresh_read,
5867        .write          = tracing_thresh_write,
5868        .llseek         = generic_file_llseek,
5869};
5870
5871#ifdef CONFIG_TRACER_MAX_TRACE
5872static const struct file_operations tracing_max_lat_fops = {
5873        .open           = tracing_open_generic,
5874        .read           = tracing_max_lat_read,
5875        .write          = tracing_max_lat_write,
5876        .llseek         = generic_file_llseek,
5877};
5878#endif
5879
5880static const struct file_operations set_tracer_fops = {
5881        .open           = tracing_open_generic,
5882        .read           = tracing_set_trace_read,
5883        .write          = tracing_set_trace_write,
5884        .llseek         = generic_file_llseek,
5885};
5886
5887static const struct file_operations tracing_pipe_fops = {
5888        .open           = tracing_open_pipe,
5889        .poll           = tracing_poll_pipe,
5890        .read           = tracing_read_pipe,
5891        .splice_read    = tracing_splice_read_pipe,
5892        .release        = tracing_release_pipe,
5893        .llseek         = no_llseek,
5894};
5895
5896static const struct file_operations tracing_entries_fops = {
5897        .open           = tracing_open_generic_tr,
5898        .read           = tracing_entries_read,
5899        .write          = tracing_entries_write,
5900        .llseek         = generic_file_llseek,
5901        .release        = tracing_release_generic_tr,
5902};
5903
5904static const struct file_operations tracing_total_entries_fops = {
5905        .open           = tracing_open_generic_tr,
5906        .read           = tracing_total_entries_read,
5907        .llseek         = generic_file_llseek,
5908        .release        = tracing_release_generic_tr,
5909};
5910
5911static const struct file_operations tracing_free_buffer_fops = {
5912        .open           = tracing_open_generic_tr,
5913        .write          = tracing_free_buffer_write,
5914        .release        = tracing_free_buffer_release,
5915};
5916
5917static const struct file_operations tracing_mark_fops = {
5918        .open           = tracing_open_generic_tr,
5919        .write          = tracing_mark_write,
5920        .llseek         = generic_file_llseek,
5921        .release        = tracing_release_generic_tr,
5922};
5923
5924static const struct file_operations trace_clock_fops = {
5925        .open           = tracing_clock_open,
5926        .read           = seq_read,
5927        .llseek         = seq_lseek,
5928        .release        = tracing_single_release_tr,
5929        .write          = tracing_clock_write,
5930};
5931
5932#ifdef CONFIG_TRACER_SNAPSHOT
5933static const struct file_operations snapshot_fops = {
5934        .open           = tracing_snapshot_open,
5935        .read           = seq_read,
5936        .write          = tracing_snapshot_write,
5937        .llseek         = tracing_lseek,
5938        .release        = tracing_snapshot_release,
5939};
5940
5941static const struct file_operations snapshot_raw_fops = {
5942        .open           = snapshot_raw_open,
5943        .read           = tracing_buffers_read,
5944        .release        = tracing_buffers_release,
5945        .splice_read    = tracing_buffers_splice_read,
5946        .llseek         = no_llseek,
5947};
5948
5949#endif /* CONFIG_TRACER_SNAPSHOT */
5950
5951static int tracing_buffers_open(struct inode *inode, struct file *filp)
5952{
5953        struct trace_array *tr = inode->i_private;
5954        struct ftrace_buffer_info *info;
5955        int ret;
5956
5957        if (tracing_disabled)
5958                return -ENODEV;
5959
5960        if (trace_array_get(tr) < 0)
5961                return -ENODEV;
5962
5963        info = kzalloc(sizeof(*info), GFP_KERNEL);
5964        if (!info) {
5965                trace_array_put(tr);
5966                return -ENOMEM;
5967        }
5968
5969        mutex_lock(&trace_types_lock);
5970
5971        info->iter.tr           = tr;
5972        info->iter.cpu_file     = tracing_get_cpu(inode);
5973        info->iter.trace        = tr->current_trace;
5974        info->iter.trace_buffer = &tr->trace_buffer;
5975        info->spare             = NULL;
5976        /* Force reading ring buffer for first read */
5977        info->read              = (unsigned int)-1;
5978
5979        filp->private_data = info;
5980
5981        tr->current_trace->ref++;
5982
5983        mutex_unlock(&trace_types_lock);
5984
5985        ret = nonseekable_open(inode, filp);
5986        if (ret < 0)
5987                trace_array_put(tr);
5988
5989        return ret;
5990}
5991
5992static unsigned int
5993tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5994{
5995        struct ftrace_buffer_info *info = filp->private_data;
5996        struct trace_iterator *iter = &info->iter;
5997
5998        return trace_poll(iter, filp, poll_table);
5999}
6000
6001static ssize_t
6002tracing_buffers_read(struct file *filp, char __user *ubuf,
6003                     size_t count, loff_t *ppos)
6004{
6005        struct ftrace_buffer_info *info = filp->private_data;
6006        struct trace_iterator *iter = &info->iter;
6007        ssize_t ret;
6008        ssize_t size;
6009
6010        if (!count)
6011                return 0;
6012
6013#ifdef CONFIG_TRACER_MAX_TRACE
6014        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6015                return -EBUSY;
6016#endif
6017
6018        if (!info->spare)
6019                info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6020                                                          iter->cpu_file);
6021        if (!info->spare)
6022                return -ENOMEM;
6023
6024        /* Do we have previous read data to read? */
6025        if (info->read < PAGE_SIZE)
6026                goto read;
6027
6028 again:
6029        trace_access_lock(iter->cpu_file);
6030        ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6031                                    &info->spare,
6032                                    count,
6033                                    iter->cpu_file, 0);
6034        trace_access_unlock(iter->cpu_file);
6035
6036        if (ret < 0) {
6037                if (trace_empty(iter)) {
6038                        if ((filp->f_flags & O_NONBLOCK))
6039                                return -EAGAIN;
6040
6041                        ret = wait_on_pipe(iter, false);
6042                        if (ret)
6043                                return ret;
6044
6045                        goto again;
6046                }
6047                return 0;
6048        }
6049
6050        info->read = 0;
6051 read:
6052        size = PAGE_SIZE - info->read;
6053        if (size > count)
6054                size = count;
6055
6056        ret = copy_to_user(ubuf, info->spare + info->read, size);
6057        if (ret == size)
6058                return -EFAULT;
6059
6060        size -= ret;
6061
6062        *ppos += size;
6063        info->read += size;
6064
6065        return size;
6066}
6067
6068static int tracing_buffers_release(struct inode *inode, struct file *file)
6069{
6070        struct ftrace_buffer_info *info = file->private_data;
6071        struct trace_iterator *iter = &info->iter;
6072
6073        mutex_lock(&trace_types_lock);
6074
6075        iter->tr->current_trace->ref--;
6076
6077        __trace_array_put(iter->tr);
6078
6079        if (info->spare)
6080                ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6081        kfree(info);
6082
6083        mutex_unlock(&trace_types_lock);
6084
6085        return 0;
6086}
6087
6088struct buffer_ref {
6089        struct ring_buffer      *buffer;
6090        void                    *page;
6091        int                     ref;
6092};
6093
6094static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6095                                    struct pipe_buffer *buf)
6096{
6097        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6098
6099        if (--ref->ref)
6100                return;
6101
6102        ring_buffer_free_read_page(ref->buffer, ref->page);
6103        kfree(ref);
6104        buf->private = 0;
6105}
6106
6107static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6108                                struct pipe_buffer *buf)
6109{
6110        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6111
6112        ref->ref++;
6113}
6114
6115/* Pipe buffer operations for ring buffer pages. */
6116static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6117        .can_merge              = 0,
6118        .confirm                = generic_pipe_buf_confirm,
6119        .release                = buffer_pipe_buf_release,
6120        .steal                  = generic_pipe_buf_steal,
6121        .get                    = buffer_pipe_buf_get,
6122};
6123
6124/*
6125 * Callback from splice_to_pipe(), used to release the remaining pages
6126 * at the end of the spd in case we errored out while filling the pipe.
6127 */
6128static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6129{
6130        struct buffer_ref *ref =
6131                (struct buffer_ref *)spd->partial[i].private;
6132
6133        if (--ref->ref)
6134                return;
6135
6136        ring_buffer_free_read_page(ref->buffer, ref->page);
6137        kfree(ref);
6138        spd->partial[i].private = 0;
6139}
6140
6141static ssize_t
6142tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6143                            struct pipe_inode_info *pipe, size_t len,
6144                            unsigned int flags)
6145{
6146        struct ftrace_buffer_info *info = file->private_data;
6147        struct trace_iterator *iter = &info->iter;
6148        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6149        struct page *pages_def[PIPE_DEF_BUFFERS];
6150        struct splice_pipe_desc spd = {
6151                .pages          = pages_def,
6152                .partial        = partial_def,
6153                .nr_pages_max   = PIPE_DEF_BUFFERS,
6154                .flags          = flags,
6155                .ops            = &buffer_pipe_buf_ops,
6156                .spd_release    = buffer_spd_release,
6157        };
6158        struct buffer_ref *ref;
6159        int entries, size, i;
6160        ssize_t ret = 0;
6161
6162#ifdef CONFIG_TRACER_MAX_TRACE
6163        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6164                return -EBUSY;
6165#endif
6166
6167        if (*ppos & (PAGE_SIZE - 1))
6168                return -EINVAL;
6169
6170        if (len & (PAGE_SIZE - 1)) {
6171                if (len < PAGE_SIZE)
6172                        return -EINVAL;
6173                len &= PAGE_MASK;
6174        }
6175
6176        if (splice_grow_spd(pipe, &spd))
6177                return -ENOMEM;
6178
6179 again:
6180        trace_access_lock(iter->cpu_file);
6181        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6182
6183        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6184                struct page *page;
6185                int r;
6186
6187                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6188                if (!ref) {
6189                        ret = -ENOMEM;
6190                        break;
6191                }
6192
6193                ref->ref = 1;
6194                ref->buffer = iter->trace_buffer->buffer;
6195                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6196                if (!ref->page) {
6197                        ret = -ENOMEM;
6198                        kfree(ref);
6199                        break;
6200                }
6201
6202                r = ring_buffer_read_page(ref->buffer, &ref->page,
6203                                          len, iter->cpu_file, 1);
6204                if (r < 0) {
6205                        ring_buffer_free_read_page(ref->buffer, ref->page);
6206                        kfree(ref);
6207                        break;
6208                }
6209
6210                /*
6211                 * Zero out any leftover data; this is going to
6212                 * user land.
6213                 */
6214                size = ring_buffer_page_len(ref->page);
6215                if (size < PAGE_SIZE)
6216                        memset(ref->page + size, 0, PAGE_SIZE - size);
6217
6218                page = virt_to_page(ref->page);
6219
6220                spd.pages[i] = page;
6221                spd.partial[i].len = PAGE_SIZE;
6222                spd.partial[i].offset = 0;
6223                spd.partial[i].private = (unsigned long)ref;
6224                spd.nr_pages++;
6225                *ppos += PAGE_SIZE;
6226
6227                entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6228        }
6229
6230        trace_access_unlock(iter->cpu_file);
6231        spd.nr_pages = i;
6232
6233        /* did we read anything? */
6234        if (!spd.nr_pages) {
6235                if (ret)
6236                        goto out;
6237
6238                ret = -EAGAIN;
6239                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6240                        goto out;
6241
6242                ret = wait_on_pipe(iter, true);
6243                if (ret)
6244                        goto out;
6245
6246                goto again;
6247        }
6248
6249        ret = splice_to_pipe(pipe, &spd);
6250out:
6251        splice_shrink_spd(&spd);
6252
6253        return ret;
6254}
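
/*
 * Editorial illustration (not kernel code): tracing_buffers_splice_read()
 * only accepts page-aligned offsets and lengths of at least one page, and
 * hands whole ring-buffer pages to the pipe. A hedged userspace sketch that
 * moves one page from a per-CPU "trace_pipe_raw" file through an anonymous
 * pipe into an output file descriptor; assumes tracefs is mounted at
 * /sys/kernel/tracing and a 4096-byte PAGE_SIZE.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int example_splice_one_page(int raw_fd, int out_fd)
 *	{
 *		int pfd[2];
 *		ssize_t n;
 *
 *		if (pipe(pfd) < 0)
 *			return -1;
 *		n = splice(raw_fd, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *		if (n > 0)
 *			splice(pfd[0], NULL, out_fd, NULL, n, 0);
 *		close(pfd[0]);
 *		close(pfd[1]);
 *		return n > 0 ? 0 : -1;
 *	}
 */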
6255
6256static const struct file_operations tracing_buffers_fops = {
6257        .open           = tracing_buffers_open,
6258        .read           = tracing_buffers_read,
6259        .poll           = tracing_buffers_poll,
6260        .release        = tracing_buffers_release,
6261        .splice_read    = tracing_buffers_splice_read,
6262        .llseek         = no_llseek,
6263};
6264
6265static ssize_t
6266tracing_stats_read(struct file *filp, char __user *ubuf,
6267                   size_t count, loff_t *ppos)
6268{
6269        struct inode *inode = file_inode(filp);
6270        struct trace_array *tr = inode->i_private;
6271        struct trace_buffer *trace_buf = &tr->trace_buffer;
6272        int cpu = tracing_get_cpu(inode);
6273        struct trace_seq *s;
6274        unsigned long cnt;
6275        unsigned long long t;
6276        unsigned long usec_rem;
6277
6278        s = kmalloc(sizeof(*s), GFP_KERNEL);
6279        if (!s)
6280                return -ENOMEM;
6281
6282        trace_seq_init(s);
6283
6284        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6285        trace_seq_printf(s, "entries: %ld\n", cnt);
6286
6287        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6288        trace_seq_printf(s, "overrun: %ld\n", cnt);
6289
6290        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6291        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6292
6293        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6294        trace_seq_printf(s, "bytes: %ld\n", cnt);
6295
6296        if (trace_clocks[tr->clock_id].in_ns) {
6297                /* local or global for trace_clock */
6298                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6299                usec_rem = do_div(t, USEC_PER_SEC);
6300                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6301                                                                t, usec_rem);
6302
6303                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6304                usec_rem = do_div(t, USEC_PER_SEC);
6305                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6306        } else {
6307                /* counter or tsc mode for trace_clock */
6308                trace_seq_printf(s, "oldest event ts: %llu\n",
6309                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6310
6311                trace_seq_printf(s, "now ts: %llu\n",
6312                                ring_buffer_time_stamp(trace_buf->buffer, cpu));
6313        }
6314
6315        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6316        trace_seq_printf(s, "dropped events: %ld\n", cnt);
6317
6318        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6319        trace_seq_printf(s, "read events: %ld\n", cnt);
6320
6321        count = simple_read_from_buffer(ubuf, count, ppos,
6322                                        s->buffer, trace_seq_used(s));
6323
6324        kfree(s);
6325
6326        return count;
6327}
6328
6329static const struct file_operations tracing_stats_fops = {
6330        .open           = tracing_open_generic_tr,
6331        .read           = tracing_stats_read,
6332        .llseek         = generic_file_llseek,
6333        .release        = tracing_release_generic_tr,
6334};
6335
6336#ifdef CONFIG_DYNAMIC_FTRACE
6337
6338int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6339{
6340        return 0;
6341}
6342
6343static ssize_t
6344tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6345                  size_t cnt, loff_t *ppos)
6346{
6347        static char ftrace_dyn_info_buffer[1024];
6348        static DEFINE_MUTEX(dyn_info_mutex);
6349        unsigned long *p = filp->private_data;
6350        char *buf = ftrace_dyn_info_buffer;
6351        int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6352        int r;
6353
6354        mutex_lock(&dyn_info_mutex);
6355        r = sprintf(buf, "%ld ", *p);
6356
6357        r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6358        buf[r++] = '\n';
6359
6360        r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6361
6362        mutex_unlock(&dyn_info_mutex);
6363
6364        return r;
6365}
6366
6367static const struct file_operations tracing_dyn_info_fops = {
6368        .open           = tracing_open_generic,
6369        .read           = tracing_read_dyn_info,
6370        .llseek         = generic_file_llseek,
6371};
6372#endif /* CONFIG_DYNAMIC_FTRACE */
6373
6374#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6375static void
6376ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6377{
6378        tracing_snapshot();
6379}
6380
6381static void
6382ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6383{
6384        unsigned long *count = (unsigned long *)data;
6385
6386        if (!*count)
6387                return;
6388
6389        if (*count != -1)
6390                (*count)--;
6391
6392        tracing_snapshot();
6393}
6394
6395static int
6396ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6397                      struct ftrace_probe_ops *ops, void *data)
6398{
6399        long count = (long)data;
6400
6401        seq_printf(m, "%ps:", (void *)ip);
6402
6403        seq_puts(m, "snapshot");
6404
6405        if (count == -1)
6406                seq_puts(m, ":unlimited\n");
6407        else
6408                seq_printf(m, ":count=%ld\n", count);
6409
6410        return 0;
6411}
6412
6413static struct ftrace_probe_ops snapshot_probe_ops = {
6414        .func                   = ftrace_snapshot,
6415        .print                  = ftrace_snapshot_print,
6416};
6417
6418static struct ftrace_probe_ops snapshot_count_probe_ops = {
6419        .func                   = ftrace_count_snapshot,
6420        .print                  = ftrace_snapshot_print,
6421};
6422
6423static int
6424ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6425                               char *glob, char *cmd, char *param, int enable)
6426{
6427        struct ftrace_probe_ops *ops;
6428        void *count = (void *)-1;
6429        char *number;
6430        int ret;
6431
6432        /* hash funcs only work with set_ftrace_filter */
6433        if (!enable)
6434                return -EINVAL;
6435
6436        ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6437
6438        if (glob[0] == '!') {
6439                unregister_ftrace_function_probe_func(glob+1, ops);
6440                return 0;
6441        }
6442
6443        if (!param)
6444                goto out_reg;
6445
6446        number = strsep(&param, ":");
6447
6448        if (!strlen(number))
6449                goto out_reg;
6450
6451        /*
6452         * We use the callback data field (which is a pointer)
6453         * as our counter.
6454         */
6455        ret = kstrtoul(number, 0, (unsigned long *)&count);
6456        if (ret)
6457                return ret;
6458
6459 out_reg:
6460        ret = register_ftrace_function_probe(glob, ops, count);
6461
6462        if (ret >= 0)
6463                alloc_snapshot(&global_trace);
6464
6465        return ret < 0 ? ret : 0;
6466}
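
/*
 * Editorial note (illustrative, not kernel code): with CONFIG_DYNAMIC_FTRACE
 * and CONFIG_TRACER_SNAPSHOT, the callback above implements the "snapshot"
 * command of set_ftrace_filter in the usual "<function>:<cmd>[:<count>]"
 * form; a leading '!' unregisters the probe. A hedged sketch, assuming
 * tracefs is mounted at /sys/kernel/tracing and using "schedule" purely as
 * an example function name:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void example_arm_snapshot_probe(void)
 *	{
 *		const char *cmd = "schedule:snapshot:5";
 *		int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		write(fd, cmd, strlen(cmd));	// snapshot on the first 5 hits
 *		close(fd);
 *	}
 */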
6467
6468static struct ftrace_func_command ftrace_snapshot_cmd = {
6469        .name                   = "snapshot",
6470        .func                   = ftrace_trace_snapshot_callback,
6471};
6472
6473static __init int register_snapshot_cmd(void)
6474{
6475        return register_ftrace_command(&ftrace_snapshot_cmd);
6476}
6477#else
6478static inline __init int register_snapshot_cmd(void) { return 0; }
6479#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6480
6481static struct dentry *tracing_get_dentry(struct trace_array *tr)
6482{
6483        if (WARN_ON(!tr->dir))
6484                return ERR_PTR(-ENODEV);
6485
6486        /* Top directory uses NULL as the parent */
6487        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6488                return NULL;
6489
6490        /* All sub buffers have a descriptor */
6491        return tr->dir;
6492}
6493
6494static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6495{
6496        struct dentry *d_tracer;
6497
6498        if (tr->percpu_dir)
6499                return tr->percpu_dir;
6500
6501        d_tracer = tracing_get_dentry(tr);
6502        if (IS_ERR(d_tracer))
6503                return NULL;
6504
6505        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6506
6507        WARN_ONCE(!tr->percpu_dir,
6508                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6509
6510        return tr->percpu_dir;
6511}
6512
6513static struct dentry *
6514trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6515                      void *data, long cpu, const struct file_operations *fops)
6516{
6517        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6518
6519        if (ret) /* See tracing_get_cpu() */
6520                d_inode(ret)->i_cdev = (void *)(cpu + 1);
6521        return ret;
6522}
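
/*
 * Illustrative note (a sketch, not authoritative): the CPU number is stored
 * in i_cdev as "cpu + 1" so that a NULL i_cdev (what tracefs leaves there by
 * default) can keep meaning "all CPUs". The reader side, tracing_get_cpu(),
 * is expected to reverse this: return i_cdev - 1 when i_cdev is set, and
 * RING_BUFFER_ALL_CPUS otherwise.
 */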
6523
6524static void
6525tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6526{
6527        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6528        struct dentry *d_cpu;
6529        char cpu_dir[30]; /* 30 characters should be more than enough */
6530
6531        if (!d_percpu)
6532                return;
6533
6534        snprintf(cpu_dir, 30, "cpu%ld", cpu);
6535        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6536        if (!d_cpu) {
6537                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6538                return;
6539        }
6540
6541        /* per cpu trace_pipe */
6542        trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6543                                tr, cpu, &tracing_pipe_fops);
6544
6545        /* per cpu trace */
6546        trace_create_cpu_file("trace", 0644, d_cpu,
6547                                tr, cpu, &tracing_fops);
6548
6549        trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6550                                tr, cpu, &tracing_buffers_fops);
6551
6552        trace_create_cpu_file("stats", 0444, d_cpu,
6553                                tr, cpu, &tracing_stats_fops);
6554
6555        trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6556                                tr, cpu, &tracing_entries_fops);
6557
6558#ifdef CONFIG_TRACER_SNAPSHOT
6559        trace_create_cpu_file("snapshot", 0644, d_cpu,
6560                                tr, cpu, &snapshot_fops);
6561
6562        trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6563                                tr, cpu, &snapshot_raw_fops);
6564#endif
6565}
6566
6567#ifdef CONFIG_FTRACE_SELFTEST
6568/* Let selftest have access to static functions in this file */
6569#include "trace_selftest.c"
6570#endif
6571
6572static ssize_t
6573trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6574                        loff_t *ppos)
6575{
6576        struct trace_option_dentry *topt = filp->private_data;
6577        char *buf;
6578
6579        if (topt->flags->val & topt->opt->bit)
6580                buf = "1\n";
6581        else
6582                buf = "0\n";
6583
6584        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6585}
6586
6587static ssize_t
6588trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6589                         loff_t *ppos)
6590{
6591        struct trace_option_dentry *topt = filp->private_data;
6592        unsigned long val;
6593        int ret;
6594
6595        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6596        if (ret)
6597                return ret;
6598
6599        if (val != 0 && val != 1)
6600                return -EINVAL;
6601
6602        if (!!(topt->flags->val & topt->opt->bit) != val) {
6603                mutex_lock(&trace_types_lock);
6604                ret = __set_tracer_option(topt->tr, topt->flags,
6605                                          topt->opt, !val);
6606                mutex_unlock(&trace_types_lock);
6607                if (ret)
6608                        return ret;
6609        }
6610
6611        *ppos += cnt;
6612
6613        return cnt;
6614}
6615
6616
6617static const struct file_operations trace_options_fops = {
6618        .open = tracing_open_generic,
6619        .read = trace_options_read,
6620        .write = trace_options_write,
6621        .llseek = generic_file_llseek,
6622};
6623
6624/*
6625 * In order to pass in both the trace_array descriptor as well as the index
6626 * to the flag that the trace option file represents, the trace_array
6627 * has a character array of trace_flags_index[], which holds the index
6628 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6629 * The address of the array element for a given flag is passed to that
6630 * flag's option file read/write callbacks.
6631 *
6632 * In order to extract both the index and the trace_array descriptor,
6633 * get_tr_index() uses the following algorithm.
6634 *
6635 *   idx = *ptr;
6636 *
6637 * This works because the pointer holds the address of an entry whose
6638 * value is its own index (remember, index[1] == 1).
6639 *
6640 * Then, to get the trace_array descriptor, we subtract that index from
6641 * the pointer, which takes us back to the start of the index array:
6642 *
6643 *   ptr - idx == &index[0]
6644 *
6645 * Then a simple container_of() from that pointer gets us to the
6646 * trace_array descriptor.
6647 */
6648static void get_tr_index(void *data, struct trace_array **ptr,
6649                         unsigned int *pindex)
6650{
6651        *pindex = *(unsigned char *)data;
6652
6653        *ptr = container_of(data - *pindex, struct trace_array,
6654                            trace_flags_index);
6655}
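
/*
 * Worked example (illustrative only): suppose a core option file was created
 * with data == &tr->trace_flags_index[3]. Since trace_flags_index[i] == i,
 * *data is 3, so data - 3 == &tr->trace_flags_index[0], and container_of()
 * on that address recovers the enclosing trace_array.
 */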
6656
6657static ssize_t
6658trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6659                        loff_t *ppos)
6660{
6661        void *tr_index = filp->private_data;
6662        struct trace_array *tr;
6663        unsigned int index;
6664        char *buf;
6665
6666        get_tr_index(tr_index, &tr, &index);
6667
6668        if (tr->trace_flags & (1 << index))
6669                buf = "1\n";
6670        else
6671                buf = "0\n";
6672
6673        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6674}
6675
6676static ssize_t
6677trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6678                         loff_t *ppos)
6679{
6680        void *tr_index = filp->private_data;
6681        struct trace_array *tr;
6682        unsigned int index;
6683        unsigned long val;
6684        int ret;
6685
6686        get_tr_index(tr_index, &tr, &index);
6687
6688        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6689        if (ret)
6690                return ret;
6691
6692        if (val != 0 && val != 1)
6693                return -EINVAL;
6694
6695        mutex_lock(&trace_types_lock);
6696        ret = set_tracer_flag(tr, 1 << index, val);
6697        mutex_unlock(&trace_types_lock);
6698
6699        if (ret < 0)
6700                return ret;
6701
6702        *ppos += cnt;
6703
6704        return cnt;
6705}
6706
6707static const struct file_operations trace_options_core_fops = {
6708        .open = tracing_open_generic,
6709        .read = trace_options_core_read,
6710        .write = trace_options_core_write,
6711        .llseek = generic_file_llseek,
6712};
6713
6714struct dentry *trace_create_file(const char *name,
6715                                 umode_t mode,
6716                                 struct dentry *parent,
6717                                 void *data,
6718                                 const struct file_operations *fops)
6719{
6720        struct dentry *ret;
6721
6722        ret = tracefs_create_file(name, mode, parent, data, fops);
6723        if (!ret)
6724                pr_warn("Could not create tracefs '%s' entry\n", name);
6725
6726        return ret;
6727}
6728
6729
6730static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6731{
6732        struct dentry *d_tracer;
6733
6734        if (tr->options)
6735                return tr->options;
6736
6737        d_tracer = tracing_get_dentry(tr);
6738        if (IS_ERR(d_tracer))
6739                return NULL;
6740
6741        tr->options = tracefs_create_dir("options", d_tracer);
6742        if (!tr->options) {
6743                pr_warn("Could not create tracefs directory 'options'\n");
6744                return NULL;
6745        }
6746
6747        return tr->options;
6748}
6749
6750static void
6751create_trace_option_file(struct trace_array *tr,
6752                         struct trace_option_dentry *topt,
6753                         struct tracer_flags *flags,
6754                         struct tracer_opt *opt)
6755{
6756        struct dentry *t_options;
6757
6758        t_options = trace_options_init_dentry(tr);
6759        if (!t_options)
6760                return;
6761
6762        topt->flags = flags;
6763        topt->opt = opt;
6764        topt->tr = tr;
6765
6766        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6767                                    &trace_options_fops);
6768
6769}
6770
6771static void
6772create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6773{
6774        struct trace_option_dentry *topts;
6775        struct trace_options *tr_topts;
6776        struct tracer_flags *flags;
6777        struct tracer_opt *opts;
6778        int cnt;
6779        int i;
6780
6781        if (!tracer)
6782                return;
6783
6784        flags = tracer->flags;
6785
6786        if (!flags || !flags->opts)
6787                return;
6788
6789        /*
6790         * If this is an instance, only create flags for tracers
6791         * the instance may have.
6792         */
6793        if (!trace_ok_for_array(tracer, tr))
6794                return;
6795
6796        for (i = 0; i < tr->nr_topts; i++) {
6797                /* Make sure there are no duplicate flags. */
6798                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6799                        return;
6800        }
6801
6802        opts = flags->opts;
6803
6804        for (cnt = 0; opts[cnt].name; cnt++)
6805                ;
6806
6807        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6808        if (!topts)
6809                return;
6810
6811        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6812                            GFP_KERNEL);
6813        if (!tr_topts) {
6814                kfree(topts);
6815                return;
6816        }
6817
6818        tr->topts = tr_topts;
6819        tr->topts[tr->nr_topts].tracer = tracer;
6820        tr->topts[tr->nr_topts].topts = topts;
6821        tr->nr_topts++;
6822
6823        for (cnt = 0; opts[cnt].name; cnt++) {
6824                create_trace_option_file(tr, &topts[cnt], flags,
6825                                         &opts[cnt]);
6826                WARN_ONCE(topts[cnt].entry == NULL,
6827                          "Failed to create trace option: %s",
6828                          opts[cnt].name);
6829        }
6830}
6831
6832static struct dentry *
6833create_trace_option_core_file(struct trace_array *tr,
6834                              const char *option, long index)
6835{
6836        struct dentry *t_options;
6837
6838        t_options = trace_options_init_dentry(tr);
6839        if (!t_options)
6840                return NULL;
6841
6842        return trace_create_file(option, 0644, t_options,
6843                                 (void *)&tr->trace_flags_index[index],
6844                                 &trace_options_core_fops);
6845}
6846
6847static void create_trace_options_dir(struct trace_array *tr)
6848{
6849        struct dentry *t_options;
6850        bool top_level = tr == &global_trace;
6851        int i;
6852
6853        t_options = trace_options_init_dentry(tr);
6854        if (!t_options)
6855                return;
6856
6857        for (i = 0; trace_options[i]; i++) {
6858                if (top_level ||
6859                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6860                        create_trace_option_core_file(tr, trace_options[i], i);
6861        }
6862}
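
/*
 * Usage sketch: each file created above toggles one core trace flag from
 * user space, e.g. (assuming the default tracefs mount point and the core
 * "sym-offset" flag):
 *
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 *   echo 0 > /sys/kernel/tracing/options/sym-offset
 *
 * which lands in trace_options_core_write() above. Tracer-specific options
 * created by create_trace_option_files() live in the same directory but use
 * trace_options_fops instead.
 */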
6863
6864static ssize_t
6865rb_simple_read(struct file *filp, char __user *ubuf,
6866               size_t cnt, loff_t *ppos)
6867{
6868        struct trace_array *tr = filp->private_data;
6869        char buf[64];
6870        int r;
6871
6872        r = tracer_tracing_is_on(tr);
6873        r = sprintf(buf, "%d\n", r);
6874
6875        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6876}
6877
6878static ssize_t
6879rb_simple_write(struct file *filp, const char __user *ubuf,
6880                size_t cnt, loff_t *ppos)
6881{
6882        struct trace_array *tr = filp->private_data;
6883        struct ring_buffer *buffer = tr->trace_buffer.buffer;
6884        unsigned long val;
6885        int ret;
6886
6887        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6888        if (ret)
6889                return ret;
6890
6891        if (buffer) {
6892                mutex_lock(&trace_types_lock);
6893                if (val) {
6894                        tracer_tracing_on(tr);
6895                        if (tr->current_trace->start)
6896                                tr->current_trace->start(tr);
6897                } else {
6898                        tracer_tracing_off(tr);
6899                        if (tr->current_trace->stop)
6900                                tr->current_trace->stop(tr);
6901                }
6902                mutex_unlock(&trace_types_lock);
6903        }
6904
6905        (*ppos)++;
6906
6907        return cnt;
6908}
6909
6910static const struct file_operations rb_simple_fops = {
6911        .open           = tracing_open_generic_tr,
6912        .read           = rb_simple_read,
6913        .write          = rb_simple_write,
6914        .release        = tracing_release_generic_tr,
6915        .llseek         = default_llseek,
6916};
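
/*
 * Usage sketch: these fops back the per-instance "tracing_on" file, so the
 * usual way to reach rb_simple_write() from user space is (assuming tracefs
 * is mounted at /sys/kernel/tracing):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the ring buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume tracing
 */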
6917
6918struct dentry *trace_instance_dir;
6919
6920static void
6921init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6922
6923static int
6924allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6925{
6926        enum ring_buffer_flags rb_flags;
6927
6928        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6929
6930        buf->tr = tr;
6931
6932        buf->buffer = ring_buffer_alloc(size, rb_flags);
6933        if (!buf->buffer)
6934                return -ENOMEM;
6935
6936        buf->data = alloc_percpu(struct trace_array_cpu);
6937        if (!buf->data) {
6938                ring_buffer_free(buf->buffer);
6939                return -ENOMEM;
6940        }
6941
6942        /* Allocate the first page for all buffers */
6943        set_buffer_entries(&tr->trace_buffer,
6944                           ring_buffer_size(tr->trace_buffer.buffer, 0));
6945
6946        return 0;
6947}
6948
6949static int allocate_trace_buffers(struct trace_array *tr, int size)
6950{
6951        int ret;
6952
6953        ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6954        if (ret)
6955                return ret;
6956
6957#ifdef CONFIG_TRACER_MAX_TRACE
6958        ret = allocate_trace_buffer(tr, &tr->max_buffer,
6959                                    allocate_snapshot ? size : 1);
6960        if (WARN_ON(ret)) {
6961                ring_buffer_free(tr->trace_buffer.buffer);
6962                free_percpu(tr->trace_buffer.data);
6963                return -ENOMEM;
6964        }
6965        tr->allocated_snapshot = allocate_snapshot;
6966
6967        /*
6968         * Only the top level trace array gets its snapshot allocated
6969         * from the kernel command line.
6970         */
6971        allocate_snapshot = false;
6972#endif
6973        return 0;
6974}
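
/*
 * Note (hedged): "allocate_snapshot" is the file-scope flag that the
 * "alloc_snapshot" kernel command line option is expected to set earlier in
 * this file. Clearing it after the first use is what restricts command-line
 * snapshot allocation to the top level trace array, as the comment above
 * notes.
 */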
6975
6976static void free_trace_buffer(struct trace_buffer *buf)
6977{
6978        if (buf->buffer) {
6979                ring_buffer_free(buf->buffer);
6980                buf->buffer = NULL;
6981                free_percpu(buf->data);
6982                buf->data = NULL;
6983        }
6984}
6985
6986static void free_trace_buffers(struct trace_array *tr)
6987{
6988        if (!tr)
6989                return;
6990
6991        free_trace_buffer(&tr->trace_buffer);
6992
6993#ifdef CONFIG_TRACER_MAX_TRACE
6994        free_trace_buffer(&tr->max_buffer);
6995#endif
6996}
6997
6998static void init_trace_flags_index(struct trace_array *tr)
6999{
7000        int i;
7001
7002        /* Used by the trace options files */
7003        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7004                tr->trace_flags_index[i] = i;
7005}
7006
7007static void __update_tracer_options(struct trace_array *tr)
7008{
7009        struct tracer *t;
7010
7011        for (t = trace_types; t; t = t->next)
7012                add_tracer_options(tr, t);
7013}
7014
7015static void update_tracer_options(struct trace_array *tr)
7016{
7017        mutex_lock(&trace_types_lock);
7018        __update_tracer_options(tr);
7019        mutex_unlock(&trace_types_lock);
7020}
7021
7022static int instance_mkdir(const char *name)
7023{
7024        struct trace_array *tr;
7025        int ret;
7026
7027        mutex_lock(&trace_types_lock);
7028
7029        ret = -EEXIST;
7030        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7031                if (tr->name && strcmp(tr->name, name) == 0)
7032                        goto out_unlock;
7033        }
7034
7035        ret = -ENOMEM;
7036        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7037        if (!tr)
7038                goto out_unlock;
7039
7040        tr->name = kstrdup(name, GFP_KERNEL);
7041        if (!tr->name)
7042                goto out_free_tr;
7043
7044        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7045                goto out_free_tr;
7046
7047        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7048
7049        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7050
7051        raw_spin_lock_init(&tr->start_lock);
7052
7053        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7054
7055        tr->current_trace = &nop_trace;
7056
7057        INIT_LIST_HEAD(&tr->systems);
7058        INIT_LIST_HEAD(&tr->events);
7059
7060        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7061                goto out_free_tr;
7062
7063        tr->dir = tracefs_create_dir(name, trace_instance_dir);
7064        if (!tr->dir)
7065                goto out_free_tr;
7066
7067        ret = event_trace_add_tracer(tr->dir, tr);
7068        if (ret) {
7069                tracefs_remove_recursive(tr->dir);
7070                goto out_free_tr;
7071        }
7072
7073        init_tracer_tracefs(tr, tr->dir);
7074        init_trace_flags_index(tr);
7075        __update_tracer_options(tr);
7076
7077        list_add(&tr->list, &ftrace_trace_arrays);
7078
7079        mutex_unlock(&trace_types_lock);
7080
7081        return 0;
7082
7083 out_free_tr:
7084        free_trace_buffers(tr);
7085        free_cpumask_var(tr->tracing_cpumask);
7086        kfree(tr->name);
7087        kfree(tr);
7088
7089 out_unlock:
7090        mutex_unlock(&trace_types_lock);
7091
7092        return ret;
7093
7094}
7095
7096static int instance_rmdir(const char *name)
7097{
7098        struct trace_array *tr;
7099        int found = 0;
7100        int ret;
7101        int i;
7102
7103        mutex_lock(&trace_types_lock);
7104
7105        ret = -ENODEV;
7106        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7107                if (tr->name && strcmp(tr->name, name) == 0) {
7108                        found = 1;
7109                        break;
7110                }
7111        }
7112        if (!found)
7113                goto out_unlock;
7114
7115        ret = -EBUSY;
7116        if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7117                goto out_unlock;
7118
7119        list_del(&tr->list);
7120
7121        /* Disable all the flags that were enabled coming in */
7122        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7123                if ((1 << i) & ZEROED_TRACE_FLAGS)
7124                        set_tracer_flag(tr, 1 << i, 0);
7125        }
7126
7127        tracing_set_nop(tr);
7128        event_trace_del_tracer(tr);
7129        ftrace_destroy_function_files(tr);
7130        tracefs_remove_recursive(tr->dir);
7131        free_trace_buffers(tr);
7132
7133        for (i = 0; i < tr->nr_topts; i++) {
7134                kfree(tr->topts[i].topts);
7135        }
7136        kfree(tr->topts);
7137
7138        kfree(tr->name);
7139        kfree(tr);
7140
7141        ret = 0;
7142
7143 out_unlock:
7144        mutex_unlock(&trace_types_lock);
7145
7146        return ret;
7147}
7148
7149static __init void create_trace_instances(struct dentry *d_tracer)
7150{
7151        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7152                                                         instance_mkdir,
7153                                                         instance_rmdir);
7154        if (WARN_ON(!trace_instance_dir))
7155                return;
7156}
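
/*
 * Usage sketch: the "instances" directory registered above lets user space
 * create and destroy additional trace arrays with plain mkdir/rmdir, which
 * end up in instance_mkdir()/instance_rmdir() above. Assuming the default
 * tracefs mount point:
 *
 *   mkdir /sys/kernel/tracing/instances/foo   # new trace_array "foo"
 *   rmdir /sys/kernel/tracing/instances/foo   # tear it down again
 */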
7157
7158static void
7159init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7160{
7161        int cpu;
7162
7163        trace_create_file("available_tracers", 0444, d_tracer,
7164                        tr, &show_traces_fops);
7165
7166        trace_create_file("current_tracer", 0644, d_tracer,
7167                        tr, &set_tracer_fops);
7168
7169        trace_create_file("tracing_cpumask", 0644, d_tracer,
7170                          tr, &tracing_cpumask_fops);
7171
7172        trace_create_file("trace_options", 0644, d_tracer,
7173                          tr, &tracing_iter_fops);
7174
7175        trace_create_file("trace", 0644, d_tracer,
7176                          tr, &tracing_fops);
7177
7178        trace_create_file("trace_pipe", 0444, d_tracer,
7179                          tr, &tracing_pipe_fops);
7180
7181        trace_create_file("buffer_size_kb", 0644, d_tracer,
7182                          tr, &tracing_entries_fops);
7183
7184        trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7185                          tr, &tracing_total_entries_fops);
7186
7187        trace_create_file("free_buffer", 0200, d_tracer,
7188                          tr, &tracing_free_buffer_fops);
7189
7190        trace_create_file("trace_marker", 0220, d_tracer,
7191                          tr, &tracing_mark_fops);
7192
7193        trace_create_file("trace_clock", 0644, d_tracer, tr,
7194                          &trace_clock_fops);
7195
7196        trace_create_file("tracing_on", 0644, d_tracer,
7197                          tr, &rb_simple_fops);
7198
7199        create_trace_options_dir(tr);
7200
7201#ifdef CONFIG_TRACER_MAX_TRACE
7202        trace_create_file("tracing_max_latency", 0644, d_tracer,
7203                        &tr->max_latency, &tracing_max_lat_fops);
7204#endif
7205
7206        if (ftrace_create_function_files(tr, d_tracer))
7207                WARN(1, "Could not allocate function filter files");
7208
7209#ifdef CONFIG_TRACER_SNAPSHOT
7210        trace_create_file("snapshot", 0644, d_tracer,
7211                          tr, &snapshot_fops);
7212#endif
7213
7214        for_each_tracing_cpu(cpu)
7215                tracing_init_tracefs_percpu(tr, cpu);
7216
7217        ftrace_init_tracefs(tr, d_tracer);
7218}
7219
7220static struct vfsmount *trace_automount(void *ignore)
7221{
7222        struct vfsmount *mnt;
7223        struct file_system_type *type;
7224
7225        /*
7226         * To maintain backward compatibility for tools that mount
7227         * debugfs to get to the tracing facility, tracefs is automatically
7228         * mounted to the debugfs/tracing directory.
7229         */
7230        type = get_fs_type("tracefs");
7231        if (!type)
7232                return NULL;
7233        mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7234        put_filesystem(type);
7235        if (IS_ERR(mnt))
7236                return NULL;
7237        mntget(mnt);
7238
7239        return mnt;
7240}
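
/*
 * Illustrative note: with the automount above in place, both of the
 * following paths are expected to expose the same tracefs files (assuming
 * debugfs is mounted in its usual location):
 *
 *   /sys/kernel/tracing/          # native tracefs mount
 *   /sys/kernel/debug/tracing/    # automounted here for older tools
 */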
7241
7242/**
7243 * tracing_init_dentry - initialize top level trace array
7244 *
7245 * This is called when creating files or directories in the tracing
7246 * directory. It is called via fs_initcall() by any of the boot-up code,
7247 * and it is expected to return the dentry of the top level tracing directory.
7248 */
7249struct dentry *tracing_init_dentry(void)
7250{
7251        struct trace_array *tr = &global_trace;
7252
7253        /* The top level trace array uses NULL as parent */
7254        if (tr->dir)
7255                return NULL;
7256
7257        if (WARN_ON(!tracefs_initialized()) ||
7258                (IS_ENABLED(CONFIG_DEBUG_FS) &&
7259                 WARN_ON(!debugfs_initialized())))
7260                return ERR_PTR(-ENODEV);
7261
7262        /*
7263         * As there may still be users that expect the tracing
7264         * files to exist in debugfs/tracing, we must automount
7265         * the tracefs file system there, so older tools still
7266         * work with the newer kernel.
7267         */
7268        tr->dir = debugfs_create_automount("tracing", NULL,
7269                                           trace_automount, NULL);
7270        if (!tr->dir) {
7271                pr_warn_once("Could not create debugfs directory 'tracing'\n");
7272                return ERR_PTR(-ENOMEM);
7273        }
7274
7275        return NULL;
7276}
7277
7278extern struct trace_enum_map *__start_ftrace_enum_maps[];
7279extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7280
7281static void __init trace_enum_init(void)
7282{
7283        int len;
7284
7285        len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7286        trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7287}
7288
7289#ifdef CONFIG_MODULES
7290static void trace_module_add_enums(struct module *mod)
7291{
7292        if (!mod->num_trace_enums)
7293                return;
7294
7295        /*
7296         * Modules with bad taint do not have events created, do
7297         * not bother with enums either.
7298         */
7299        if (trace_module_has_bad_taint(mod))
7300                return;
7301
7302        trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7303}
7304
7305#ifdef CONFIG_TRACE_ENUM_MAP_FILE
7306static void trace_module_remove_enums(struct module *mod)
7307{
7308        union trace_enum_map_item *map;
7309        union trace_enum_map_item **last = &trace_enum_maps;
7310
7311        if (!mod->num_trace_enums)
7312                return;
7313
7314        mutex_lock(&trace_enum_mutex);
7315
7316        map = trace_enum_maps;
7317
7318        while (map) {
7319                if (map->head.mod == mod)
7320                        break;
7321                map = trace_enum_jmp_to_tail(map);
7322                last = &map->tail.next;
7323                map = map->tail.next;
7324        }
7325        if (!map)
7326                goto out;
7327
7328        *last = trace_enum_jmp_to_tail(map)->tail.next;
7329        kfree(map);
7330 out:
7331        mutex_unlock(&trace_enum_mutex);
7332}
7333#else
7334static inline void trace_module_remove_enums(struct module *mod) { }
7335#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7336
7337static int trace_module_notify(struct notifier_block *self,
7338                               unsigned long val, void *data)
7339{
7340        struct module *mod = data;
7341
7342        switch (val) {
7343        case MODULE_STATE_COMING:
7344                trace_module_add_enums(mod);
7345                break;
7346        case MODULE_STATE_GOING:
7347                trace_module_remove_enums(mod);
7348                break;
7349        }
7350
7351        return 0;
7352}
7353
7354static struct notifier_block trace_module_nb = {
7355        .notifier_call = trace_module_notify,
7356        .priority = 0,
7357};
7358#endif /* CONFIG_MODULES */
7359
7360static __init int tracer_init_tracefs(void)
7361{
7362        struct dentry *d_tracer;
7363
7364        trace_access_lock_init();
7365
7366        d_tracer = tracing_init_dentry();
7367        if (IS_ERR(d_tracer))
7368                return 0;
7369
7370        init_tracer_tracefs(&global_trace, d_tracer);
7371        ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7372
7373        trace_create_file("tracing_thresh", 0644, d_tracer,
7374                        &global_trace, &tracing_thresh_fops);
7375
7376        trace_create_file("README", 0444, d_tracer,
7377                        NULL, &tracing_readme_fops);
7378
7379        trace_create_file("saved_cmdlines", 0444, d_tracer,
7380                        NULL, &tracing_saved_cmdlines_fops);
7381
7382        trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7383                          NULL, &tracing_saved_cmdlines_size_fops);
7384
7385        trace_enum_init();
7386
7387        trace_create_enum_file(d_tracer);
7388
7389#ifdef CONFIG_MODULES
7390        register_module_notifier(&trace_module_nb);
7391#endif
7392
7393#ifdef CONFIG_DYNAMIC_FTRACE
7394        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7395                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7396#endif
7397
7398        create_trace_instances(d_tracer);
7399
7400        update_tracer_options(&global_trace);
7401
7402        return 0;
7403}
7404
7405static int trace_panic_handler(struct notifier_block *this,
7406                               unsigned long event, void *unused)
7407{
7408        if (ftrace_dump_on_oops)
7409                ftrace_dump(ftrace_dump_on_oops);
7410        return NOTIFY_OK;
7411}
7412
7413static struct notifier_block trace_panic_notifier = {
7414        .notifier_call  = trace_panic_handler,
7415        .next           = NULL,
7416        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7417};
7418
7419static int trace_die_handler(struct notifier_block *self,
7420                             unsigned long val,
7421                             void *data)
7422{
7423        switch (val) {
7424        case DIE_OOPS:
7425                if (ftrace_dump_on_oops)
7426                        ftrace_dump(ftrace_dump_on_oops);
7427                break;
7428        default:
7429                break;
7430        }
7431        return NOTIFY_OK;
7432}
7433
7434static struct notifier_block trace_die_notifier = {
7435        .notifier_call = trace_die_handler,
7436        .priority = 200
7437};
7438
7439/*
7440 * printk is set to a max of 1024, but we really don't need it that big.
7441 * Nothing should be printing 1000 characters anyway.
7442 */
7443#define TRACE_MAX_PRINT         1000
7444
7445/*
7446 * Define here KERN_TRACE so that we have one place to modify
7447 * it if we decide to change what log level the ftrace dump
7448 * should be at.
7449 */
7450#define KERN_TRACE              KERN_EMERG
7451
7452void
7453trace_printk_seq(struct trace_seq *s)
7454{
7455        /* Probably should print a warning here. */
7456        if (s->seq.len >= TRACE_MAX_PRINT)
7457                s->seq.len = TRACE_MAX_PRINT;
7458
7459        /*
7460         * More paranoid code. Although the buffer size is set to
7461         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7462         * an extra layer of protection.
7463         */
7464        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7465                s->seq.len = s->seq.size - 1;
7466
7467        /* Should already be NUL-terminated, but we are paranoid. */
7468        s->buffer[s->seq.len] = 0;
7469
7470        printk(KERN_TRACE "%s", s->buffer);
7471
7472        trace_seq_init(s);
7473}
7474
7475void trace_init_global_iter(struct trace_iterator *iter)
7476{
7477        iter->tr = &global_trace;
7478        iter->trace = iter->tr->current_trace;
7479        iter->cpu_file = RING_BUFFER_ALL_CPUS;
7480        iter->trace_buffer = &global_trace.trace_buffer;
7481
7482        if (iter->trace && iter->trace->open)
7483                iter->trace->open(iter);
7484
7485        /* Annotate start of buffers if we had overruns */
7486        if (ring_buffer_overruns(iter->trace_buffer->buffer))
7487                iter->iter_flags |= TRACE_FILE_ANNOTATE;
7488
7489        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7490        if (trace_clocks[iter->tr->clock_id].in_ns)
7491                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7492}
7493
7494void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7495{
7496        /* use static because iter can be a bit big for the stack */
7497        static struct trace_iterator iter;
7498        static atomic_t dump_running;
7499        struct trace_array *tr = &global_trace;
7500        unsigned int old_userobj;
7501        unsigned long flags;
7502        int cnt = 0, cpu;
7503
7504        /* Only allow one dump user at a time. */
7505        if (atomic_inc_return(&dump_running) != 1) {
7506                atomic_dec(&dump_running);
7507                return;
7508        }
7509
7510        /*
7511         * Always turn off tracing when we dump.
7512         * We don't need to show trace output of what happens
7513         * between multiple crashes.
7514         *
7515         * If the user does a sysrq-z, then they can re-enable
7516         * tracing with echo 1 > tracing_on.
7517         */
7518        tracing_off();
7519
7520        local_irq_save(flags);
7521
7522        /* Simulate the iterator */
7523        trace_init_global_iter(&iter);
7524
7525        for_each_tracing_cpu(cpu) {
7526                atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7527        }
7528
7529        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7530
7531        /* don't look at user memory in panic mode */
7532        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7533
7534        switch (oops_dump_mode) {
7535        case DUMP_ALL:
7536                iter.cpu_file = RING_BUFFER_ALL_CPUS;
7537                break;
7538        case DUMP_ORIG:
7539                iter.cpu_file = raw_smp_processor_id();
7540                break;
7541        case DUMP_NONE:
7542                goto out_enable;
7543        default:
7544                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7545                iter.cpu_file = RING_BUFFER_ALL_CPUS;
7546        }
7547
7548        printk(KERN_TRACE "Dumping ftrace buffer:\n");
7549
7550        /* Did function tracer already get disabled? */
7551        if (ftrace_is_dead()) {
7552                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7553                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7554        }
7555
7556        /*
7557         * We need to stop all tracing on all CPUs to read
7558         * the next buffer. This is a bit expensive, but is
7559         * not done often. We read everything we can,
7560         * and then release the locks again.
7561         */
7562
7563        while (!trace_empty(&iter)) {
7564
7565                if (!cnt)
7566                        printk(KERN_TRACE "---------------------------------\n");
7567
7568                cnt++;
7569
7570                /* reset all but tr, trace, and overruns */
7571                memset(&iter.seq, 0,
7572                       sizeof(struct trace_iterator) -
7573                       offsetof(struct trace_iterator, seq));
7574                iter.iter_flags |= TRACE_FILE_LAT_FMT;
7575                iter.pos = -1;
7576
7577                if (trace_find_next_entry_inc(&iter) != NULL) {
7578                        int ret;
7579
7580                        ret = print_trace_line(&iter);
7581                        if (ret != TRACE_TYPE_NO_CONSUME)
7582                                trace_consume(&iter);
7583                }
7584                touch_nmi_watchdog();
7585
7586                trace_printk_seq(&iter.seq);
7587        }
7588
7589        if (!cnt)
7590                printk(KERN_TRACE "   (ftrace buffer empty)\n");
7591        else
7592                printk(KERN_TRACE "---------------------------------\n");
7593
7594 out_enable:
7595        tr->trace_flags |= old_userobj;
7596
7597        for_each_tracing_cpu(cpu) {
7598                atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7599        }
7600        atomic_dec(&dump_running);
7601        local_irq_restore(flags);
7602}
7603EXPORT_SYMBOL_GPL(ftrace_dump);
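
/*
 * Usage note (illustrative): besides the panic/die notifiers above, this
 * export lets other kernel code force a dump, e.g. a module could call
 * ftrace_dump(DUMP_ALL). The magic-sysrq 'z' handler is also expected to
 * end up here, and ftrace_dump_on_oops selects between DUMP_ALL and
 * DUMP_ORIG when an oops or panic triggers the dump.
 */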
7604
7605__init static int tracer_alloc_buffers(void)
7606{
7607        int ring_buf_size;
7608        int ret = -ENOMEM;
7609
7610        /*
7611         * Make sure we don't accidentally add more trace options
7612         * than we have bits for.
7613         */
7614        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7615
7616        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7617                goto out;
7618
7619        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7620                goto out_free_buffer_mask;
7621
7622        /* Only allocate trace_printk buffers if a trace_printk exists */
7623        if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7624                /* Must be called before global_trace.buffer is allocated */
7625                trace_printk_init_buffers();
7626
7627        /* To save memory, keep the ring buffer size to its minimum */
7628        if (ring_buffer_expanded)
7629                ring_buf_size = trace_buf_size;
7630        else
7631                ring_buf_size = 1;
7632
7633        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7634        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7635
7636        raw_spin_lock_init(&global_trace.start_lock);
7637
7638        /* Used for event triggers */
7639        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7640        if (!temp_buffer)
7641                goto out_free_cpumask;
7642
7643        if (trace_create_savedcmd() < 0)
7644                goto out_free_temp_buffer;
7645
7646        /* TODO: make the number of buffers hot pluggable with CPUs */
7647        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7648                printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7649                WARN_ON(1);
7650                goto out_free_savedcmd;
7651        }
7652
7653        if (global_trace.buffer_disabled)
7654                tracing_off();
7655
7656        if (trace_boot_clock) {
7657                ret = tracing_set_clock(&global_trace, trace_boot_clock);
7658                if (ret < 0)
7659                        pr_warn("Trace clock %s not defined, going back to default\n",
7660                                trace_boot_clock);
7661        }
7662
7663        /*
7664         * register_tracer() might reference current_trace, so it
7665         * needs to be set before we register anything. This is
7666         * just a bootstrap of current_trace anyway.
7667         */
7668        global_trace.current_trace = &nop_trace;
7669
7670        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7671
7672        ftrace_init_global_array_ops(&global_trace);
7673
7674        init_trace_flags_index(&global_trace);
7675
7676        register_tracer(&nop_trace);
7677
7678        /* All seems OK, enable tracing */
7679        tracing_disabled = 0;
7680
7681        atomic_notifier_chain_register(&panic_notifier_list,
7682                                       &trace_panic_notifier);
7683
7684        register_die_notifier(&trace_die_notifier);
7685
7686        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7687
7688        INIT_LIST_HEAD(&global_trace.systems);
7689        INIT_LIST_HEAD(&global_trace.events);
7690        list_add(&global_trace.list, &ftrace_trace_arrays);
7691
7692        apply_trace_boot_options();
7693
7694        register_snapshot_cmd();
7695
7696        return 0;
7697
7698out_free_savedcmd:
7699        free_saved_cmdlines_buffer(savedcmd);
7700out_free_temp_buffer:
7701        ring_buffer_free(temp_buffer);
7702out_free_cpumask:
7703        free_cpumask_var(global_trace.tracing_cpumask);
7704out_free_buffer_mask:
7705        free_cpumask_var(tracing_buffer_mask);
7706out:
7707        return ret;
7708}
7709
7710void __init trace_init(void)
7711{
7712        if (tracepoint_printk) {
7713                tracepoint_print_iter =
7714                        kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7715                if (WARN_ON(!tracepoint_print_iter))
7716                        tracepoint_printk = 0;
7717        }
7718        tracer_alloc_buffers();
7719        trace_event_init();
7720}
7721
7722__init static int clear_boot_tracer(void)
7723{
7724        /*
7725         * The default bootup tracer's name lives in an init section.
7726         * This function is called as a late initcall. If the boot
7727         * tracer was never found, clear it out, to prevent a later
7728         * registration from accessing the buffer that is about to
7729         * be freed.
7730         */
7731        if (!default_bootup_tracer)
7732                return 0;
7733
7734        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7735               default_bootup_tracer);
7736        default_bootup_tracer = NULL;
7737
7738        return 0;
7739}
7740
7741fs_initcall(tracer_init_tracefs);
7742late_initcall(clear_boot_tracer);
7743