linux/kernel/trace/trace.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * ring buffer based function tracer
   4 *
   5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   7 *
   8 * Originally taken from the RT patch by:
   9 *    Arnaldo Carvalho de Melo <acme@redhat.com>
  10 *
  11 * Based on code from the latency_tracer, that is:
  12 *  Copyright (C) 2004-2006 Ingo Molnar
  13 *  Copyright (C) 2004 Nadia Yvette Chambers
  14 */
  15#include <linux/ring_buffer.h>
  16#include <generated/utsrelease.h>
  17#include <linux/stacktrace.h>
  18#include <linux/writeback.h>
  19#include <linux/kallsyms.h>
  20#include <linux/seq_file.h>
  21#include <linux/notifier.h>
  22#include <linux/irqflags.h>
  23#include <linux/debugfs.h>
  24#include <linux/tracefs.h>
  25#include <linux/pagemap.h>
  26#include <linux/hardirq.h>
  27#include <linux/linkage.h>
  28#include <linux/uaccess.h>
  29#include <linux/vmalloc.h>
  30#include <linux/ftrace.h>
  31#include <linux/module.h>
  32#include <linux/percpu.h>
  33#include <linux/splice.h>
  34#include <linux/kdebug.h>
  35#include <linux/string.h>
  36#include <linux/mount.h>
  37#include <linux/rwsem.h>
  38#include <linux/slab.h>
  39#include <linux/ctype.h>
  40#include <linux/init.h>
  41#include <linux/poll.h>
  42#include <linux/nmi.h>
  43#include <linux/fs.h>
  44#include <linux/trace.h>
  45#include <linux/sched/clock.h>
  46#include <linux/sched/rt.h>
  47
  48#include "trace.h"
  49#include "trace_output.h"
  50
  51/*
  52 * On boot up, the ring buffer is set to the minimum size, so that
  53 * we do not waste memory on systems that are not using tracing.
  54 */
  55bool ring_buffer_expanded;
  56
  57/*
  58 * We need to change this state when a selftest is running.
  59 * A selftest will look into the ring buffer to count the
  60 * entries inserted during the selftest, although concurrent
  61 * insertions into the ring buffer, such as trace_printk, could occur
  62 * at the same time, giving false positive or negative results.
  63 */
  64static bool __read_mostly tracing_selftest_running;
  65
  66/*
  67 * If a tracer is running, we do not want to run SELFTEST.
  68 */
  69bool __read_mostly tracing_selftest_disabled;
  70
  71/* Pipe tracepoints to printk */
  72struct trace_iterator *tracepoint_print_iter;
  73int tracepoint_printk;
  74static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  75
  76/* For tracers that don't implement custom flags */
  77static struct tracer_opt dummy_tracer_opt[] = {
  78        { }
  79};
  80
  81static int
  82dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
  83{
  84        return 0;
  85}
  86
  87/*
  88 * To prevent the comm cache from being overwritten when no
  89 * tracing is active, only save the comm when a trace event
  90 * occurred.
  91 */
  92static DEFINE_PER_CPU(bool, trace_taskinfo_save);
  93
  94/*
  95 * Kill all tracing for good (never come back).
  96 * It is initialized to 1 but will turn to zero if the initialization
  97 * of the tracer is successful. But that is the only place that sets
  98 * this back to zero.
  99 */
 100static int tracing_disabled = 1;
 101
 102cpumask_var_t __read_mostly     tracing_buffer_mask;
 103
 104/*
 105 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 106 *
 107 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 108 * is set, then ftrace_dump is called. This will output the contents
 109 * of the ftrace buffers to the console.  This is very useful for
 110 * capturing traces that lead to crashes and outputting them to a
 111 * serial console.
 112 *
 113 * It is off by default, but you can enable it either by specifying
 114 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 115 * /proc/sys/kernel/ftrace_dump_on_oops
 116 * Set it to 1 if you want to dump the buffers of all CPUs
 117 * Set it to 2 if you want to dump the buffer of the CPU that triggered the oops
 118 */
 119
 120enum ftrace_dump_mode ftrace_dump_on_oops;
 121
 122/* When set, tracing will stop when a WARN*() is hit */
 123int __disable_trace_on_warning;
 124
 125#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 126/* Map of enums to their values, for "eval_map" file */
 127struct trace_eval_map_head {
 128        struct module                   *mod;
 129        unsigned long                   length;
 130};
 131
 132union trace_eval_map_item;
 133
 134struct trace_eval_map_tail {
 135        /*
 136         * "end" is first and points to NULL as it must be different
 137         * than "mod" or "eval_string"
 138         */
 139        union trace_eval_map_item       *next;
 140        const char                      *end;   /* points to NULL */
 141};
 142
 143static DEFINE_MUTEX(trace_eval_mutex);
 144
 145/*
 146 * The trace_eval_maps are saved in an array with two extra elements,
 147 * one at the beginning, and one at the end. The beginning item contains
 148 * the count of the saved maps (head.length), and the module they
 149 * belong to if not built in (head.mod). The ending item contains a
 150 * pointer to the next array of saved eval_map items.
 151 */
 152union trace_eval_map_item {
 153        struct trace_eval_map           map;
 154        struct trace_eval_map_head      head;
 155        struct trace_eval_map_tail      tail;
 156};
 157
 158static union trace_eval_map_item *trace_eval_maps;
 159#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
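/*
 * Illustrative layout sketch (for exposition only, indices hypothetical):
 * a saved block of N eval maps is laid out as N + 2 union items, matching
 * the comment above union trace_eval_map_item:
 *
 *	[0]        head:  .mod = owning module (or NULL), .length = N
 *	[1 .. N]   map:   the saved trace_eval_map entries
 *	[N + 1]    tail:  .next = pointer to the next saved block (or NULL)
 *
 * Walking all saved maps therefore means stepping over N map items after
 * the head, then following tail.next to the next block until it is NULL.
 */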
 160
 161static int tracing_set_tracer(struct trace_array *tr, const char *buf);
 162
 163#define MAX_TRACER_SIZE         100
 164static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 165static char *default_bootup_tracer;
 166
 167static bool allocate_snapshot;
 168
 169static int __init set_cmdline_ftrace(char *str)
 170{
 171        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 172        default_bootup_tracer = bootup_tracer_buf;
 173        /* We are using ftrace early, expand it */
 174        ring_buffer_expanded = true;
 175        return 1;
 176}
 177__setup("ftrace=", set_cmdline_ftrace);
 178
 179static int __init set_ftrace_dump_on_oops(char *str)
 180{
 181        if (*str++ != '=' || !*str) {
 182                ftrace_dump_on_oops = DUMP_ALL;
 183                return 1;
 184        }
 185
 186        if (!strcmp("orig_cpu", str)) {
 187                ftrace_dump_on_oops = DUMP_ORIG;
 188                return 1;
 189        }
 190
 191        return 0;
 192}
 193__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 194
 195static int __init stop_trace_on_warning(char *str)
 196{
 197        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 198                __disable_trace_on_warning = 1;
 199        return 1;
 200}
 201__setup("traceoff_on_warning", stop_trace_on_warning);
 202
 203static int __init boot_alloc_snapshot(char *str)
 204{
 205        allocate_snapshot = true;
 206        /* We also need the main ring buffer expanded */
 207        ring_buffer_expanded = true;
 208        return 1;
 209}
 210__setup("alloc_snapshot", boot_alloc_snapshot);
 211
 212
 213static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 214
 215static int __init set_trace_boot_options(char *str)
 216{
 217        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 218        return 0;
 219}
 220__setup("trace_options=", set_trace_boot_options);
 221
 222static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 223static char *trace_boot_clock __initdata;
 224
 225static int __init set_trace_boot_clock(char *str)
 226{
 227        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 228        trace_boot_clock = trace_boot_clock_buf;
 229        return 0;
 230}
 231__setup("trace_clock=", set_trace_boot_clock);
 232
 233static int __init set_tracepoint_printk(char *str)
 234{
 235        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 236                tracepoint_printk = 1;
 237        return 1;
 238}
 239__setup("tp_printk", set_tracepoint_printk);
 240
 241unsigned long long ns2usecs(u64 nsec)
 242{
 243        nsec += 500;
 244        do_div(nsec, 1000);
 245        return nsec;
 246}
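/*
 * Worked example of the rounding above (illustrative only): ns2usecs()
 * rounds to the nearest microsecond rather than truncating, because 500
 * is added before the divide by 1000:
 *
 *	ns2usecs(1499) -> (1499 + 500) / 1000 = 1
 *	ns2usecs(1500) -> (1500 + 500) / 1000 = 2
 */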
 247
 248/* trace_flags holds trace_options default values */
 249#define TRACE_DEFAULT_FLAGS                                             \
 250        (FUNCTION_DEFAULT_FLAGS |                                       \
 251         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 252         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 253         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 254         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
 255
 256/* trace_options that are only supported by global_trace */
 257#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 258               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 259
 260/* trace_flags that are default zero for instances */
 261#define ZEROED_TRACE_FLAGS \
 262        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 263
 264/*
 265 * The global_trace is the descriptor that holds the top-level tracing
 266 * buffers for the live tracing.
 267 */
 268static struct trace_array global_trace = {
 269        .trace_flags = TRACE_DEFAULT_FLAGS,
 270};
 271
 272LIST_HEAD(ftrace_trace_arrays);
 273
 274int trace_array_get(struct trace_array *this_tr)
 275{
 276        struct trace_array *tr;
 277        int ret = -ENODEV;
 278
 279        mutex_lock(&trace_types_lock);
 280        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 281                if (tr == this_tr) {
 282                        tr->ref++;
 283                        ret = 0;
 284                        break;
 285                }
 286        }
 287        mutex_unlock(&trace_types_lock);
 288
 289        return ret;
 290}
 291
 292static void __trace_array_put(struct trace_array *this_tr)
 293{
 294        WARN_ON(!this_tr->ref);
 295        this_tr->ref--;
 296}
 297
 298void trace_array_put(struct trace_array *this_tr)
 299{
 300        mutex_lock(&trace_types_lock);
 301        __trace_array_put(this_tr);
 302        mutex_unlock(&trace_types_lock);
 303}
 304
 305int call_filter_check_discard(struct trace_event_call *call, void *rec,
 306                              struct ring_buffer *buffer,
 307                              struct ring_buffer_event *event)
 308{
 309        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 310            !filter_match_preds(call->filter, rec)) {
 311                __trace_event_discard_commit(buffer, event);
 312                return 1;
 313        }
 314
 315        return 0;
 316}
 317
 318void trace_free_pid_list(struct trace_pid_list *pid_list)
 319{
 320        vfree(pid_list->pids);
 321        kfree(pid_list);
 322}
 323
 324/**
 325 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 326 * @filtered_pids: The list of pids to check
 327 * @search_pid: The PID to find in @filtered_pids
 328 *
  329 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 330 */
 331bool
 332trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 333{
 334        /*
 335         * If pid_max changed after filtered_pids was created, we
 336         * by default ignore all pids greater than the previous pid_max.
 337         */
 338        if (search_pid >= filtered_pids->pid_max)
 339                return false;
 340
 341        return test_bit(search_pid, filtered_pids->pids);
 342}
 343
 344/**
 345 * trace_ignore_this_task - should a task be ignored for tracing
 346 * @filtered_pids: The list of pids to check
 347 * @task: The task that should be ignored if not filtered
 348 *
 349 * Checks if @task should be traced or not from @filtered_pids.
 350 * Returns true if @task should *NOT* be traced.
 351 * Returns false if @task should be traced.
 352 */
 353bool
 354trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
 355{
 356        /*
 357         * Return false, because if filtered_pids does not exist,
 358         * all pids are good to trace.
 359         */
 360        if (!filtered_pids)
 361                return false;
 362
 363        return !trace_find_filtered_pid(filtered_pids, task->pid);
 364}
 365
 366/**
 367 * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
 368 * @pid_list: The list to modify
 369 * @self: The current task for fork or NULL for exit
 370 * @task: The task to add or remove
 371 *
  372 * When adding a task, the task is only added if @self is defined and @self
  373 * is also included in @pid_list. This happens on fork, where tasks should
  374 * only be added when the parent is listed. If @self is NULL, then the
  375 * @task pid will be removed from the list, which happens on the exit
  376 * of a task.
 377 */
 378void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 379                                  struct task_struct *self,
 380                                  struct task_struct *task)
 381{
 382        if (!pid_list)
 383                return;
 384
 385        /* For forks, we only add if the forking task is listed */
 386        if (self) {
 387                if (!trace_find_filtered_pid(pid_list, self->pid))
 388                        return;
 389        }
 390
 391        /* Sorry, but we don't support pid_max changing after setting */
 392        if (task->pid >= pid_list->pid_max)
 393                return;
 394
 395        /* "self" is set for forks, and NULL for exits */
 396        if (self)
 397                set_bit(task->pid, pid_list->pids);
 398        else
 399                clear_bit(task->pid, pid_list->pids);
 400}
 401
 402/**
 403 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 404 * @pid_list: The pid list to show
 405 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 406 * @pos: The position of the file
 407 *
 408 * This is used by the seq_file "next" operation to iterate the pids
 409 * listed in a trace_pid_list structure.
 410 *
 411 * Returns the pid+1 as we want to display pid of zero, but NULL would
 412 * stop the iteration.
 413 */
 414void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 415{
 416        unsigned long pid = (unsigned long)v;
 417
 418        (*pos)++;
 419
  420        /* pid already is +1 of the actual previous bit */
 421        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 422
 423        /* Return pid + 1 to allow zero to be represented */
 424        if (pid < pid_list->pid_max)
 425                return (void *)(pid + 1);
 426
 427        return NULL;
 428}
 429
 430/**
 431 * trace_pid_start - Used for seq_file to start reading pid lists
 432 * @pid_list: The pid list to show
 433 * @pos: The position of the file
 434 *
 435 * This is used by seq_file "start" operation to start the iteration
 436 * of listing pids.
 437 *
 438 * Returns the pid+1 as we want to display pid of zero, but NULL would
 439 * stop the iteration.
 440 */
 441void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 442{
 443        unsigned long pid;
 444        loff_t l = 0;
 445
 446        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 447        if (pid >= pid_list->pid_max)
 448                return NULL;
 449
 450        /* Return pid + 1 so that zero can be the exit value */
 451        for (pid++; pid && l < *pos;
 452             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 453                ;
 454        return (void *)pid;
 455}
 456
 457/**
 458 * trace_pid_show - show the current pid in seq_file processing
 459 * @m: The seq_file structure to write into
 460 * @v: A void pointer of the pid (+1) value to display
 461 *
 462 * Can be directly used by seq_file operations to display the current
 463 * pid value.
 464 */
 465int trace_pid_show(struct seq_file *m, void *v)
 466{
 467        unsigned long pid = (unsigned long)v - 1;
 468
 469        seq_printf(m, "%lu\n", pid);
 470        return 0;
 471}
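/*
 * Minimal sketch (hypothetical, for exposition only) of how a seq_file
 * consumer elsewhere in the tracing code might wire the trace_pid_*
 * helpers above into a struct seq_operations. The "my_pids_*" wrapper
 * names and the way the pid list reaches m->private are assumptions,
 * not taken from this file.
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void my_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pids_sops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */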
 472
 473/* 128 should be much more than enough */
 474#define PID_BUF_SIZE            127
 475
 476int trace_pid_write(struct trace_pid_list *filtered_pids,
 477                    struct trace_pid_list **new_pid_list,
 478                    const char __user *ubuf, size_t cnt)
 479{
 480        struct trace_pid_list *pid_list;
 481        struct trace_parser parser;
 482        unsigned long val;
 483        int nr_pids = 0;
 484        ssize_t read = 0;
 485        ssize_t ret = 0;
 486        loff_t pos;
 487        pid_t pid;
 488
 489        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 490                return -ENOMEM;
 491
 492        /*
  493         * Always recreate a new array. The write is an all-or-nothing
  494         * operation: a new array is built whenever the user adds new
  495         * pids, so that if the operation fails, the current list is
  496         * left unmodified.
 497         */
 498        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 499        if (!pid_list)
 500                return -ENOMEM;
 501
 502        pid_list->pid_max = READ_ONCE(pid_max);
 503
 504        /* Only truncating will shrink pid_max */
 505        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 506                pid_list->pid_max = filtered_pids->pid_max;
 507
 508        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 509        if (!pid_list->pids) {
 510                kfree(pid_list);
 511                return -ENOMEM;
 512        }
 513
 514        if (filtered_pids) {
 515                /* copy the current bits to the new max */
 516                for_each_set_bit(pid, filtered_pids->pids,
 517                                 filtered_pids->pid_max) {
 518                        set_bit(pid, pid_list->pids);
 519                        nr_pids++;
 520                }
 521        }
 522
 523        while (cnt > 0) {
 524
 525                pos = 0;
 526
 527                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 528                if (ret < 0 || !trace_parser_loaded(&parser))
 529                        break;
 530
 531                read += ret;
 532                ubuf += ret;
 533                cnt -= ret;
 534
 535                ret = -EINVAL;
 536                if (kstrtoul(parser.buffer, 0, &val))
 537                        break;
 538                if (val >= pid_list->pid_max)
 539                        break;
 540
 541                pid = (pid_t)val;
 542
 543                set_bit(pid, pid_list->pids);
 544                nr_pids++;
 545
 546                trace_parser_clear(&parser);
 547                ret = 0;
 548        }
 549        trace_parser_put(&parser);
 550
 551        if (ret < 0) {
 552                trace_free_pid_list(pid_list);
 553                return ret;
 554        }
 555
 556        if (!nr_pids) {
 557                /* Cleared the list of pids */
 558                trace_free_pid_list(pid_list);
 559                read = ret;
 560                pid_list = NULL;
 561        }
 562
 563        *new_pid_list = pid_list;
 564
 565        return read;
 566}
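/*
 * Worked example of the bitmap sizing in trace_pid_write() (illustrative
 * only): the pid list is a bitmap with one bit per possible pid, so it
 * needs (pid_max + 7) >> 3 bytes, i.e. pid_max bits rounded up to whole
 * bytes. With the default pid_max of 32768 that is:
 *
 *	(32768 + 7) >> 3 = 4096 bytes (one page)
 */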
 567
 568static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 569{
 570        u64 ts;
 571
 572        /* Early boot up does not have a buffer yet */
 573        if (!buf->buffer)
 574                return trace_clock_local();
 575
 576        ts = ring_buffer_time_stamp(buf->buffer, cpu);
 577        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 578
 579        return ts;
 580}
 581
 582u64 ftrace_now(int cpu)
 583{
 584        return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
 585}
 586
 587/**
 588 * tracing_is_enabled - Show if global_trace has been disabled
 589 *
 590 * Shows if the global trace has been enabled or not. It uses the
  591 * mirror flag "buffer_disabled", which is meant for fast paths such as
  592 * the irqsoff tracer. But it may be inaccurate due to races. If you
 593 * need to know the accurate state, use tracing_is_on() which is a little
 594 * slower, but accurate.
 595 */
 596int tracing_is_enabled(void)
 597{
 598        /*
 599         * For quick access (irqsoff uses this in fast path), just
 600         * return the mirror variable of the state of the ring buffer.
 601         * It's a little racy, but we don't really care.
 602         */
 603        smp_rmb();
 604        return !global_trace.buffer_disabled;
 605}
 606
 607/*
 608 * trace_buf_size is the size in bytes that is allocated
 609 * for a buffer. Note, the number of bytes is always rounded
 610 * to page size.
 611 *
  612 * This number is purposely set to a low value of 16384:
  613 * if a dump on oops happens, it is much appreciated not to
  614 * have to wait for all that output. Anyway, this is both
  615 * boot time and run time configurable.
 616 */
 617#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 618
 619static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 620
 621/* trace_types holds a link list of available tracers. */
 622static struct tracer            *trace_types __read_mostly;
 623
 624/*
 625 * trace_types_lock is used to protect the trace_types list.
 626 */
 627DEFINE_MUTEX(trace_types_lock);
 628
 629/*
  630 * Serialize access to the ring buffer.
  631 *
  632 * The ring buffer serializes readers, but that is only low-level protection.
  633 * The validity of the events (as returned by ring_buffer_peek() etc.)
  634 * is not protected by the ring buffer.
  635 *
  636 * The content of events may become garbage if we allow another process to
  637 * consume these events concurrently:
  638 *   A) the page of the consumed events may become a normal page
  639 *      (not a reader page) in the ring buffer, and this page will be
  640 *      rewritten by the events producer.
  641 *   B) the page of the consumed events may become a page for splice_read,
  642 *      and this page will be returned to the system.
  643 *
  644 * These primitives allow multiple processes to access different CPU ring
  645 * buffers concurrently.
  646 *
  647 * These primitives don't distinguish read-only and read-consume access.
  648 * Multiple read-only accesses are also serialized.
 649 */
 650
 651#ifdef CONFIG_SMP
 652static DECLARE_RWSEM(all_cpu_access_lock);
 653static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 654
 655static inline void trace_access_lock(int cpu)
 656{
 657        if (cpu == RING_BUFFER_ALL_CPUS) {
 658                /* gain it for accessing the whole ring buffer. */
 659                down_write(&all_cpu_access_lock);
 660        } else {
 661                /* gain it for accessing a cpu ring buffer. */
 662
 663                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 664                down_read(&all_cpu_access_lock);
 665
 666                /* Secondly block other access to this @cpu ring buffer. */
 667                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 668        }
 669}
 670
 671static inline void trace_access_unlock(int cpu)
 672{
 673        if (cpu == RING_BUFFER_ALL_CPUS) {
 674                up_write(&all_cpu_access_lock);
 675        } else {
 676                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 677                up_read(&all_cpu_access_lock);
 678        }
 679}
 680
 681static inline void trace_access_lock_init(void)
 682{
 683        int cpu;
 684
 685        for_each_possible_cpu(cpu)
 686                mutex_init(&per_cpu(cpu_access_lock, cpu));
 687}
 688
 689#else
 690
 691static DEFINE_MUTEX(access_lock);
 692
 693static inline void trace_access_lock(int cpu)
 694{
 695        (void)cpu;
 696        mutex_lock(&access_lock);
 697}
 698
 699static inline void trace_access_unlock(int cpu)
 700{
 701        (void)cpu;
 702        mutex_unlock(&access_lock);
 703}
 704
 705static inline void trace_access_lock_init(void)
 706{
 707}
 708
 709#endif
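/*
 * Illustrative usage sketch (hypothetical, for exposition only) of the
 * access-lock primitives above, as a consumer of one cpu buffer (or of
 * all of them, with RING_BUFFER_ALL_CPUS) might use them:
 *
 *	trace_access_lock(cpu);
 *	... consume events from the cpu buffer(s) ...
 *	trace_access_unlock(cpu);
 *
 * On SMP, a per-cpu reader takes all_cpu_access_lock for read plus that
 * cpu's mutex, so readers of different cpu buffers can run concurrently,
 * while a RING_BUFFER_ALL_CPUS reader takes the rwsem for write and
 * excludes all per-cpu readers.
 */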
 710
 711#ifdef CONFIG_STACKTRACE
 712static void __ftrace_trace_stack(struct ring_buffer *buffer,
 713                                 unsigned long flags,
 714                                 int skip, int pc, struct pt_regs *regs);
 715static inline void ftrace_trace_stack(struct trace_array *tr,
 716                                      struct ring_buffer *buffer,
 717                                      unsigned long flags,
 718                                      int skip, int pc, struct pt_regs *regs);
 719
 720#else
 721static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
 722                                        unsigned long flags,
 723                                        int skip, int pc, struct pt_regs *regs)
 724{
 725}
 726static inline void ftrace_trace_stack(struct trace_array *tr,
 727                                      struct ring_buffer *buffer,
 728                                      unsigned long flags,
 729                                      int skip, int pc, struct pt_regs *regs)
 730{
 731}
 732
 733#endif
 734
 735static __always_inline void
 736trace_event_setup(struct ring_buffer_event *event,
 737                  int type, unsigned long flags, int pc)
 738{
 739        struct trace_entry *ent = ring_buffer_event_data(event);
 740
 741        tracing_generic_entry_update(ent, flags, pc);
 742        ent->type = type;
 743}
 744
 745static __always_inline struct ring_buffer_event *
 746__trace_buffer_lock_reserve(struct ring_buffer *buffer,
 747                          int type,
 748                          unsigned long len,
 749                          unsigned long flags, int pc)
 750{
 751        struct ring_buffer_event *event;
 752
 753        event = ring_buffer_lock_reserve(buffer, len);
 754        if (event != NULL)
 755                trace_event_setup(event, type, flags, pc);
 756
 757        return event;
 758}
 759
 760void tracer_tracing_on(struct trace_array *tr)
 761{
 762        if (tr->trace_buffer.buffer)
 763                ring_buffer_record_on(tr->trace_buffer.buffer);
 764        /*
 765         * This flag is looked at when buffers haven't been allocated
 766         * yet, or by some tracers (like irqsoff), that just want to
 767         * know if the ring buffer has been disabled, but it can handle
  768         * races where it gets disabled while we still do a record.
 769         * As the check is in the fast path of the tracers, it is more
 770         * important to be fast than accurate.
 771         */
 772        tr->buffer_disabled = 0;
 773        /* Make the flag seen by readers */
 774        smp_wmb();
 775}
 776
 777/**
 778 * tracing_on - enable tracing buffers
 779 *
 780 * This function enables tracing buffers that may have been
 781 * disabled with tracing_off.
 782 */
 783void tracing_on(void)
 784{
 785        tracer_tracing_on(&global_trace);
 786}
 787EXPORT_SYMBOL_GPL(tracing_on);
 788
 789
 790static __always_inline void
 791__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 792{
 793        __this_cpu_write(trace_taskinfo_save, true);
 794
 795        /* If this is the temp buffer, we need to commit fully */
 796        if (this_cpu_read(trace_buffered_event) == event) {
 797                /* Length is in event->array[0] */
 798                ring_buffer_write(buffer, event->array[0], &event->array[1]);
 799                /* Release the temp buffer */
 800                this_cpu_dec(trace_buffered_event_cnt);
 801        } else
 802                ring_buffer_unlock_commit(buffer, event);
 803}
 804
 805/**
 806 * __trace_puts - write a constant string into the trace buffer.
 807 * @ip:    The address of the caller
 808 * @str:   The constant string to write
 809 * @size:  The size of the string.
 810 */
 811int __trace_puts(unsigned long ip, const char *str, int size)
 812{
 813        struct ring_buffer_event *event;
 814        struct ring_buffer *buffer;
 815        struct print_entry *entry;
 816        unsigned long irq_flags;
 817        int alloc;
 818        int pc;
 819
 820        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 821                return 0;
 822
 823        pc = preempt_count();
 824
 825        if (unlikely(tracing_selftest_running || tracing_disabled))
 826                return 0;
 827
 828        alloc = sizeof(*entry) + size + 2; /* possible \n added */
 829
 830        local_save_flags(irq_flags);
 831        buffer = global_trace.trace_buffer.buffer;
 832        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
 833                                            irq_flags, pc);
 834        if (!event)
 835                return 0;
 836
 837        entry = ring_buffer_event_data(event);
 838        entry->ip = ip;
 839
 840        memcpy(&entry->buf, str, size);
 841
 842        /* Add a newline if necessary */
 843        if (entry->buf[size - 1] != '\n') {
 844                entry->buf[size] = '\n';
 845                entry->buf[size + 1] = '\0';
 846        } else
 847                entry->buf[size] = '\0';
 848
 849        __buffer_unlock_commit(buffer, event);
 850        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 851
 852        return size;
 853}
 854EXPORT_SYMBOL_GPL(__trace_puts);
 855
 856/**
 857 * __trace_bputs - write the pointer to a constant string into trace buffer
 858 * @ip:    The address of the caller
 859 * @str:   The constant string to write to the buffer to
 860 */
 861int __trace_bputs(unsigned long ip, const char *str)
 862{
 863        struct ring_buffer_event *event;
 864        struct ring_buffer *buffer;
 865        struct bputs_entry *entry;
 866        unsigned long irq_flags;
 867        int size = sizeof(struct bputs_entry);
 868        int pc;
 869
 870        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 871                return 0;
 872
 873        pc = preempt_count();
 874
 875        if (unlikely(tracing_selftest_running || tracing_disabled))
 876                return 0;
 877
 878        local_save_flags(irq_flags);
 879        buffer = global_trace.trace_buffer.buffer;
 880        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
 881                                            irq_flags, pc);
 882        if (!event)
 883                return 0;
 884
 885        entry = ring_buffer_event_data(event);
 886        entry->ip                       = ip;
 887        entry->str                      = str;
 888
 889        __buffer_unlock_commit(buffer, event);
 890        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 891
 892        return 1;
 893}
 894EXPORT_SYMBOL_GPL(__trace_bputs);
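/*
 * Usage sketch (illustrative): callers normally do not invoke __trace_puts()
 * or __trace_bputs() directly but go through the trace_puts() macro from
 * <linux/kernel.h>, which supplies _THIS_IP_ and, roughly, picks
 * __trace_bputs() for compile-time constant strings and __trace_puts()
 * otherwise:
 *
 *	trace_puts("reached the slow path\n");
 */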
 895
 896#ifdef CONFIG_TRACER_SNAPSHOT
 897void tracing_snapshot_instance(struct trace_array *tr)
 898{
 899        struct tracer *tracer = tr->current_trace;
 900        unsigned long flags;
 901
 902        if (in_nmi()) {
 903                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
 904                internal_trace_puts("*** snapshot is being ignored        ***\n");
 905                return;
 906        }
 907
 908        if (!tr->allocated_snapshot) {
 909                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
 910                internal_trace_puts("*** stopping trace here!   ***\n");
 911                tracing_off();
 912                return;
 913        }
 914
 915        /* Note, snapshot can not be used when the tracer uses it */
 916        if (tracer->use_max_tr) {
 917                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
 918                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
 919                return;
 920        }
 921
 922        local_irq_save(flags);
 923        update_max_tr(tr, current, smp_processor_id());
 924        local_irq_restore(flags);
 925}
 926
 927/**
 928 * tracing_snapshot - take a snapshot of the current buffer.
 929 *
 930 * This causes a swap between the snapshot buffer and the current live
 931 * tracing buffer. You can use this to take snapshots of the live
 932 * trace when some condition is triggered, but continue to trace.
 933 *
 934 * Note, make sure to allocate the snapshot with either
 935 * a tracing_snapshot_alloc(), or by doing it manually
 936 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 937 *
 938 * If the snapshot buffer is not allocated, it will stop tracing.
 939 * Basically making a permanent snapshot.
 940 */
 941void tracing_snapshot(void)
 942{
 943        struct trace_array *tr = &global_trace;
 944
 945        tracing_snapshot_instance(tr);
 946}
 947EXPORT_SYMBOL_GPL(tracing_snapshot);
 948
 949static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
 950                                        struct trace_buffer *size_buf, int cpu_id);
 951static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
 952
 953int tracing_alloc_snapshot_instance(struct trace_array *tr)
 954{
 955        int ret;
 956
 957        if (!tr->allocated_snapshot) {
 958
 959                /* allocate spare buffer */
 960                ret = resize_buffer_duplicate_size(&tr->max_buffer,
 961                                   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
 962                if (ret < 0)
 963                        return ret;
 964
 965                tr->allocated_snapshot = true;
 966        }
 967
 968        return 0;
 969}
 970
 971static void free_snapshot(struct trace_array *tr)
 972{
 973        /*
  974         * We don't free the ring buffer; instead, we resize it, because
  975         * the max_tr ring buffer has some state (e.g. ring->clock) and
  976         * we want to preserve it.
 977         */
 978        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
 979        set_buffer_entries(&tr->max_buffer, 1);
 980        tracing_reset_online_cpus(&tr->max_buffer);
 981        tr->allocated_snapshot = false;
 982}
 983
 984/**
 985 * tracing_alloc_snapshot - allocate snapshot buffer.
 986 *
 987 * This only allocates the snapshot buffer if it isn't already
 988 * allocated - it doesn't also take a snapshot.
 989 *
 990 * This is meant to be used in cases where the snapshot buffer needs
 991 * to be set up for events that can't sleep but need to be able to
 992 * trigger a snapshot.
 993 */
 994int tracing_alloc_snapshot(void)
 995{
 996        struct trace_array *tr = &global_trace;
 997        int ret;
 998
 999        ret = tracing_alloc_snapshot_instance(tr);
1000        WARN_ON(ret < 0);
1001
1002        return ret;
1003}
1004EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006/**
1007 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008 *
1009 * This is similar to tracing_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep.
1012 *
1013 * This causes a swap between the snapshot buffer and the current live
1014 * tracing buffer. You can use this to take snapshots of the live
1015 * trace when some condition is triggered, but continue to trace.
1016 */
1017void tracing_snapshot_alloc(void)
1018{
1019        int ret;
1020
1021        ret = tracing_alloc_snapshot();
1022        if (ret < 0)
1023                return;
1024
1025        tracing_snapshot();
1026}
1027EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028#else
1029void tracing_snapshot(void)
1030{
1031        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032}
1033EXPORT_SYMBOL_GPL(tracing_snapshot);
1034int tracing_alloc_snapshot(void)
1035{
1036        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037        return -ENODEV;
1038}
1039EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040void tracing_snapshot_alloc(void)
1041{
1042        /* Give warning */
1043        tracing_snapshot();
1044}
1045EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046#endif /* CONFIG_TRACER_SNAPSHOT */
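/*
 * Minimal usage sketch (hypothetical, for exposition only) of the snapshot
 * API documented above: allocate the snapshot buffer once from a context
 * that may sleep, then trigger snapshots from the condition of interest.
 * The my_setup()/my_hit_condition() names are made up.
 *
 *	static int my_setup(void)
 *	{
 *		return tracing_alloc_snapshot();
 *	}
 *
 *	static void my_hit_condition(void)
 *	{
 *		tracing_snapshot();	// swap live buffer with the snapshot
 *	}
 */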
1047
1048void tracer_tracing_off(struct trace_array *tr)
1049{
1050        if (tr->trace_buffer.buffer)
1051                ring_buffer_record_off(tr->trace_buffer.buffer);
1052        /*
1053         * This flag is looked at when buffers haven't been allocated
1054         * yet, or by some tracers (like irqsoff), that just want to
1055         * know if the ring buffer has been disabled, but it can handle
 1056         * races where it gets disabled while we still do a record.
1057         * As the check is in the fast path of the tracers, it is more
1058         * important to be fast than accurate.
1059         */
1060        tr->buffer_disabled = 1;
1061        /* Make the flag seen by readers */
1062        smp_wmb();
1063}
1064
1065/**
1066 * tracing_off - turn off tracing buffers
1067 *
1068 * This function stops the tracing buffers from recording data.
1069 * It does not disable any overhead the tracers themselves may
1070 * be causing. This function simply causes all recording to
1071 * the ring buffers to fail.
1072 */
1073void tracing_off(void)
1074{
1075        tracer_tracing_off(&global_trace);
1076}
1077EXPORT_SYMBOL_GPL(tracing_off);
1078
1079void disable_trace_on_warning(void)
1080{
1081        if (__disable_trace_on_warning)
1082                tracing_off();
1083}
1084
1085/**
1086 * tracer_tracing_is_on - show real state of ring buffer enabled
1087 * @tr : the trace array to know if ring buffer is enabled
1088 *
1089 * Shows real state of the ring buffer if it is enabled or not.
1090 */
1091bool tracer_tracing_is_on(struct trace_array *tr)
1092{
1093        if (tr->trace_buffer.buffer)
1094                return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095        return !tr->buffer_disabled;
1096}
1097
1098/**
1099 * tracing_is_on - show state of ring buffers enabled
1100 */
1101int tracing_is_on(void)
1102{
1103        return tracer_tracing_is_on(&global_trace);
1104}
1105EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107static int __init set_buf_size(char *str)
1108{
1109        unsigned long buf_size;
1110
1111        if (!str)
1112                return 0;
1113        buf_size = memparse(str, &str);
1114        /* nr_entries can not be zero */
1115        if (buf_size == 0)
1116                return 0;
1117        trace_buf_size = buf_size;
1118        return 1;
1119}
1120__setup("trace_buf_size=", set_buf_size);
1121
1122static int __init set_tracing_thresh(char *str)
1123{
1124        unsigned long threshold;
1125        int ret;
1126
1127        if (!str)
1128                return 0;
1129        ret = kstrtoul(str, 0, &threshold);
1130        if (ret < 0)
1131                return 0;
1132        tracing_thresh = threshold * 1000;
1133        return 1;
1134}
1135__setup("tracing_thresh=", set_tracing_thresh);
1136
1137unsigned long nsecs_to_usecs(unsigned long nsecs)
1138{
1139        return nsecs / 1000;
1140}
1141
1142/*
1143 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146 * of strings in the order that the evals (enum) were defined.
1147 */
1148#undef C
1149#define C(a, b) b
1150
 1151/* These must match the bit positions in trace_iterator_flags */
1152static const char *trace_options[] = {
1153        TRACE_FLAGS
1154        NULL
1155};
1156
1157static struct {
1158        u64 (*func)(void);
1159        const char *name;
1160        int in_ns;              /* is this clock in nanoseconds? */
1161} trace_clocks[] = {
1162        { trace_clock_local,            "local",        1 },
1163        { trace_clock_global,           "global",       1 },
1164        { trace_clock_counter,          "counter",      0 },
1165        { trace_clock_jiffies,          "uptime",       0 },
1166        { trace_clock,                  "perf",         1 },
1167        { ktime_get_mono_fast_ns,       "mono",         1 },
1168        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169        { ktime_get_boot_fast_ns,       "boot",         1 },
1170        ARCH_TRACE_CLOCKS
1171};
1172
1173bool trace_clock_in_ns(struct trace_array *tr)
1174{
1175        if (trace_clocks[tr->clock_id].in_ns)
1176                return true;
1177
1178        return false;
1179}
1180
1181/*
1182 * trace_parser_get_init - gets the buffer for trace parser
1183 */
1184int trace_parser_get_init(struct trace_parser *parser, int size)
1185{
1186        memset(parser, 0, sizeof(*parser));
1187
1188        parser->buffer = kmalloc(size, GFP_KERNEL);
1189        if (!parser->buffer)
1190                return 1;
1191
1192        parser->size = size;
1193        return 0;
1194}
1195
1196/*
1197 * trace_parser_put - frees the buffer for trace parser
1198 */
1199void trace_parser_put(struct trace_parser *parser)
1200{
1201        kfree(parser->buffer);
1202        parser->buffer = NULL;
1203}
1204
1205/*
 1206 * trace_get_user - reads the user input string separated by space
1207 * (matched by isspace(ch))
1208 *
1209 * For each string found the 'struct trace_parser' is updated,
1210 * and the function returns.
1211 *
1212 * Returns number of bytes read.
1213 *
1214 * See kernel/trace/trace.h for 'struct trace_parser' details.
1215 */
1216int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1217        size_t cnt, loff_t *ppos)
1218{
1219        char ch;
1220        size_t read = 0;
1221        ssize_t ret;
1222
1223        if (!*ppos)
1224                trace_parser_clear(parser);
1225
1226        ret = get_user(ch, ubuf++);
1227        if (ret)
1228                goto out;
1229
1230        read++;
1231        cnt--;
1232
1233        /*
1234         * The parser is not finished with the last write,
1235         * continue reading the user input without skipping spaces.
1236         */
1237        if (!parser->cont) {
1238                /* skip white space */
1239                while (cnt && isspace(ch)) {
1240                        ret = get_user(ch, ubuf++);
1241                        if (ret)
1242                                goto out;
1243                        read++;
1244                        cnt--;
1245                }
1246
1247                parser->idx = 0;
1248
1249                /* only spaces were written */
1250                if (isspace(ch) || !ch) {
1251                        *ppos += read;
1252                        ret = read;
1253                        goto out;
1254                }
1255        }
1256
1257        /* read the non-space input */
1258        while (cnt && !isspace(ch) && ch) {
1259                if (parser->idx < parser->size - 1)
1260                        parser->buffer[parser->idx++] = ch;
1261                else {
1262                        ret = -EINVAL;
1263                        goto out;
1264                }
1265                ret = get_user(ch, ubuf++);
1266                if (ret)
1267                        goto out;
1268                read++;
1269                cnt--;
1270        }
1271
1272        /* We either got finished input or we have to wait for another call. */
1273        if (isspace(ch) || !ch) {
1274                parser->buffer[parser->idx] = 0;
1275                parser->cont = false;
1276        } else if (parser->idx < parser->size - 1) {
1277                parser->cont = true;
1278                parser->buffer[parser->idx++] = ch;
1279                /* Make sure the parsed string always terminates with '\0'. */
1280                parser->buffer[parser->idx] = 0;
1281        } else {
1282                ret = -EINVAL;
1283                goto out;
1284        }
1285
1286        *ppos += read;
1287        ret = read;
1288
1289out:
1290        return ret;
1291}
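/*
 * Behaviour sketch (illustrative, hypothetical input): trace_get_user()
 * returns one whitespace-separated token per call. Given the user input
 * "123 456\n", a first call fills parser->buffer with "123" and returns
 * the number of bytes it consumed from the user buffer; a later call with
 * the remaining input yields "456". A token longer than parser->size - 1
 * fails with -EINVAL.
 */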
1292
1293/* TODO add a seq_buf_to_buffer() */
1294static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1295{
1296        int len;
1297
1298        if (trace_seq_used(s) <= s->seq.readpos)
1299                return -EBUSY;
1300
1301        len = trace_seq_used(s) - s->seq.readpos;
1302        if (cnt > len)
1303                cnt = len;
1304        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1305
1306        s->seq.readpos += cnt;
1307        return cnt;
1308}
1309
1310unsigned long __read_mostly     tracing_thresh;
1311
1312#ifdef CONFIG_TRACER_MAX_TRACE
1313/*
1314 * Copy the new maximum trace into the separate maximum-trace
1315 * structure. (this way the maximum trace is permanently saved,
1316 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1317 */
1318static void
1319__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1320{
1321        struct trace_buffer *trace_buf = &tr->trace_buffer;
1322        struct trace_buffer *max_buf = &tr->max_buffer;
1323        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1324        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1325
1326        max_buf->cpu = cpu;
1327        max_buf->time_start = data->preempt_timestamp;
1328
1329        max_data->saved_latency = tr->max_latency;
1330        max_data->critical_start = data->critical_start;
1331        max_data->critical_end = data->critical_end;
1332
1333        memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1334        max_data->pid = tsk->pid;
1335        /*
1336         * If tsk == current, then use current_uid(), as that does not use
1337         * RCU. The irq tracer can be called out of RCU scope.
1338         */
1339        if (tsk == current)
1340                max_data->uid = current_uid();
1341        else
1342                max_data->uid = task_uid(tsk);
1343
1344        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1345        max_data->policy = tsk->policy;
1346        max_data->rt_priority = tsk->rt_priority;
1347
 1348        /* record this task's comm */
1349        tracing_record_cmdline(tsk);
1350}
1351
1352/**
1353 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1354 * @tr: tracer
1355 * @tsk: the task with the latency
1356 * @cpu: The cpu that initiated the trace.
1357 *
1358 * Flip the buffers between the @tr and the max_tr and record information
1359 * about which task was the cause of this latency.
1360 */
1361void
1362update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1363{
1364        if (tr->stop_count)
1365                return;
1366
1367        WARN_ON_ONCE(!irqs_disabled());
1368
1369        if (!tr->allocated_snapshot) {
1370                /* Only the nop tracer should hit this when disabling */
1371                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1372                return;
1373        }
1374
1375        arch_spin_lock(&tr->max_lock);
1376
1377        /* Inherit the recordable setting from trace_buffer */
1378        if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1379                ring_buffer_record_on(tr->max_buffer.buffer);
1380        else
1381                ring_buffer_record_off(tr->max_buffer.buffer);
1382
1383        swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1384
1385        __update_max_tr(tr, tsk, cpu);
1386        arch_spin_unlock(&tr->max_lock);
1387}
1388
1389/**
1390 * update_max_tr_single - only copy one trace over, and reset the rest
 1391 * @tr: tracer
 1392 * @tsk: task with the latency
 1393 * @cpu: the cpu of the buffer to copy.
1394 *
1395 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1396 */
1397void
1398update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1399{
1400        int ret;
1401
1402        if (tr->stop_count)
1403                return;
1404
1405        WARN_ON_ONCE(!irqs_disabled());
1406        if (!tr->allocated_snapshot) {
1407                /* Only the nop tracer should hit this when disabling */
1408                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1409                return;
1410        }
1411
1412        arch_spin_lock(&tr->max_lock);
1413
1414        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1415
1416        if (ret == -EBUSY) {
1417                /*
1418                 * We failed to swap the buffer due to a commit taking
1419                 * place on this CPU. We fail to record, but we reset
1420                 * the max trace buffer (no one writes directly to it)
1421                 * and flag that it failed.
1422                 */
1423                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1424                        "Failed to swap buffers due to commit in progress\n");
1425        }
1426
1427        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1428
1429        __update_max_tr(tr, tsk, cpu);
1430        arch_spin_unlock(&tr->max_lock);
1431}
1432#endif /* CONFIG_TRACER_MAX_TRACE */
1433
1434static int wait_on_pipe(struct trace_iterator *iter, bool full)
1435{
1436        /* Iterators are static, they should be filled or empty */
1437        if (trace_buffer_iter(iter, iter->cpu_file))
1438                return 0;
1439
1440        return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1441                                full);
1442}
1443
1444#ifdef CONFIG_FTRACE_STARTUP_TEST
1445static bool selftests_can_run;
1446
1447struct trace_selftests {
1448        struct list_head                list;
1449        struct tracer                   *type;
1450};
1451
1452static LIST_HEAD(postponed_selftests);
1453
1454static int save_selftest(struct tracer *type)
1455{
1456        struct trace_selftests *selftest;
1457
1458        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1459        if (!selftest)
1460                return -ENOMEM;
1461
1462        selftest->type = type;
1463        list_add(&selftest->list, &postponed_selftests);
1464        return 0;
1465}
1466
1467static int run_tracer_selftest(struct tracer *type)
1468{
1469        struct trace_array *tr = &global_trace;
1470        struct tracer *saved_tracer = tr->current_trace;
1471        int ret;
1472
1473        if (!type->selftest || tracing_selftest_disabled)
1474                return 0;
1475
1476        /*
1477         * If a tracer registers early in boot up (before scheduling is
1478         * initialized and such), then do not run its selftests yet.
1479         * Instead, run it a little later in the boot process.
1480         */
1481        if (!selftests_can_run)
1482                return save_selftest(type);
1483
1484        /*
1485         * Run a selftest on this tracer.
1486         * Here we reset the trace buffer, and set the current
1487         * tracer to be this tracer. The tracer can then run some
1488         * internal tracing to verify that everything is in order.
1489         * If we fail, we do not register this tracer.
1490         */
1491        tracing_reset_online_cpus(&tr->trace_buffer);
1492
1493        tr->current_trace = type;
1494
1495#ifdef CONFIG_TRACER_MAX_TRACE
1496        if (type->use_max_tr) {
1497                /* If we expanded the buffers, make sure the max is expanded too */
1498                if (ring_buffer_expanded)
1499                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1500                                           RING_BUFFER_ALL_CPUS);
1501                tr->allocated_snapshot = true;
1502        }
1503#endif
1504
1505        /* the test is responsible for initializing and enabling */
1506        pr_info("Testing tracer %s: ", type->name);
1507        ret = type->selftest(type, tr);
1508        /* the test is responsible for resetting too */
1509        tr->current_trace = saved_tracer;
1510        if (ret) {
1511                printk(KERN_CONT "FAILED!\n");
1512                /* Add the warning after printing 'FAILED' */
1513                WARN_ON(1);
1514                return -1;
1515        }
1516        /* Only reset on passing, to avoid touching corrupted buffers */
1517        tracing_reset_online_cpus(&tr->trace_buffer);
1518
1519#ifdef CONFIG_TRACER_MAX_TRACE
1520        if (type->use_max_tr) {
1521                tr->allocated_snapshot = false;
1522
1523                /* Shrink the max buffer again */
1524                if (ring_buffer_expanded)
1525                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1526                                           RING_BUFFER_ALL_CPUS);
1527        }
1528#endif
1529
1530        printk(KERN_CONT "PASSED\n");
1531        return 0;
1532}
1533
1534static __init int init_trace_selftests(void)
1535{
1536        struct trace_selftests *p, *n;
1537        struct tracer *t, **last;
1538        int ret;
1539
1540        selftests_can_run = true;
1541
1542        mutex_lock(&trace_types_lock);
1543
1544        if (list_empty(&postponed_selftests))
1545                goto out;
1546
1547        pr_info("Running postponed tracer tests:\n");
1548
1549        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1550                ret = run_tracer_selftest(p->type);
1551                /* If the test fails, then warn and remove from available_tracers */
1552                if (ret < 0) {
1553                        WARN(1, "tracer: %s failed selftest, disabling\n",
1554                             p->type->name);
1555                        last = &trace_types;
1556                        for (t = trace_types; t; t = t->next) {
1557                                if (t == p->type) {
1558                                        *last = t->next;
1559                                        break;
1560                                }
1561                                last = &t->next;
1562                        }
1563                }
1564                list_del(&p->list);
1565                kfree(p);
1566        }
1567
1568 out:
1569        mutex_unlock(&trace_types_lock);
1570
1571        return 0;
1572}
1573core_initcall(init_trace_selftests);
1574#else
1575static inline int run_tracer_selftest(struct tracer *type)
1576{
1577        return 0;
1578}
1579#endif /* CONFIG_FTRACE_STARTUP_TEST */
1580
1581static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1582
1583static void __init apply_trace_boot_options(void);
1584
1585/**
1586 * register_tracer - register a tracer with the ftrace system.
 1587 * @type: the plugin for the tracer
1588 *
1589 * Register a new plugin tracer.
1590 */
1591int __init register_tracer(struct tracer *type)
1592{
1593        struct tracer *t;
1594        int ret = 0;
1595
1596        if (!type->name) {
1597                pr_info("Tracer must have a name\n");
1598                return -1;
1599        }
1600
1601        if (strlen(type->name) >= MAX_TRACER_SIZE) {
1602                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1603                return -1;
1604        }
1605
1606        mutex_lock(&trace_types_lock);
1607
1608        tracing_selftest_running = true;
1609
1610        for (t = trace_types; t; t = t->next) {
1611                if (strcmp(type->name, t->name) == 0) {
1612                        /* already found */
1613                        pr_info("Tracer %s already registered\n",
1614                                type->name);
1615                        ret = -1;
1616                        goto out;
1617                }
1618        }
1619
1620        if (!type->set_flag)
1621                type->set_flag = &dummy_set_flag;
1622        if (!type->flags) {
 1623                /* allocate a dummy tracer_flags */
1624                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1625                if (!type->flags) {
1626                        ret = -ENOMEM;
1627                        goto out;
1628                }
1629                type->flags->val = 0;
1630                type->flags->opts = dummy_tracer_opt;
1631        } else
1632                if (!type->flags->opts)
1633                        type->flags->opts = dummy_tracer_opt;
1634
1635        /* store the tracer for __set_tracer_option */
1636        type->flags->trace = type;
1637
1638        ret = run_tracer_selftest(type);
1639        if (ret < 0)
1640                goto out;
1641
1642        type->next = trace_types;
1643        trace_types = type;
1644        add_tracer_options(&global_trace, type);
1645
1646 out:
1647        tracing_selftest_running = false;
1648        mutex_unlock(&trace_types_lock);
1649
1650        if (ret || !default_bootup_tracer)
1651                goto out_unlock;
1652
1653        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1654                goto out_unlock;
1655
1656        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1657        /* Do we want this tracer to start on bootup? */
1658        tracing_set_tracer(&global_trace, type->name);
1659        default_bootup_tracer = NULL;
1660
1661        apply_trace_boot_options();
1662
1663        /* disable other selftests, since this will break it. */
1664        tracing_selftest_disabled = true;
1665#ifdef CONFIG_FTRACE_STARTUP_TEST
1666        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1667               type->name);
1668#endif
1669
1670 out_unlock:
1671        return ret;
1672}
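/*
 * Editor's sketch (not part of the original source): a tracer plugin
 * typically registers itself from an initcall.  The names my_tracer,
 * my_tracer_init and my_tracer_reset below are hypothetical placeholders.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */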
1673
1674void tracing_reset(struct trace_buffer *buf, int cpu)
1675{
1676        struct ring_buffer *buffer = buf->buffer;
1677
1678        if (!buffer)
1679                return;
1680
1681        ring_buffer_record_disable(buffer);
1682
1683        /* Make sure all commits have finished */
1684        synchronize_sched();
1685        ring_buffer_reset_cpu(buffer, cpu);
1686
1687        ring_buffer_record_enable(buffer);
1688}
1689
1690void tracing_reset_online_cpus(struct trace_buffer *buf)
1691{
1692        struct ring_buffer *buffer = buf->buffer;
1693        int cpu;
1694
1695        if (!buffer)
1696                return;
1697
1698        ring_buffer_record_disable(buffer);
1699
1700        /* Make sure all commits have finished */
1701        synchronize_sched();
1702
1703        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1704
1705        for_each_online_cpu(cpu)
1706                ring_buffer_reset_cpu(buffer, cpu);
1707
1708        ring_buffer_record_enable(buffer);
1709}
1710
1711/* Must have trace_types_lock held */
1712void tracing_reset_all_online_cpus(void)
1713{
1714        struct trace_array *tr;
1715
1716        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1717                if (!tr->clear_trace)
1718                        continue;
1719                tr->clear_trace = false;
1720                tracing_reset_online_cpus(&tr->trace_buffer);
1721#ifdef CONFIG_TRACER_MAX_TRACE
1722                tracing_reset_online_cpus(&tr->max_buffer);
1723#endif
1724        }
1725}
1726
1727static int *tgid_map;
1728
1729#define SAVED_CMDLINES_DEFAULT 128
1730#define NO_CMDLINE_MAP UINT_MAX
1731static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1732struct saved_cmdlines_buffer {
1733        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1734        unsigned *map_cmdline_to_pid;
1735        unsigned cmdline_num;
1736        int cmdline_idx;
1737        char *saved_cmdlines;
1738};
1739static struct saved_cmdlines_buffer *savedcmd;
1740
1741/* temporarily disable recording */
1742static atomic_t trace_record_taskinfo_disabled __read_mostly;
1743
1744static inline char *get_saved_cmdlines(int idx)
1745{
1746        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1747}
1748
1749static inline void set_cmdline(int idx, const char *cmdline)
1750{
1751        memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1752}
1753
1754static int allocate_cmdlines_buffer(unsigned int val,
1755                                    struct saved_cmdlines_buffer *s)
1756{
1757        s->map_cmdline_to_pid = kmalloc_array(val,
1758                                              sizeof(*s->map_cmdline_to_pid),
1759                                              GFP_KERNEL);
1760        if (!s->map_cmdline_to_pid)
1761                return -ENOMEM;
1762
1763        s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1764        if (!s->saved_cmdlines) {
1765                kfree(s->map_cmdline_to_pid);
1766                return -ENOMEM;
1767        }
1768
1769        s->cmdline_idx = 0;
1770        s->cmdline_num = val;
1771        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1772               sizeof(s->map_pid_to_cmdline));
1773        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1774               val * sizeof(*s->map_cmdline_to_pid));
1775
1776        return 0;
1777}
1778
1779static int trace_create_savedcmd(void)
1780{
1781        int ret;
1782
1783        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1784        if (!savedcmd)
1785                return -ENOMEM;
1786
1787        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1788        if (ret < 0) {
1789                kfree(savedcmd);
1790                savedcmd = NULL;
1791                return -ENOMEM;
1792        }
1793
1794        return 0;
1795}
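/*
 * Editor's sketch (not part of the original source): the default of
 * SAVED_CMDLINES_DEFAULT (128) entries can be resized at run time through
 * the tracefs "saved_cmdlines_size" file, e.g.:
 *
 *	echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * assuming tracefs is mounted at /sys/kernel/tracing.
 */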
1796
1797int is_tracing_stopped(void)
1798{
1799        return global_trace.stop_count;
1800}
1801
1802/**
1803 * tracing_start - quick start of the tracer
1804 *
1805 * If tracing is enabled but was stopped by tracing_stop,
1806 * this will start the tracer back up.
1807 */
1808void tracing_start(void)
1809{
1810        struct ring_buffer *buffer;
1811        unsigned long flags;
1812
1813        if (tracing_disabled)
1814                return;
1815
1816        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1817        if (--global_trace.stop_count) {
1818                if (global_trace.stop_count < 0) {
1819                        /* Someone screwed up their debugging */
1820                        WARN_ON_ONCE(1);
1821                        global_trace.stop_count = 0;
1822                }
1823                goto out;
1824        }
1825
1826        /* Prevent the buffers from switching */
1827        arch_spin_lock(&global_trace.max_lock);
1828
1829        buffer = global_trace.trace_buffer.buffer;
1830        if (buffer)
1831                ring_buffer_record_enable(buffer);
1832
1833#ifdef CONFIG_TRACER_MAX_TRACE
1834        buffer = global_trace.max_buffer.buffer;
1835        if (buffer)
1836                ring_buffer_record_enable(buffer);
1837#endif
1838
1839        arch_spin_unlock(&global_trace.max_lock);
1840
1841 out:
1842        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1843}
1844
1845static void tracing_start_tr(struct trace_array *tr)
1846{
1847        struct ring_buffer *buffer;
1848        unsigned long flags;
1849
1850        if (tracing_disabled)
1851                return;
1852
1853        /* If global, we need to also start the max tracer */
1854        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1855                return tracing_start();
1856
1857        raw_spin_lock_irqsave(&tr->start_lock, flags);
1858
1859        if (--tr->stop_count) {
1860                if (tr->stop_count < 0) {
1861                        /* Someone screwed up their debugging */
1862                        WARN_ON_ONCE(1);
1863                        tr->stop_count = 0;
1864                }
1865                goto out;
1866        }
1867
1868        buffer = tr->trace_buffer.buffer;
1869        if (buffer)
1870                ring_buffer_record_enable(buffer);
1871
1872 out:
1873        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1874}
1875
1876/**
1877 * tracing_stop - quick stop of the tracer
1878 *
1879 * Light weight way to stop tracing. Use in conjunction with
1880 * tracing_start.
1881 */
1882void tracing_stop(void)
1883{
1884        struct ring_buffer *buffer;
1885        unsigned long flags;
1886
1887        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1888        if (global_trace.stop_count++)
1889                goto out;
1890
1891        /* Prevent the buffers from switching */
1892        arch_spin_lock(&global_trace.max_lock);
1893
1894        buffer = global_trace.trace_buffer.buffer;
1895        if (buffer)
1896                ring_buffer_record_disable(buffer);
1897
1898#ifdef CONFIG_TRACER_MAX_TRACE
1899        buffer = global_trace.max_buffer.buffer;
1900        if (buffer)
1901                ring_buffer_record_disable(buffer);
1902#endif
1903
1904        arch_spin_unlock(&global_trace.max_lock);
1905
1906 out:
1907        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1908}
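/*
 * Editor's sketch (not part of the original source): tracing_stop() and
 * tracing_start() are reference counted via stop_count, so a caller can
 * bracket a region of interest to freeze the trace around it:
 *
 *	tracing_stop();
 *	... inspect or dump the now-frozen buffers ...
 *	tracing_start();
 */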
1909
1910static void tracing_stop_tr(struct trace_array *tr)
1911{
1912        struct ring_buffer *buffer;
1913        unsigned long flags;
1914
1915        /* If global, we need to also stop the max tracer */
1916        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1917                return tracing_stop();
1918
1919        raw_spin_lock_irqsave(&tr->start_lock, flags);
1920        if (tr->stop_count++)
1921                goto out;
1922
1923        buffer = tr->trace_buffer.buffer;
1924        if (buffer)
1925                ring_buffer_record_disable(buffer);
1926
1927 out:
1928        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1929}
1930
1931static int trace_save_cmdline(struct task_struct *tsk)
1932{
1933        unsigned pid, idx;
1934
1935        /* treat recording of idle task as a success */
1936        if (!tsk->pid)
1937                return 1;
1938
1939        if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1940                return 0;
1941
1942        /*
1943         * It's not the end of the world if we don't get
1944         * the lock, but we also don't want to spin
1945         * nor do we want to disable interrupts,
1946         * so if we miss here, then better luck next time.
1947         */
1948        if (!arch_spin_trylock(&trace_cmdline_lock))
1949                return 0;
1950
1951        idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1952        if (idx == NO_CMDLINE_MAP) {
1953                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1954
1955                /*
1956                 * Check whether the cmdline buffer at idx has a pid
1957                 * mapped. We are going to overwrite that entry so we
1958                 * need to clear the map_pid_to_cmdline. Otherwise we
1959                 * would read the new comm for the old pid.
1960                 */
1961                pid = savedcmd->map_cmdline_to_pid[idx];
1962                if (pid != NO_CMDLINE_MAP)
1963                        savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1964
1965                savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1966                savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1967
1968                savedcmd->cmdline_idx = idx;
1969        }
1970
1971        set_cmdline(idx, tsk->comm);
1972
1973        arch_spin_unlock(&trace_cmdline_lock);
1974
1975        return 1;
1976}
1977
1978static void __trace_find_cmdline(int pid, char comm[])
1979{
1980        unsigned map;
1981
1982        if (!pid) {
1983                strcpy(comm, "<idle>");
1984                return;
1985        }
1986
1987        if (WARN_ON_ONCE(pid < 0)) {
1988                strcpy(comm, "<XXX>");
1989                return;
1990        }
1991
1992        if (pid > PID_MAX_DEFAULT) {
1993                strcpy(comm, "<...>");
1994                return;
1995        }
1996
1997        map = savedcmd->map_pid_to_cmdline[pid];
1998        if (map != NO_CMDLINE_MAP)
1999                strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2000        else
2001                strcpy(comm, "<...>");
2002}
2003
2004void trace_find_cmdline(int pid, char comm[])
2005{
2006        preempt_disable();
2007        arch_spin_lock(&trace_cmdline_lock);
2008
2009        __trace_find_cmdline(pid, comm);
2010
2011        arch_spin_unlock(&trace_cmdline_lock);
2012        preempt_enable();
2013}
2014
2015int trace_find_tgid(int pid)
2016{
2017        if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2018                return 0;
2019
2020        return tgid_map[pid];
2021}
2022
2023static int trace_save_tgid(struct task_struct *tsk)
2024{
2025        /* treat recording of idle task as a success */
2026        if (!tsk->pid)
2027                return 1;
2028
2029        if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2030                return 0;
2031
2032        tgid_map[tsk->pid] = tsk->tgid;
2033        return 1;
2034}
2035
2036static bool tracing_record_taskinfo_skip(int flags)
2037{
2038        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2039                return true;
2040        if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2041                return true;
2042        if (!__this_cpu_read(trace_taskinfo_save))
2043                return true;
2044        return false;
2045}
2046
2047/**
2048 * tracing_record_taskinfo - record the task info of a task
2049 *
2050 * @task: task to record
2051 * @flags: TRACE_RECORD_CMDLINE for recording comm,
2052 *         TRACE_RECORD_TGID for recording tgid
2053 */
2054void tracing_record_taskinfo(struct task_struct *task, int flags)
2055{
2056        bool done;
2057
2058        if (tracing_record_taskinfo_skip(flags))
2059                return;
2060
2061        /*
2062         * Record as much task information as possible. If some fail, continue
2063         * to try to record the others.
2064         */
2065        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2066        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2067
2068        /* If recording any information failed, retry again soon. */
2069        if (!done)
2070                return;
2071
2072        __this_cpu_write(trace_taskinfo_save, false);
2073}
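/*
 * Editor's sketch (not part of the original source): callers that want
 * both pieces of task information simply OR the flags together, e.g.:
 *
 *	tracing_record_taskinfo(current, TRACE_RECORD_CMDLINE |
 *					 TRACE_RECORD_TGID);
 */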
2074
2075/**
2076 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2077 *
2078 * @prev: previous task during sched_switch
2079 * @next: next task during sched_switch
2080 * @flags: TRACE_RECORD_CMDLINE for recording comm,
2081 *         TRACE_RECORD_TGID for recording tgid
2082 */
2083void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2084                                          struct task_struct *next, int flags)
2085{
2086        bool done;
2087
2088        if (tracing_record_taskinfo_skip(flags))
2089                return;
2090
2091        /*
2092         * Record as much task information as possible. If some fail, continue
2093         * to try to record the others.
2094         */
2095        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2096        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2097        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2098        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2099
2100        /* If recording any information failed, retry again soon. */
2101        if (!done)
2102                return;
2103
2104        __this_cpu_write(trace_taskinfo_save, false);
2105}
2106
2107/* Helpers to record a specific task information */
2108void tracing_record_cmdline(struct task_struct *task)
2109{
2110        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2111}
2112
2113void tracing_record_tgid(struct task_struct *task)
2114{
2115        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2116}
2117
2118/*
2119 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2120 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2121 * simplifies those functions and keeps them in sync.
2122 */
2123enum print_line_t trace_handle_return(struct trace_seq *s)
2124{
2125        return trace_seq_has_overflowed(s) ?
2126                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2127}
2128EXPORT_SYMBOL_GPL(trace_handle_return);
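/*
 * Editor's sketch (not part of the original source): an event's output
 * callback typically ends with this helper so that an overflowed
 * trace_seq is reported as TRACE_TYPE_PARTIAL_LINE.  my_event_print is
 * a hypothetical name:
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */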
2129
2130void
2131tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2132                             int pc)
2133{
2134        struct task_struct *tsk = current;
2135
2136        entry->preempt_count            = pc & 0xff;
2137        entry->pid                      = (tsk) ? tsk->pid : 0;
2138        entry->flags =
2139#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2140                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2141#else
2142                TRACE_FLAG_IRQS_NOSUPPORT |
2143#endif
2144                ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2145                ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2146                ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2147                (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2148                (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2149}
2150EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
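/*
 * Editor's sketch (not part of the original source): code that reserves a
 * raw event and fills it by hand uses this helper to populate the common
 * fields before writing its payload, along the lines of:
 *
 *	struct ftrace_entry *entry = ring_buffer_event_data(event);
 *
 *	tracing_generic_entry_update(&entry->ent, flags, preempt_count());
 *	entry->ip = ip;
 */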
2151
2152struct ring_buffer_event *
2153trace_buffer_lock_reserve(struct ring_buffer *buffer,
2154                          int type,
2155                          unsigned long len,
2156                          unsigned long flags, int pc)
2157{
2158        return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2159}
2160
2161DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2162DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2163static int trace_buffered_event_ref;
2164
2165/**
2166 * trace_buffered_event_enable - enable buffering events
2167 *
2168 * When events are being filtered, it is quicker to write the event
2169 * data into a temporary buffer if there is a likely chance that it
2170 * will not be committed. Discarding an event from the ring buffer is
2171 * not as fast as committing it, and is much slower than copying the
2172 * data into a commit.
2173 *
2174 * When an event is to be filtered, allocate per-CPU buffers to
2175 * write the event data into. If the event is filtered and discarded,
2176 * it is simply dropped; otherwise, the entire data is committed
2177 * in one shot.
2178 */
2179void trace_buffered_event_enable(void)
2180{
2181        struct ring_buffer_event *event;
2182        struct page *page;
2183        int cpu;
2184
2185        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2186
2187        if (trace_buffered_event_ref++)
2188                return;
2189
2190        for_each_tracing_cpu(cpu) {
2191                page = alloc_pages_node(cpu_to_node(cpu),
2192                                        GFP_KERNEL | __GFP_NORETRY, 0);
2193                if (!page)
2194                        goto failed;
2195
2196                event = page_address(page);
2197                memset(event, 0, sizeof(*event));
2198
2199                per_cpu(trace_buffered_event, cpu) = event;
2200
2201                preempt_disable();
2202                if (cpu == smp_processor_id() &&
2203                    this_cpu_read(trace_buffered_event) !=
2204                    per_cpu(trace_buffered_event, cpu))
2205                        WARN_ON_ONCE(1);
2206                preempt_enable();
2207        }
2208
2209        return;
2210 failed:
2211        trace_buffered_event_disable();
2212}
2213
2214static void enable_trace_buffered_event(void *data)
2215{
2216        /* Probably not needed, but do it anyway */
2217        smp_rmb();
2218        this_cpu_dec(trace_buffered_event_cnt);
2219}
2220
2221static void disable_trace_buffered_event(void *data)
2222{
2223        this_cpu_inc(trace_buffered_event_cnt);
2224}
2225
2226/**
2227 * trace_buffered_event_disable - disable buffering events
2228 *
2229 * When a filter is removed, it is faster to not use the buffered
2230 * events, and to commit directly into the ring buffer. Free up
2231 * the temp buffers when there are no more users. This requires
2232 * special synchronization with current events.
2233 */
2234void trace_buffered_event_disable(void)
2235{
2236        int cpu;
2237
2238        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2239
2240        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2241                return;
2242
2243        if (--trace_buffered_event_ref)
2244                return;
2245
2246        preempt_disable();
2247        /* For each CPU, set the buffer as used. */
2248        smp_call_function_many(tracing_buffer_mask,
2249                               disable_trace_buffered_event, NULL, 1);
2250        preempt_enable();
2251
2252        /* Wait for all current users to finish */
2253        synchronize_sched();
2254
2255        for_each_tracing_cpu(cpu) {
2256                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2257                per_cpu(trace_buffered_event, cpu) = NULL;
2258        }
2259        /*
2260         * Make sure trace_buffered_event is NULL before clearing
2261         * trace_buffered_event_cnt.
2262         */
2263        smp_wmb();
2264
2265        preempt_disable();
2266        /* Do the work on each cpu */
2267        smp_call_function_many(tracing_buffer_mask,
2268                               enable_trace_buffered_event, NULL, 1);
2269        preempt_enable();
2270}
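/*
 * Editor's sketch (not part of the original source): the enable/disable
 * pair is reference counted and must be called with event_mutex held, so
 * filter set-up and tear-down paths bracket their work roughly like:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the event filter ...
 *	mutex_unlock(&event_mutex);
 *
 * with a matching trace_buffered_event_disable() (again under event_mutex)
 * when the filter is removed.
 */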
2271
2272static struct ring_buffer *temp_buffer;
2273
2274struct ring_buffer_event *
2275trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2276                          struct trace_event_file *trace_file,
2277                          int type, unsigned long len,
2278                          unsigned long flags, int pc)
2279{
2280        struct ring_buffer_event *entry;
2281        int val;
2282
2283        *current_rb = trace_file->tr->trace_buffer.buffer;
2284
2285        if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2286             (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2287            (entry = this_cpu_read(trace_buffered_event))) {
2288                /* Try to use the per cpu buffer first */
2289                val = this_cpu_inc_return(trace_buffered_event_cnt);
2290                if (val == 1) {
2291                        trace_event_setup(entry, type, flags, pc);
2292                        entry->array[0] = len;
2293                        return entry;
2294                }
2295                this_cpu_dec(trace_buffered_event_cnt);
2296        }
2297
2298        entry = __trace_buffer_lock_reserve(*current_rb,
2299                                            type, len, flags, pc);
2300        /*
2301         * If tracing is off, but we have triggers enabled
2302         * we still need to look at the event data. Use the temp_buffer
2303         * to store the trace event for the trigger to use. It's recursion
2304         * safe and will not be recorded anywhere.
2305         */
2306        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2307                *current_rb = temp_buffer;
2308                entry = __trace_buffer_lock_reserve(*current_rb,
2309                                                    type, len, flags, pc);
2310        }
2311        return entry;
2312}
2313EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2314
2315static DEFINE_SPINLOCK(tracepoint_iter_lock);
2316static DEFINE_MUTEX(tracepoint_printk_mutex);
2317
2318static void output_printk(struct trace_event_buffer *fbuffer)
2319{
2320        struct trace_event_call *event_call;
2321        struct trace_event *event;
2322        unsigned long flags;
2323        struct trace_iterator *iter = tracepoint_print_iter;
2324
2325        /* We should never get here if iter is NULL */
2326        if (WARN_ON_ONCE(!iter))
2327                return;
2328
2329        event_call = fbuffer->trace_file->event_call;
2330        if (!event_call || !event_call->event.funcs ||
2331            !event_call->event.funcs->trace)
2332                return;
2333
2334        event = &fbuffer->trace_file->event_call->event;
2335
2336        spin_lock_irqsave(&tracepoint_iter_lock, flags);
2337        trace_seq_init(&iter->seq);
2338        iter->ent = fbuffer->entry;
2339        event_call->event.funcs->trace(iter, 0, event);
2340        trace_seq_putc(&iter->seq, 0);
2341        printk("%s", iter->seq.buffer);
2342
2343        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2344}
2345
2346int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2347                             void __user *buffer, size_t *lenp,
2348                             loff_t *ppos)
2349{
2350        int save_tracepoint_printk;
2351        int ret;
2352
2353        mutex_lock(&tracepoint_printk_mutex);
2354        save_tracepoint_printk = tracepoint_printk;
2355
2356        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2357
2358        /*
2359         * This forces an early exit, as tracepoint_printk is always
2360         * zero when tracepoint_print_iter is not allocated.
2361         */
2362        if (!tracepoint_print_iter)
2363                tracepoint_printk = 0;
2364
2365        if (save_tracepoint_printk == tracepoint_printk)
2366                goto out;
2367
2368        if (tracepoint_printk)
2369                static_key_enable(&tracepoint_printk_key.key);
2370        else
2371                static_key_disable(&tracepoint_printk_key.key);
2372
2373 out:
2374        mutex_unlock(&tracepoint_printk_mutex);
2375
2376        return ret;
2377}
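/*
 * Editor's sketch (not part of the original source): this handler backs
 * the kernel.tracepoint_printk sysctl (wired up in kernel/sysctl.c), so
 * the static key can be toggled at run time with something like:
 *
 *	sysctl kernel.tracepoint_printk=1
 *
 * which only has an effect when tracepoint_print_iter was allocated via
 * the tp_printk boot option.
 */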
2378
2379void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2380{
2381        if (static_key_false(&tracepoint_printk_key.key))
2382                output_printk(fbuffer);
2383
2384        event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2385                                    fbuffer->event, fbuffer->entry,
2386                                    fbuffer->flags, fbuffer->pc);
2387}
2388EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2389
2390/*
2391 * Skip 3:
2392 *
2393 *   trace_buffer_unlock_commit_regs()
2394 *   trace_event_buffer_commit()
2395 *   trace_event_raw_event_xxx()
2396 */
2397# define STACK_SKIP 3
2398
2399void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2400                                     struct ring_buffer *buffer,
2401                                     struct ring_buffer_event *event,
2402                                     unsigned long flags, int pc,
2403                                     struct pt_regs *regs)
2404{
2405        __buffer_unlock_commit(buffer, event);
2406
2407        /*
2408         * If regs is not set, then skip the necessary functions.
2409         * Note, we can still get here via blktrace, wakeup tracer
2410         * and mmiotrace, but that's ok if they lose a function or
2411         * two. They are not that meaningful.
2412         */
2413        ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2414        ftrace_trace_userstack(buffer, flags, pc);
2415}
2416
2417/*
2418 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2419 */
2420void
2421trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2422                                   struct ring_buffer_event *event)
2423{
2424        __buffer_unlock_commit(buffer, event);
2425}
2426
2427static void
2428trace_process_export(struct trace_export *export,
2429               struct ring_buffer_event *event)
2430{
2431        struct trace_entry *entry;
2432        unsigned int size = 0;
2433
2434        entry = ring_buffer_event_data(event);
2435        size = ring_buffer_event_length(event);
2436        export->write(export, entry, size);
2437}
2438
2439static DEFINE_MUTEX(ftrace_export_lock);
2440
2441static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2442
2443static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2444
2445static inline void ftrace_exports_enable(void)
2446{
2447        static_branch_enable(&ftrace_exports_enabled);
2448}
2449
2450static inline void ftrace_exports_disable(void)
2451{
2452        static_branch_disable(&ftrace_exports_enabled);
2453}
2454
2455void ftrace_exports(struct ring_buffer_event *event)
2456{
2457        struct trace_export *export;
2458
2459        preempt_disable_notrace();
2460
2461        export = rcu_dereference_raw_notrace(ftrace_exports_list);
2462        while (export) {
2463                trace_process_export(export, event);
2464                export = rcu_dereference_raw_notrace(export->next);
2465        }
2466
2467        preempt_enable_notrace();
2468}
2469
2470static inline void
2471add_trace_export(struct trace_export **list, struct trace_export *export)
2472{
2473        rcu_assign_pointer(export->next, *list);
2474        /*
2475         * We are adding export to the list, but another
2476         * CPU might be walking that list. We need to make sure
2477         * the export->next pointer is valid before another CPU sees
2478         * the export pointer added to the list.
2479         */
2480        rcu_assign_pointer(*list, export);
2481}
2482
2483static inline int
2484rm_trace_export(struct trace_export **list, struct trace_export *export)
2485{
2486        struct trace_export **p;
2487
2488        for (p = list; *p != NULL; p = &(*p)->next)
2489                if (*p == export)
2490                        break;
2491
2492        if (*p != export)
2493                return -1;
2494
2495        rcu_assign_pointer(*p, (*p)->next);
2496
2497        return 0;
2498}
2499
2500static inline void
2501add_ftrace_export(struct trace_export **list, struct trace_export *export)
2502{
2503        if (*list == NULL)
2504                ftrace_exports_enable();
2505
2506        add_trace_export(list, export);
2507}
2508
2509static inline int
2510rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2511{
2512        int ret;
2513
2514        ret = rm_trace_export(list, export);
2515        if (*list == NULL)
2516                ftrace_exports_disable();
2517
2518        return ret;
2519}
2520
2521int register_ftrace_export(struct trace_export *export)
2522{
2523        if (WARN_ON_ONCE(!export->write))
2524                return -1;
2525
2526        mutex_lock(&ftrace_export_lock);
2527
2528        add_ftrace_export(&ftrace_exports_list, export);
2529
2530        mutex_unlock(&ftrace_export_lock);
2531
2532        return 0;
2533}
2534EXPORT_SYMBOL_GPL(register_ftrace_export);
2535
2536int unregister_ftrace_export(struct trace_export *export)
2537{
2538        int ret;
2539
2540        mutex_lock(&ftrace_export_lock);
2541
2542        ret = rm_ftrace_export(&ftrace_exports_list, export);
2543
2544        mutex_unlock(&ftrace_export_lock);
2545
2546        return ret;
2547}
2548EXPORT_SYMBOL_GPL(unregister_ftrace_export);
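/*
 * Editor's sketch (not part of the original source): an exporter supplies
 * a struct trace_export with a ->write() callback and registers it; each
 * function-trace event is then handed to that callback.  The names
 * my_write and my_export are hypothetical placeholders.
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int size)
 *	{
 *		... push the binary trace entry somewhere else ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */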
2549
2550void
2551trace_function(struct trace_array *tr,
2552               unsigned long ip, unsigned long parent_ip, unsigned long flags,
2553               int pc)
2554{
2555        struct trace_event_call *call = &event_function;
2556        struct ring_buffer *buffer = tr->trace_buffer.buffer;
2557        struct ring_buffer_event *event;
2558        struct ftrace_entry *entry;
2559
2560        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2561                                            flags, pc);
2562        if (!event)
2563                return;
2564        entry   = ring_buffer_event_data(event);
2565        entry->ip                       = ip;
2566        entry->parent_ip                = parent_ip;
2567
2568        if (!call_filter_check_discard(call, entry, buffer, event)) {
2569                if (static_branch_unlikely(&ftrace_exports_enabled))
2570                        ftrace_exports(event);
2571                __buffer_unlock_commit(buffer, event);
2572        }
2573}
2574
2575#ifdef CONFIG_STACKTRACE
2576
2577#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2578struct ftrace_stack {
2579        unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2580};
2581
2582static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2583static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2584
2585static void __ftrace_trace_stack(struct ring_buffer *buffer,
2586                                 unsigned long flags,
2587                                 int skip, int pc, struct pt_regs *regs)
2588{
2589        struct trace_event_call *call = &event_kernel_stack;
2590        struct ring_buffer_event *event;
2591        struct stack_entry *entry;
2592        struct stack_trace trace;
2593        int use_stack;
2594        int size = FTRACE_STACK_ENTRIES;
2595
2596        trace.nr_entries        = 0;
2597        trace.skip              = skip;
2598
2599        /*
2600         * Add one, for this function and the call to save_stack_trace().
2601         * If regs is set, then these functions will not be in the way.
2602         */
2603#ifndef CONFIG_UNWINDER_ORC
2604        if (!regs)
2605                trace.skip++;
2606#endif
2607
2608        /*
2609         * Since events can happen in NMIs, there's no safe way to
2610         * use the per-CPU ftrace_stacks. We reserve it and if an interrupt
2611         * or NMI comes in, it will just have to use the default
2612         * FTRACE_STACK_SIZE.
2613         */
2614        preempt_disable_notrace();
2615
2616        use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2617        /*
2618         * We don't need any atomic variables, just a barrier.
2619         * If an interrupt comes in, we don't care, because it would
2620         * have exited and put the counter back to what we want.
2621         * We just need a barrier to keep gcc from moving things
2622         * around.
2623         */
2624        barrier();
2625        if (use_stack == 1) {
2626                trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2627                trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2628
2629                if (regs)
2630                        save_stack_trace_regs(regs, &trace);
2631                else
2632                        save_stack_trace(&trace);
2633
2634                if (trace.nr_entries > size)
2635                        size = trace.nr_entries;
2636        } else
2637                /* From now on, use_stack is a boolean */
2638                use_stack = 0;
2639
2640        size *= sizeof(unsigned long);
2641
2642        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2643                                            sizeof(*entry) + size, flags, pc);
2644        if (!event)
2645                goto out;
2646        entry = ring_buffer_event_data(event);
2647
2648        memset(&entry->caller, 0, size);
2649
2650        if (use_stack)
2651                memcpy(&entry->caller, trace.entries,
2652                       trace.nr_entries * sizeof(unsigned long));
2653        else {
2654                trace.max_entries       = FTRACE_STACK_ENTRIES;
2655                trace.entries           = entry->caller;
2656                if (regs)
2657                        save_stack_trace_regs(regs, &trace);
2658                else
2659                        save_stack_trace(&trace);
2660        }
2661
2662        entry->size = trace.nr_entries;
2663
2664        if (!call_filter_check_discard(call, entry, buffer, event))
2665                __buffer_unlock_commit(buffer, event);
2666
2667 out:
2668        /* Again, don't let gcc optimize things here */
2669        barrier();
2670        __this_cpu_dec(ftrace_stack_reserve);
2671        preempt_enable_notrace();
2672
2673}
2674
2675static inline void ftrace_trace_stack(struct trace_array *tr,
2676                                      struct ring_buffer *buffer,
2677                                      unsigned long flags,
2678                                      int skip, int pc, struct pt_regs *regs)
2679{
2680        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2681                return;
2682
2683        __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2684}
2685
2686void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2687                   int pc)
2688{
2689        struct ring_buffer *buffer = tr->trace_buffer.buffer;
2690
2691        if (rcu_is_watching()) {
2692                __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2693                return;
2694        }
2695
2696        /*
2697         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2698         * but if the above rcu_is_watching() failed, then the NMI
2699         * triggered someplace critical, and rcu_irq_enter() should
2700         * not be called from NMI.
2701         */
2702        if (unlikely(in_nmi()))
2703                return;
2704
2705        rcu_irq_enter_irqson();
2706        __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2707        rcu_irq_exit_irqson();
2708}
2709
2710/**
2711 * trace_dump_stack - record a stack back trace in the trace buffer
2712 * @skip: Number of functions to skip (helper handlers)
2713 */
2714void trace_dump_stack(int skip)
2715{
2716        unsigned long flags;
2717
2718        if (tracing_disabled || tracing_selftest_running)
2719                return;
2720
2721        local_save_flags(flags);
2722
2723#ifndef CONFIG_UNWINDER_ORC
2724        /* Skip 1 to skip this function. */
2725        skip++;
2726#endif
2727        __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2728                             flags, skip, preempt_count(), NULL);
2729}
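/*
 * Editor's sketch (not part of the original source): a debugging hook can
 * drop a kernel stack trace into the ring buffer instead of the console
 * with, for example:
 *
 *	trace_dump_stack(0);
 *
 * where 0 means no extra callers are skipped beyond the helper itself.
 */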
2730
2731static DEFINE_PER_CPU(int, user_stack_count);
2732
2733void
2734ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2735{
2736        struct trace_event_call *call = &event_user_stack;
2737        struct ring_buffer_event *event;
2738        struct userstack_entry *entry;
2739        struct stack_trace trace;
2740
2741        if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2742                return;
2743
2744        /*
2745         * NMIs can not handle page faults, even with fixups.
2746         * Saving the user stack can (and often does) fault.
2747         */
2748        if (unlikely(in_nmi()))
2749                return;
2750
2751        /*
2752         * prevent recursion, since the user stack tracing may
2753         * trigger other kernel events.
2754         */
2755        preempt_disable();
2756        if (__this_cpu_read(user_stack_count))
2757                goto out;
2758
2759        __this_cpu_inc(user_stack_count);
2760
2761        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2762                                            sizeof(*entry), flags, pc);
2763        if (!event)
2764                goto out_drop_count;
2765        entry   = ring_buffer_event_data(event);
2766
2767        entry->tgid             = current->tgid;
2768        memset(&entry->caller, 0, sizeof(entry->caller));
2769
2770        trace.nr_entries        = 0;
2771        trace.max_entries       = FTRACE_STACK_ENTRIES;
2772        trace.skip              = 0;
2773        trace.entries           = entry->caller;
2774
2775        save_stack_trace_user(&trace);
2776        if (!call_filter_check_discard(call, entry, buffer, event))
2777                __buffer_unlock_commit(buffer, event);
2778
2779 out_drop_count:
2780        __this_cpu_dec(user_stack_count);
2781 out:
2782        preempt_enable();
2783}
2784
2785#ifdef UNUSED
2786static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2787{
2788        ftrace_trace_userstack(tr, flags, preempt_count());
2789}
2790#endif /* UNUSED */
2791
2792#endif /* CONFIG_STACKTRACE */
2793
2794/* created for use with alloc_percpu */
2795struct trace_buffer_struct {
2796        int nesting;
2797        char buffer[4][TRACE_BUF_SIZE];
2798};
2799
2800static struct trace_buffer_struct *trace_percpu_buffer;
2801
2802/*
2803 * This allows for lockless recording.  If we're nested too deeply, then
2804 * this returns NULL.
2805 */
2806static char *get_trace_buf(void)
2807{
2808        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2809
2810        if (!buffer || buffer->nesting >= 4)
2811                return NULL;
2812
2813        buffer->nesting++;
2814
2815        /* Interrupts must see nesting incremented before we use the buffer */
2816        barrier();
2817        return &buffer->buffer[buffer->nesting][0];
2818}
2819
2820static void put_trace_buf(void)
2821{
2822        /* Don't let the decrement of nesting leak before this */
2823        barrier();
2824        this_cpu_dec(trace_percpu_buffer->nesting);
2825}
2826
2827static int alloc_percpu_trace_buffer(void)
2828{
2829        struct trace_buffer_struct *buffers;
2830
2831        buffers = alloc_percpu(struct trace_buffer_struct);
2832        if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2833                return -ENOMEM;
2834
2835        trace_percpu_buffer = buffers;
2836        return 0;
2837}
2838
2839static int buffers_allocated;
2840
2841void trace_printk_init_buffers(void)
2842{
2843        if (buffers_allocated)
2844                return;
2845
2846        if (alloc_percpu_trace_buffer())
2847                return;
2848
2849        /* trace_printk() is for debug use only. Don't use it in production. */
2850
2851        pr_warn("\n");
2852        pr_warn("**********************************************************\n");
2853        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2854        pr_warn("**                                                      **\n");
2855        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2856        pr_warn("**                                                      **\n");
2857        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2858        pr_warn("** unsafe for production use.                           **\n");
2859        pr_warn("**                                                      **\n");
2860        pr_warn("** If you see this message and you are not debugging    **\n");
2861        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2862        pr_warn("**                                                      **\n");
2863        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2864        pr_warn("**********************************************************\n");
2865
2866        /* Expand the buffers to set size */
2867        tracing_update_buffers();
2868
2869        buffers_allocated = 1;
2870
2871        /*
2872         * trace_printk_init_buffers() can be called by modules.
2873         * If that happens, then we need to start cmdline recording
2874         * directly here. If the global_trace.buffer is already
2875         * allocated here, then this was called by module code.
2876         */
2877        if (global_trace.trace_buffer.buffer)
2878                tracing_start_cmdline_record();
2879}
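/*
 * Editor's sketch (not part of the original source): the buffers set up
 * here back trace_printk(), which debug code uses like printk() but which
 * writes into the ring buffer instead of the console:
 *
 *	trace_printk("entered with flags=%lx count=%d\n", flags, count);
 *
 * flags and count are hypothetical variables of the surrounding code.
 */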
2880
2881void trace_printk_start_comm(void)
2882{
2883        /* Start tracing comms if trace printk is set */
2884        if (!buffers_allocated)
2885                return;
2886        tracing_start_cmdline_record();
2887}
2888
2889static void trace_printk_start_stop_comm(int enabled)
2890{
2891        if (!buffers_allocated)
2892                return;
2893
2894        if (enabled)
2895                tracing_start_cmdline_record();
2896        else
2897                tracing_stop_cmdline_record();
2898}
2899
2900/**
2901 * trace_vbprintk - write binary msg to tracing buffer
2902 * @ip: the address of the caller
 * @fmt: the binary printf format string
 * @args: the arguments for @fmt
2903 */
2904int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2905{
2906        struct trace_event_call *call = &event_bprint;
2907        struct ring_buffer_event *event;
2908        struct ring_buffer *buffer;
2909        struct trace_array *tr = &global_trace;
2910        struct bprint_entry *entry;
2911        unsigned long flags;
2912        char *tbuffer;
2913        int len = 0, size, pc;
2914
2915        if (unlikely(tracing_selftest_running || tracing_disabled))
2916                return 0;
2917
2918        /* Don't pollute graph traces with trace_vprintk internals */
2919        pause_graph_tracing();
2920
2921        pc = preempt_count();
2922        preempt_disable_notrace();
2923
2924        tbuffer = get_trace_buf();
2925        if (!tbuffer) {
2926                len = 0;
2927                goto out_nobuffer;
2928        }
2929
2930        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2931
2932        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2933                goto out;
2934
2935        local_save_flags(flags);
2936        size = sizeof(*entry) + sizeof(u32) * len;
2937        buffer = tr->trace_buffer.buffer;
2938        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2939                                            flags, pc);
2940        if (!event)
2941                goto out;
2942        entry = ring_buffer_event_data(event);
2943        entry->ip                       = ip;
2944        entry->fmt                      = fmt;
2945
2946        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2947        if (!call_filter_check_discard(call, entry, buffer, event)) {
2948                __buffer_unlock_commit(buffer, event);
2949                ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2950        }
2951
2952out:
2953        put_trace_buf();
2954
2955out_nobuffer:
2956        preempt_enable_notrace();
2957        unpause_graph_tracing();
2958
2959        return len;
2960}
2961EXPORT_SYMBOL_GPL(trace_vbprintk);
2962
2963__printf(3, 0)
2964static int
2965__trace_array_vprintk(struct ring_buffer *buffer,
2966                      unsigned long ip, const char *fmt, va_list args)
2967{
2968        struct trace_event_call *call = &event_print;
2969        struct ring_buffer_event *event;
2970        int len = 0, size, pc;
2971        struct print_entry *entry;
2972        unsigned long flags;
2973        char *tbuffer;
2974
2975        if (tracing_disabled || tracing_selftest_running)
2976                return 0;
2977
2978        /* Don't pollute graph traces with trace_vprintk internals */
2979        pause_graph_tracing();
2980
2981        pc = preempt_count();
2982        preempt_disable_notrace();
2983
2984
2985        tbuffer = get_trace_buf();
2986        if (!tbuffer) {
2987                len = 0;
2988                goto out_nobuffer;
2989        }
2990
2991        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2992
2993        local_save_flags(flags);
2994        size = sizeof(*entry) + len + 1;
2995        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2996                                            flags, pc);
2997        if (!event)
2998                goto out;
2999        entry = ring_buffer_event_data(event);
3000        entry->ip = ip;
3001
3002        memcpy(&entry->buf, tbuffer, len + 1);
3003        if (!call_filter_check_discard(call, entry, buffer, event)) {
3004                __buffer_unlock_commit(buffer, event);
3005                ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3006        }
3007
3008out:
3009        put_trace_buf();
3010
3011out_nobuffer:
3012        preempt_enable_notrace();
3013        unpause_graph_tracing();
3014
3015        return len;
3016}
3017
3018__printf(3, 0)
3019int trace_array_vprintk(struct trace_array *tr,
3020                        unsigned long ip, const char *fmt, va_list args)
3021{
3022        return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3023}
3024
3025__printf(3, 0)
3026int trace_array_printk(struct trace_array *tr,
3027                       unsigned long ip, const char *fmt, ...)
3028{
3029        int ret;
3030        va_list ap;
3031
3032        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3033                return 0;
3034
3035        va_start(ap, fmt);
3036        ret = trace_array_vprintk(tr, ip, fmt, ap);
3037        va_end(ap);
3038        return ret;
3039}
3040
3041__printf(3, 4)
3042int trace_array_printk_buf(struct ring_buffer *buffer,
3043                           unsigned long ip, const char *fmt, ...)
3044{
3045        int ret;
3046        va_list ap;
3047
3048        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3049                return 0;
3050
3051        va_start(ap, fmt);
3052        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3053        va_end(ap);
3054        return ret;
3055}
3056
3057__printf(2, 0)
3058int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3059{
3060        return trace_array_vprintk(&global_trace, ip, fmt, args);
3061}
3062EXPORT_SYMBOL_GPL(trace_vprintk);
3063
3064static void trace_iterator_increment(struct trace_iterator *iter)
3065{
3066        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3067
3068        iter->idx++;
3069        if (buf_iter)
3070                ring_buffer_read(buf_iter, NULL);
3071}
3072
3073static struct trace_entry *
3074peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3075                unsigned long *lost_events)
3076{
3077        struct ring_buffer_event *event;
3078        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3079
3080        if (buf_iter)
3081                event = ring_buffer_iter_peek(buf_iter, ts);
3082        else
3083                event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3084                                         lost_events);
3085
3086        if (event) {
3087                iter->ent_size = ring_buffer_event_length(event);
3088                return ring_buffer_event_data(event);
3089        }
3090        iter->ent_size = 0;
3091        return NULL;
3092}
3093
3094static struct trace_entry *
3095__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3096                  unsigned long *missing_events, u64 *ent_ts)
3097{
3098        struct ring_buffer *buffer = iter->trace_buffer->buffer;
3099        struct trace_entry *ent, *next = NULL;
3100        unsigned long lost_events = 0, next_lost = 0;
3101        int cpu_file = iter->cpu_file;
3102        u64 next_ts = 0, ts;
3103        int next_cpu = -1;
3104        int next_size = 0;
3105        int cpu;
3106
3107        /*
3108         * If we are in a per_cpu trace file, don't bother iterating over
3109         * all CPUs; peek at the requested CPU directly.
3110         */
3111        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3112                if (ring_buffer_empty_cpu(buffer, cpu_file))
3113                        return NULL;
3114                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3115                if (ent_cpu)
3116                        *ent_cpu = cpu_file;
3117
3118                return ent;
3119        }
3120
3121        for_each_tracing_cpu(cpu) {
3122
3123                if (ring_buffer_empty_cpu(buffer, cpu))
3124                        continue;
3125
3126                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3127
3128                /*
3129                 * Pick the entry with the smallest timestamp:
3130                 */
3131                if (ent && (!next || ts < next_ts)) {
3132                        next = ent;
3133                        next_cpu = cpu;
3134                        next_ts = ts;
3135                        next_lost = lost_events;
3136                        next_size = iter->ent_size;
3137                }
3138        }
3139
3140        iter->ent_size = next_size;
3141
3142        if (ent_cpu)
3143                *ent_cpu = next_cpu;
3144
3145        if (ent_ts)
3146                *ent_ts = next_ts;
3147
3148        if (missing_events)
3149                *missing_events = next_lost;
3150
3151        return next;
3152}
3153
3154/* Find the next real entry, without updating the iterator itself */
3155struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3156                                          int *ent_cpu, u64 *ent_ts)
3157{
3158        return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3159}
3160
3161/* Find the next real entry, and increment the iterator to the next entry */
3162void *trace_find_next_entry_inc(struct trace_iterator *iter)
3163{
3164        iter->ent = __find_next_entry(iter, &iter->cpu,
3165                                      &iter->lost_events, &iter->ts);
3166
3167        if (iter->ent)
3168                trace_iterator_increment(iter);
3169
3170        return iter->ent ? iter : NULL;
3171}
3172
3173static void trace_consume(struct trace_iterator *iter)
3174{
3175        ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3176                            &iter->lost_events);
3177}
3178
3179static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3180{
3181        struct trace_iterator *iter = m->private;
3182        int i = (int)*pos;
3183        void *ent;
3184
3185        WARN_ON_ONCE(iter->leftover);
3186
3187        (*pos)++;
3188
3189        /* can't go backwards */
3190        if (iter->idx > i)
3191                return NULL;
3192
3193        if (iter->idx < 0)
3194                ent = trace_find_next_entry_inc(iter);
3195        else
3196                ent = iter;
3197
3198        while (ent && iter->idx < i)
3199                ent = trace_find_next_entry_inc(iter);
3200
3201        iter->pos = *pos;
3202
3203        return ent;
3204}
3205
3206void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3207{
3208        struct ring_buffer_event *event;
3209        struct ring_buffer_iter *buf_iter;
3210        unsigned long entries = 0;
3211        u64 ts;
3212
3213        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3214
3215        buf_iter = trace_buffer_iter(iter, cpu);
3216        if (!buf_iter)
3217                return;
3218
3219        ring_buffer_iter_reset(buf_iter);
3220
3221        /*
3222         * With the max latency tracers, it is possible that a reset
3223         * never took place on a CPU. This is evident when the
3224         * timestamp is before the start of the buffer.
3225         */
3226        while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3227                if (ts >= iter->trace_buffer->time_start)
3228                        break;
3229                entries++;
3230                ring_buffer_read(buf_iter, NULL);
3231        }
3232
3233        per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3234}
3235
3236/*
3237 * The current tracer is copied to avoid global locking
3238 * all around.
3239 */
3240static void *s_start(struct seq_file *m, loff_t *pos)
3241{
3242        struct trace_iterator *iter = m->private;
3243        struct trace_array *tr = iter->tr;
3244        int cpu_file = iter->cpu_file;
3245        void *p = NULL;
3246        loff_t l = 0;
3247        int cpu;
3248
3249        /*
3250         * Copy the tracer to avoid using a global lock all around.
3251         * iter->trace is a copy of current_trace; the pointer to the
3252         * name may be used instead of a strcmp(), as iter->trace->name
3253         * will point to the same string as current_trace->name.
3254         */
3255        mutex_lock(&trace_types_lock);
3256        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3257                *iter->trace = *tr->current_trace;
3258        mutex_unlock(&trace_types_lock);
3259
3260#ifdef CONFIG_TRACER_MAX_TRACE
3261        if (iter->snapshot && iter->trace->use_max_tr)
3262                return ERR_PTR(-EBUSY);
3263#endif
3264
3265        if (!iter->snapshot)
3266                atomic_inc(&trace_record_taskinfo_disabled);
3267
3268        if (*pos != iter->pos) {
3269                iter->ent = NULL;
3270                iter->cpu = 0;
3271                iter->idx = -1;
3272
3273                if (cpu_file == RING_BUFFER_ALL_CPUS) {
3274                        for_each_tracing_cpu(cpu)
3275                                tracing_iter_reset(iter, cpu);
3276                } else
3277                        tracing_iter_reset(iter, cpu_file);
3278
3279                iter->leftover = 0;
3280                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3281                        ;
3282
3283        } else {
3284                /*
3285                 * If we overflowed the seq_file before, then we want
3286                 * to just reuse the trace_seq buffer again.
3287                 */
3288                if (iter->leftover)
3289                        p = iter;
3290                else {
3291                        l = *pos - 1;
3292                        p = s_next(m, p, &l);
3293                }
3294        }
3295
3296        trace_event_read_lock();
3297        trace_access_lock(cpu_file);
3298        return p;
3299}
3300
3301static void s_stop(struct seq_file *m, void *p)
3302{
3303        struct trace_iterator *iter = m->private;
3304
3305#ifdef CONFIG_TRACER_MAX_TRACE
3306        if (iter->snapshot && iter->trace->use_max_tr)
3307                return;
3308#endif
3309
3310        if (!iter->snapshot)
3311                atomic_dec(&trace_record_taskinfo_disabled);
3312
3313        trace_access_unlock(iter->cpu_file);
3314        trace_event_read_unlock();
3315}
3316
3317static void
3318get_total_entries(struct trace_buffer *buf,
3319                  unsigned long *total, unsigned long *entries)
3320{
3321        unsigned long count;
3322        int cpu;
3323
3324        *total = 0;
3325        *entries = 0;
3326
3327        for_each_tracing_cpu(cpu) {
3328                count = ring_buffer_entries_cpu(buf->buffer, cpu);
3329                /*
3330                 * If this buffer has skipped entries, then we hold all
3331                 * entries for the trace and we need to ignore the
3332                 * ones before the time stamp.
3333                 */
3334                if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3335                        count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3336                        /* total is the same as the entries */
3337                        *total += count;
3338                } else
3339                        *total += count +
3340                                ring_buffer_overrun_cpu(buf->buffer, cpu);
3341                *entries += count;
3342        }
3343}
3344
3345static void print_lat_help_header(struct seq_file *m)
3346{
3347        seq_puts(m, "#                  _------=> CPU#            \n"
3348                    "#                 / _-----=> irqs-off        \n"
3349                    "#                | / _----=> need-resched    \n"
3350                    "#                || / _---=> hardirq/softirq \n"
3351                    "#                ||| / _--=> preempt-depth   \n"
3352                    "#                |||| /     delay            \n"
3353                    "#  cmd     pid   ||||| time  |   caller      \n"
3354                    "#     \\   /      |||||  \\    |   /         \n");
3355}
3356
3357static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3358{
3359        unsigned long total;
3360        unsigned long entries;
3361
3362        get_total_entries(buf, &total, &entries);
3363        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3364                   entries, total, num_online_cpus());
3365        seq_puts(m, "#\n");
3366}
3367
3368static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3369                                   unsigned int flags)
3370{
3371        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3372
3373        print_event_info(buf, m);
3374
3375        seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3376        seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3377}
3378
3379static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3380                                       unsigned int flags)
3381{
3382        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3383        const char tgid_space[] = "          ";
3384        const char space[] = "  ";
3385
3386        seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3387                   tgid ? tgid_space : space);
3388        seq_printf(m, "#                          %s / _----=> need-resched\n",
3389                   tgid ? tgid_space : space);
3390        seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3391                   tgid ? tgid_space : space);
3392        seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3393                   tgid ? tgid_space : space);
3394        seq_printf(m, "#                          %s||| /     delay\n",
3395                   tgid ? tgid_space : space);
3396        seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3397                   tgid ? "   TGID   " : space);
3398        seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3399                   tgid ? "     |    " : space);
3400}
3401
3402void
3403print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3404{
3405        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3406        struct trace_buffer *buf = iter->trace_buffer;
3407        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3408        struct tracer *type = iter->trace;
3409        unsigned long entries;
3410        unsigned long total;
3411        const char *name;
3412
3413        name = type->name;
3414
3415        get_total_entries(buf, &total, &entries);
3416
3417        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3418                   name, UTS_RELEASE);
3419        seq_puts(m, "# -----------------------------------"
3420                 "---------------------------------\n");
3421        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3422                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3423                   nsecs_to_usecs(data->saved_latency),
3424                   entries,
3425                   total,
3426                   buf->cpu,
3427#if defined(CONFIG_PREEMPT_NONE)
3428                   "server",
3429#elif defined(CONFIG_PREEMPT_VOLUNTARY)
3430                   "desktop",
3431#elif defined(CONFIG_PREEMPT)
3432                   "preempt",
3433#else
3434                   "unknown",
3435#endif
3436                   /* These are reserved for later use */
3437                   0, 0, 0, 0);
3438#ifdef CONFIG_SMP
3439        seq_printf(m, " #P:%d)\n", num_online_cpus());
3440#else
3441        seq_puts(m, ")\n");
3442#endif
3443        seq_puts(m, "#    -----------------\n");
3444        seq_printf(m, "#    | task: %.16s-%d "
3445                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3446                   data->comm, data->pid,
3447                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3448                   data->policy, data->rt_priority);
3449        seq_puts(m, "#    -----------------\n");
3450
3451        if (data->critical_start) {
3452                seq_puts(m, "#  => started at: ");
3453                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3454                trace_print_seq(m, &iter->seq);
3455                seq_puts(m, "\n#  => ended at:   ");
3456                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3457                trace_print_seq(m, &iter->seq);
3458                seq_puts(m, "\n#\n");
3459        }
3460
3461        seq_puts(m, "#\n");
3462}
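/*
 * For reference, the header emitted above looks roughly like this
 * (tracer name and values are illustrative only):
 *
 *	# irqsoff latency trace v1.1.5 on 4.18.0
 *	# -----------------------------------------------------------------
 *	# latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
 *	#    -----------------
 *	#    | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0)
 *	#    -----------------
 *	#  => started at: __lock_task_sighand
 *	#  => ended at:   _raw_spin_unlock_irqrestore
 *	#
 */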
3463
3464static void test_cpu_buff_start(struct trace_iterator *iter)
3465{
3466        struct trace_seq *s = &iter->seq;
3467        struct trace_array *tr = iter->tr;
3468
3469        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3470                return;
3471
3472        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3473                return;
3474
3475        if (cpumask_available(iter->started) &&
3476            cpumask_test_cpu(iter->cpu, iter->started))
3477                return;
3478
3479        if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3480                return;
3481
3482        if (cpumask_available(iter->started))
3483                cpumask_set_cpu(iter->cpu, iter->started);
3484
3485        /* Don't print started cpu buffer for the first entry of the trace */
3486        if (iter->idx > 1)
3487                trace_seq_printf(s, "##### CPU %u buffer started #####\n",
3488                                iter->cpu);
3489}
3490
3491static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3492{
3493        struct trace_array *tr = iter->tr;
3494        struct trace_seq *s = &iter->seq;
3495        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3496        struct trace_entry *entry;
3497        struct trace_event *event;
3498
3499        entry = iter->ent;
3500
3501        test_cpu_buff_start(iter);
3502
3503        event = ftrace_find_event(entry->type);
3504
3505        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3506                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3507                        trace_print_lat_context(iter);
3508                else
3509                        trace_print_context(iter);
3510        }
3511
3512        if (trace_seq_has_overflowed(s))
3513                return TRACE_TYPE_PARTIAL_LINE;
3514
3515        if (event)
3516                return event->funcs->trace(iter, sym_flags, event);
3517
3518        trace_seq_printf(s, "Unknown type %d\n", entry->type);
3519
3520        return trace_handle_return(s);
3521}
3522
3523static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3524{
3525        struct trace_array *tr = iter->tr;
3526        struct trace_seq *s = &iter->seq;
3527        struct trace_entry *entry;
3528        struct trace_event *event;
3529
3530        entry = iter->ent;
3531
3532        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3533                trace_seq_printf(s, "%d %d %llu ",
3534                                 entry->pid, iter->cpu, iter->ts);
3535
3536        if (trace_seq_has_overflowed(s))
3537                return TRACE_TYPE_PARTIAL_LINE;
3538
3539        event = ftrace_find_event(entry->type);
3540        if (event)
3541                return event->funcs->raw(iter, 0, event);
3542
3543        trace_seq_printf(s, "%d ?\n", entry->type);
3544
3545        return trace_handle_return(s);
3546}
3547
3548static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3549{
3550        struct trace_array *tr = iter->tr;
3551        struct trace_seq *s = &iter->seq;
3552        unsigned char newline = '\n';
3553        struct trace_entry *entry;
3554        struct trace_event *event;
3555
3556        entry = iter->ent;
3557
3558        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3559                SEQ_PUT_HEX_FIELD(s, entry->pid);
3560                SEQ_PUT_HEX_FIELD(s, iter->cpu);
3561                SEQ_PUT_HEX_FIELD(s, iter->ts);
3562                if (trace_seq_has_overflowed(s))
3563                        return TRACE_TYPE_PARTIAL_LINE;
3564        }
3565
3566        event = ftrace_find_event(entry->type);
3567        if (event) {
3568                enum print_line_t ret = event->funcs->hex(iter, 0, event);
3569                if (ret != TRACE_TYPE_HANDLED)
3570                        return ret;
3571        }
3572
3573        SEQ_PUT_FIELD(s, newline);
3574
3575        return trace_handle_return(s);
3576}
3577
3578static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3579{
3580        struct trace_array *tr = iter->tr;
3581        struct trace_seq *s = &iter->seq;
3582        struct trace_entry *entry;
3583        struct trace_event *event;
3584
3585        entry = iter->ent;
3586
3587        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3588                SEQ_PUT_FIELD(s, entry->pid);
3589                SEQ_PUT_FIELD(s, iter->cpu);
3590                SEQ_PUT_FIELD(s, iter->ts);
3591                if (trace_seq_has_overflowed(s))
3592                        return TRACE_TYPE_PARTIAL_LINE;
3593        }
3594
3595        event = ftrace_find_event(entry->type);
3596        return event ? event->funcs->binary(iter, 0, event) :
3597                TRACE_TYPE_HANDLED;
3598}
3599
3600int trace_empty(struct trace_iterator *iter)
3601{
3602        struct ring_buffer_iter *buf_iter;
3603        int cpu;
3604
3605        /* If we are looking at one CPU buffer, only check that one */
3606        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3607                cpu = iter->cpu_file;
3608                buf_iter = trace_buffer_iter(iter, cpu);
3609                if (buf_iter) {
3610                        if (!ring_buffer_iter_empty(buf_iter))
3611                                return 0;
3612                } else {
3613                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3614                                return 0;
3615                }
3616                return 1;
3617        }
3618
3619        for_each_tracing_cpu(cpu) {
3620                buf_iter = trace_buffer_iter(iter, cpu);
3621                if (buf_iter) {
3622                        if (!ring_buffer_iter_empty(buf_iter))
3623                                return 0;
3624                } else {
3625                        if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3626                                return 0;
3627                }
3628        }
3629
3630        return 1;
3631}
3632
3633/*  Called with trace_event_read_lock() held. */
3634enum print_line_t print_trace_line(struct trace_iterator *iter)
3635{
3636        struct trace_array *tr = iter->tr;
3637        unsigned long trace_flags = tr->trace_flags;
3638        enum print_line_t ret;
3639
3640        if (iter->lost_events) {
3641                trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3642                                 iter->cpu, iter->lost_events);
3643                if (trace_seq_has_overflowed(&iter->seq))
3644                        return TRACE_TYPE_PARTIAL_LINE;
3645        }
3646
3647        if (iter->trace && iter->trace->print_line) {
3648                ret = iter->trace->print_line(iter);
3649                if (ret != TRACE_TYPE_UNHANDLED)
3650                        return ret;
3651        }
3652
3653        if (iter->ent->type == TRACE_BPUTS &&
3654                        trace_flags & TRACE_ITER_PRINTK &&
3655                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3656                return trace_print_bputs_msg_only(iter);
3657
3658        if (iter->ent->type == TRACE_BPRINT &&
3659                        trace_flags & TRACE_ITER_PRINTK &&
3660                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3661                return trace_print_bprintk_msg_only(iter);
3662
3663        if (iter->ent->type == TRACE_PRINT &&
3664                        trace_flags & TRACE_ITER_PRINTK &&
3665                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3666                return trace_print_printk_msg_only(iter);
3667
3668        if (trace_flags & TRACE_ITER_BIN)
3669                return print_bin_fmt(iter);
3670
3671        if (trace_flags & TRACE_ITER_HEX)
3672                return print_hex_fmt(iter);
3673
3674        if (trace_flags & TRACE_ITER_RAW)
3675                return print_raw_fmt(iter);
3676
3677        return print_trace_fmt(iter);
3678}
3679
3680void trace_latency_header(struct seq_file *m)
3681{
3682        struct trace_iterator *iter = m->private;
3683        struct trace_array *tr = iter->tr;
3684
3685        /* print nothing if the buffers are empty */
3686        if (trace_empty(iter))
3687                return;
3688
3689        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3690                print_trace_header(m, iter);
3691
3692        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3693                print_lat_help_header(m);
3694}
3695
3696void trace_default_header(struct seq_file *m)
3697{
3698        struct trace_iterator *iter = m->private;
3699        struct trace_array *tr = iter->tr;
3700        unsigned long trace_flags = tr->trace_flags;
3701
3702        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3703                return;
3704
3705        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3706                /* print nothing if the buffers are empty */
3707                if (trace_empty(iter))
3708                        return;
3709                print_trace_header(m, iter);
3710                if (!(trace_flags & TRACE_ITER_VERBOSE))
3711                        print_lat_help_header(m);
3712        } else {
3713                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3714                        if (trace_flags & TRACE_ITER_IRQ_INFO)
3715                                print_func_help_header_irq(iter->trace_buffer,
3716                                                           m, trace_flags);
3717                        else
3718                                print_func_help_header(iter->trace_buffer, m,
3719                                                       trace_flags);
3720                }
3721        }
3722}
3723
3724static void test_ftrace_alive(struct seq_file *m)
3725{
3726        if (!ftrace_is_dead())
3727                return;
3728        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3729                    "#          MAY BE MISSING FUNCTION EVENTS\n");
3730}
3731
3732#ifdef CONFIG_TRACER_MAX_TRACE
3733static void show_snapshot_main_help(struct seq_file *m)
3734{
3735        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3736                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3737                    "#                      Takes a snapshot of the main buffer.\n"
3738                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3739                    "#                      (Doesn't have to be '2'; works with any number\n"
3740                    "#                       that is not a '0' or '1')\n");
3741}
3742
3743static void show_snapshot_percpu_help(struct seq_file *m)
3744{
3745        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3746#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3747        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3748                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3749#else
3750        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3751                    "#                     Must use main snapshot file to allocate.\n");
3752#endif
3753        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3754                    "#                      (Doesn't have to be '2'; works with any number\n"
3755                    "#                       that is not a '0' or '1')\n");
3756}
3757
3758static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3759{
3760        if (iter->tr->allocated_snapshot)
3761                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3762        else
3763                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3764
3765        seq_puts(m, "# Snapshot commands:\n");
3766        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3767                show_snapshot_main_help(m);
3768        else
3769                show_snapshot_percpu_help(m);
3770}
3771#else
3772/* Should never be called */
3773static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3774#endif
3775
3776static int s_show(struct seq_file *m, void *v)
3777{
3778        struct trace_iterator *iter = v;
3779        int ret;
3780
3781        if (iter->ent == NULL) {
3782                if (iter->tr) {
3783                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
3784                        seq_puts(m, "#\n");
3785                        test_ftrace_alive(m);
3786                }
3787                if (iter->snapshot && trace_empty(iter))
3788                        print_snapshot_help(m, iter);
3789                else if (iter->trace && iter->trace->print_header)
3790                        iter->trace->print_header(m);
3791                else
3792                        trace_default_header(m);
3793
3794        } else if (iter->leftover) {
3795                /*
3796                 * If we filled the seq_file buffer earlier, we
3797                 * want to just show it now.
3798                 */
3799                ret = trace_print_seq(m, &iter->seq);
3800
3801                /* ret should this time be zero, but you never know */
3802                iter->leftover = ret;
3803
3804        } else {
3805                print_trace_line(iter);
3806                ret = trace_print_seq(m, &iter->seq);
3807                /*
3808                 * If we overflow the seq_file buffer, then it will
3809                 * ask us for this data again at start up.
3810                 * Use that instead.
3811                 *  ret is 0 if seq_file write succeeded.
3812                 *        -1 otherwise.
3813                 */
3814                iter->leftover = ret;
3815        }
3816
3817        return 0;
3818}
3819
3820/*
3821 * Should be used after trace_array_get(); trace_types_lock
3822 * ensures that i_cdev was already initialized.
3823 */
3824static inline int tracing_get_cpu(struct inode *inode)
3825{
3826        if (inode->i_cdev) /* See trace_create_cpu_file() */
3827                return (long)inode->i_cdev - 1;
3828        return RING_BUFFER_ALL_CPUS;
3829}
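/*
 * A minimal sketch of the encoding assumed above: the per-cpu files
 * created by trace_create_cpu_file() stash the CPU number, offset by
 * one, in i_cdev, conceptually:
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);
 *
 * so a never-initialized i_cdev (0) maps to RING_BUFFER_ALL_CPUS here.
 */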
3830
3831static const struct seq_operations tracer_seq_ops = {
3832        .start          = s_start,
3833        .next           = s_next,
3834        .stop           = s_stop,
3835        .show           = s_show,
3836};
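/*
 * These callbacks follow the usual seq_file contract; a read(2) of the
 * "trace" file is driven by fs/seq_file.c roughly as (simplified sketch):
 *
 *	p = start(m, &pos);
 *	while (p && !IS_ERR(p)) {
 *		show(m, p);
 *		p = next(m, p, &pos);
 *	}
 *	stop(m, p);
 */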
3837
3838static struct trace_iterator *
3839__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3840{
3841        struct trace_array *tr = inode->i_private;
3842        struct trace_iterator *iter;
3843        int cpu;
3844
3845        if (tracing_disabled)
3846                return ERR_PTR(-ENODEV);
3847
3848        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3849        if (!iter)
3850                return ERR_PTR(-ENOMEM);
3851
3852        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3853                                    GFP_KERNEL);
3854        if (!iter->buffer_iter)
3855                goto release;
3856
3857        /*
3858         * We make a copy of the current tracer to avoid concurrent
3859         * changes to it while we are reading.
3860         */
3861        mutex_lock(&trace_types_lock);
3862        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3863        if (!iter->trace)
3864                goto fail;
3865
3866        *iter->trace = *tr->current_trace;
3867
3868        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3869                goto fail;
3870
3871        iter->tr = tr;
3872
3873#ifdef CONFIG_TRACER_MAX_TRACE
3874        /* Currently only the top directory has a snapshot */
3875        if (tr->current_trace->print_max || snapshot)
3876                iter->trace_buffer = &tr->max_buffer;
3877        else
3878#endif
3879                iter->trace_buffer = &tr->trace_buffer;
3880        iter->snapshot = snapshot;
3881        iter->pos = -1;
3882        iter->cpu_file = tracing_get_cpu(inode);
3883        mutex_init(&iter->mutex);
3884
3885        /* Notify the tracer early; before we stop tracing. */
3886        if (iter->trace && iter->trace->open)
3887                iter->trace->open(iter);
3888
3889        /* Annotate start of buffers if we had overruns */
3890        if (ring_buffer_overruns(iter->trace_buffer->buffer))
3891                iter->iter_flags |= TRACE_FILE_ANNOTATE;
3892
3893        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3894        if (trace_clocks[tr->clock_id].in_ns)
3895                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3896
3897        /* stop the trace while dumping if we are not opening "snapshot" */
3898        if (!iter->snapshot)
3899                tracing_stop_tr(tr);
3900
3901        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3902                for_each_tracing_cpu(cpu) {
3903                        iter->buffer_iter[cpu] =
3904                                ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3905                }
3906                ring_buffer_read_prepare_sync();
3907                for_each_tracing_cpu(cpu) {
3908                        ring_buffer_read_start(iter->buffer_iter[cpu]);
3909                        tracing_iter_reset(iter, cpu);
3910                }
3911        } else {
3912                cpu = iter->cpu_file;
3913                iter->buffer_iter[cpu] =
3914                        ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3915                ring_buffer_read_prepare_sync();
3916                ring_buffer_read_start(iter->buffer_iter[cpu]);
3917                tracing_iter_reset(iter, cpu);
3918        }
3919
3920        mutex_unlock(&trace_types_lock);
3921
3922        return iter;
3923
3924 fail:
3925        mutex_unlock(&trace_types_lock);
3926        kfree(iter->trace);
3927        kfree(iter->buffer_iter);
3928release:
3929        seq_release_private(inode, file);
3930        return ERR_PTR(-ENOMEM);
3931}
3932
3933int tracing_open_generic(struct inode *inode, struct file *filp)
3934{
3935        if (tracing_disabled)
3936                return -ENODEV;
3937
3938        filp->private_data = inode->i_private;
3939        return 0;
3940}
3941
3942bool tracing_is_disabled(void)
3943{
3944        return tracing_disabled ? true : false;
3945}
3946
3947/*
3948 * Open and update trace_array ref count.
3949 * Must have the current trace_array passed to it.
3950 */
3951static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3952{
3953        struct trace_array *tr = inode->i_private;
3954
3955        if (tracing_disabled)
3956                return -ENODEV;
3957
3958        if (trace_array_get(tr) < 0)
3959                return -ENODEV;
3960
3961        filp->private_data = inode->i_private;
3962
3963        return 0;
3964}
3965
3966static int tracing_release(struct inode *inode, struct file *file)
3967{
3968        struct trace_array *tr = inode->i_private;
3969        struct seq_file *m = file->private_data;
3970        struct trace_iterator *iter;
3971        int cpu;
3972
3973        if (!(file->f_mode & FMODE_READ)) {
3974                trace_array_put(tr);
3975                return 0;
3976        }
3977
3978        /* Writes do not use seq_file */
3979        iter = m->private;
3980        mutex_lock(&trace_types_lock);
3981
3982        for_each_tracing_cpu(cpu) {
3983                if (iter->buffer_iter[cpu])
3984                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
3985        }
3986
3987        if (iter->trace && iter->trace->close)
3988                iter->trace->close(iter);
3989
3990        if (!iter->snapshot)
3991                /* reenable tracing if it was previously enabled */
3992                tracing_start_tr(tr);
3993
3994        __trace_array_put(tr);
3995
3996        mutex_unlock(&trace_types_lock);
3997
3998        mutex_destroy(&iter->mutex);
3999        free_cpumask_var(iter->started);
4000        kfree(iter->trace);
4001        kfree(iter->buffer_iter);
4002        seq_release_private(inode, file);
4003
4004        return 0;
4005}
4006
4007static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4008{
4009        struct trace_array *tr = inode->i_private;
4010
4011        trace_array_put(tr);
4012        return 0;
4013}
4014
4015static int tracing_single_release_tr(struct inode *inode, struct file *file)
4016{
4017        struct trace_array *tr = inode->i_private;
4018
4019        trace_array_put(tr);
4020
4021        return single_release(inode, file);
4022}
4023
4024static int tracing_open(struct inode *inode, struct file *file)
4025{
4026        struct trace_array *tr = inode->i_private;
4027        struct trace_iterator *iter;
4028        int ret = 0;
4029
4030        if (trace_array_get(tr) < 0)
4031                return -ENODEV;
4032
4033        /* If this file was open for write, then erase contents */
4034        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4035                int cpu = tracing_get_cpu(inode);
4036                struct trace_buffer *trace_buf = &tr->trace_buffer;
4037
4038#ifdef CONFIG_TRACER_MAX_TRACE
4039                if (tr->current_trace->print_max)
4040                        trace_buf = &tr->max_buffer;
4041#endif
4042
4043                if (cpu == RING_BUFFER_ALL_CPUS)
4044                        tracing_reset_online_cpus(trace_buf);
4045                else
4046                        tracing_reset(trace_buf, cpu);
4047        }
4048
4049        if (file->f_mode & FMODE_READ) {
4050                iter = __tracing_open(inode, file, false);
4051                if (IS_ERR(iter))
4052                        ret = PTR_ERR(iter);
4053                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4054                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
4055        }
4056
4057        if (ret < 0)
4058                trace_array_put(tr);
4059
4060        return ret;
4061}
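/*
 * The O_TRUNC handling above is what makes the documented
 *
 *	# echo > trace
 *
 * idiom clear the buffer: the shell opens "trace" for writing with
 * O_TRUNC, and the write itself is swallowed by tracing_write_stub().
 */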
4062
4063/*
4064 * Some tracers are not suitable for instance buffers.
4065 * A tracer is always available for the global array (toplevel),
4066 * and is available to an instance only if it explicitly allows it.
4067 */
4068static bool
4069trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4070{
4071        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4072}
4073
4074/* Find the next tracer that this trace array may use */
4075static struct tracer *
4076get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4077{
4078        while (t && !trace_ok_for_array(t, tr))
4079                t = t->next;
4080
4081        return t;
4082}
4083
4084static void *
4085t_next(struct seq_file *m, void *v, loff_t *pos)
4086{
4087        struct trace_array *tr = m->private;
4088        struct tracer *t = v;
4089
4090        (*pos)++;
4091
4092        if (t)
4093                t = get_tracer_for_array(tr, t->next);
4094
4095        return t;
4096}
4097
4098static void *t_start(struct seq_file *m, loff_t *pos)
4099{
4100        struct trace_array *tr = m->private;
4101        struct tracer *t;
4102        loff_t l = 0;
4103
4104        mutex_lock(&trace_types_lock);
4105
4106        t = get_tracer_for_array(tr, trace_types);
4107        for (; t && l < *pos; t = t_next(m, t, &l))
4108                ;
4109
4110        return t;
4111}
4112
4113static void t_stop(struct seq_file *m, void *p)
4114{
4115        mutex_unlock(&trace_types_lock);
4116}
4117
4118static int t_show(struct seq_file *m, void *v)
4119{
4120        struct tracer *t = v;
4121
4122        if (!t)
4123                return 0;
4124
4125        seq_puts(m, t->name);
4126        if (t->next)
4127                seq_putc(m, ' ');
4128        else
4129                seq_putc(m, '\n');
4130
4131        return 0;
4132}
4133
4134static const struct seq_operations show_traces_seq_ops = {
4135        .start          = t_start,
4136        .next           = t_next,
4137        .stop           = t_stop,
4138        .show           = t_show,
4139};
4140
4141static int show_traces_open(struct inode *inode, struct file *file)
4142{
4143        struct trace_array *tr = inode->i_private;
4144        struct seq_file *m;
4145        int ret;
4146
4147        if (tracing_disabled)
4148                return -ENODEV;
4149
4150        ret = seq_open(file, &show_traces_seq_ops);
4151        if (ret)
4152                return ret;
4153
4154        m = file->private_data;
4155        m->private = tr;
4156
4157        return 0;
4158}
4159
4160static ssize_t
4161tracing_write_stub(struct file *filp, const char __user *ubuf,
4162                   size_t count, loff_t *ppos)
4163{
4164        return count;
4165}
4166
4167loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4168{
4169        int ret;
4170
4171        if (file->f_mode & FMODE_READ)
4172                ret = seq_lseek(file, offset, whence);
4173        else
4174                file->f_pos = ret = 0;
4175
4176        return ret;
4177}
4178
4179static const struct file_operations tracing_fops = {
4180        .open           = tracing_open,
4181        .read           = seq_read,
4182        .write          = tracing_write_stub,
4183        .llseek         = tracing_lseek,
4184        .release        = tracing_release,
4185};
4186
4187static const struct file_operations show_traces_fops = {
4188        .open           = show_traces_open,
4189        .read           = seq_read,
4190        .release        = seq_release,
4191        .llseek         = seq_lseek,
4192};
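/*
 * "available_tracers" (served by the ops above) is a single line of
 * space separated names, e.g. (depending on which tracers are built in):
 *
 *	# cat available_tracers
 *	function_graph wakeup function nop
 */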
4193
4194static ssize_t
4195tracing_cpumask_read(struct file *filp, char __user *ubuf,
4196                     size_t count, loff_t *ppos)
4197{
4198        struct trace_array *tr = file_inode(filp)->i_private;
4199        char *mask_str;
4200        int len;
4201
4202        len = snprintf(NULL, 0, "%*pb\n",
4203                       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4204        mask_str = kmalloc(len, GFP_KERNEL);
4205        if (!mask_str)
4206                return -ENOMEM;
4207
4208        len = snprintf(mask_str, len, "%*pb\n",
4209                       cpumask_pr_args(tr->tracing_cpumask));
4210        if (len >= count) {
4211                count = -EINVAL;
4212                goto out_err;
4213        }
4214        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4215
4216out_err:
4217        kfree(mask_str);
4218
4219        return count;
4220}
4221
4222static ssize_t
4223tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4224                      size_t count, loff_t *ppos)
4225{
4226        struct trace_array *tr = file_inode(filp)->i_private;
4227        cpumask_var_t tracing_cpumask_new;
4228        int err, cpu;
4229
4230        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4231                return -ENOMEM;
4232
4233        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4234        if (err)
4235                goto err_unlock;
4236
4237        local_irq_disable();
4238        arch_spin_lock(&tr->max_lock);
4239        for_each_tracing_cpu(cpu) {
4240                /*
4241                 * Increase/decrease the disabled counter if we are
4242                 * about to flip a bit in the cpumask:
4243                 */
4244                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4245                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4246                        atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4247                        ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4248                }
4249                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4250                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4251                        atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4252                        ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4253                }
4254        }
4255        arch_spin_unlock(&tr->max_lock);
4256        local_irq_enable();
4257
4258        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4259        free_cpumask_var(tracing_cpumask_new);
4260
4261        return count;
4262
4263err_unlock:
4264        free_cpumask_var(tracing_cpumask_new);
4265
4266        return err;
4267}
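/*
 * Example usage (assuming tracefs mounted at /sys/kernel/tracing): the
 * mask is parsed by cpumask_parse_user(), i.e. it is a hex bitmask, so
 *
 *	# echo 3 > tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 1, and
 *
 *	# cat tracing_cpumask
 *
 * prints the current mask in the same format.
 */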
4268
4269static const struct file_operations tracing_cpumask_fops = {
4270        .open           = tracing_open_generic_tr,
4271        .read           = tracing_cpumask_read,
4272        .write          = tracing_cpumask_write,
4273        .release        = tracing_release_generic_tr,
4274        .llseek         = generic_file_llseek,
4275};
4276
4277static int tracing_trace_options_show(struct seq_file *m, void *v)
4278{
4279        struct tracer_opt *trace_opts;
4280        struct trace_array *tr = m->private;
4281        u32 tracer_flags;
4282        int i;
4283
4284        mutex_lock(&trace_types_lock);
4285        tracer_flags = tr->current_trace->flags->val;
4286        trace_opts = tr->current_trace->flags->opts;
4287
4288        for (i = 0; trace_options[i]; i++) {
4289                if (tr->trace_flags & (1 << i))
4290                        seq_printf(m, "%s\n", trace_options[i]);
4291                else
4292                        seq_printf(m, "no%s\n", trace_options[i]);
4293        }
4294
4295        for (i = 0; trace_opts[i].name; i++) {
4296                if (tracer_flags & trace_opts[i].bit)
4297                        seq_printf(m, "%s\n", trace_opts[i].name);
4298                else
4299                        seq_printf(m, "no%s\n", trace_opts[i].name);
4300        }
4301        mutex_unlock(&trace_types_lock);
4302
4303        return 0;
4304}
4305
4306static int __set_tracer_option(struct trace_array *tr,
4307                               struct tracer_flags *tracer_flags,
4308                               struct tracer_opt *opts, int neg)
4309{
4310        struct tracer *trace = tracer_flags->trace;
4311        int ret;
4312
4313        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4314        if (ret)
4315                return ret;
4316
4317        if (neg)
4318                tracer_flags->val &= ~opts->bit;
4319        else
4320                tracer_flags->val |= opts->bit;
4321        return 0;
4322}
4323
4324/* Try to assign a tracer specific option */
4325static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4326{
4327        struct tracer *trace = tr->current_trace;
4328        struct tracer_flags *tracer_flags = trace->flags;
4329        struct tracer_opt *opts = NULL;
4330        int i;
4331
4332        for (i = 0; tracer_flags->opts[i].name; i++) {
4333                opts = &tracer_flags->opts[i];
4334
4335                if (strcmp(cmp, opts->name) == 0)
4336                        return __set_tracer_option(tr, trace->flags, opts, neg);
4337        }
4338
4339        return -EINVAL;
4340}
4341
4342/* Some tracers require overwrite to stay enabled */
4343int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4344{
4345        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4346                return -1;
4347
4348        return 0;
4349}
4350
4351int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4352{
4353        /* do nothing if flag is already set */
4354        if (!!(tr->trace_flags & mask) == !!enabled)
4355                return 0;
4356
4357        /* Give the tracer a chance to approve the change */
4358        if (tr->current_trace->flag_changed)
4359                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4360                        return -EINVAL;
4361
4362        if (enabled)
4363                tr->trace_flags |= mask;
4364        else
4365                tr->trace_flags &= ~mask;
4366
4367        if (mask == TRACE_ITER_RECORD_CMD)
4368                trace_event_enable_cmd_record(enabled);
4369
4370        if (mask == TRACE_ITER_RECORD_TGID) {
4371                if (!tgid_map)
4372                        tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4373                                           sizeof(*tgid_map),
4374                                           GFP_KERNEL);
4375                if (!tgid_map) {
4376                        tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4377                        return -ENOMEM;
4378                }
4379
4380                trace_event_enable_tgid_record(enabled);
4381        }
4382
4383        if (mask == TRACE_ITER_EVENT_FORK)
4384                trace_event_follow_fork(tr, enabled);
4385
4386        if (mask == TRACE_ITER_FUNC_FORK)
4387                ftrace_pid_follow_fork(tr, enabled);
4388
4389        if (mask == TRACE_ITER_OVERWRITE) {
4390                ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4391#ifdef CONFIG_TRACER_MAX_TRACE
4392                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4393#endif
4394        }
4395
4396        if (mask == TRACE_ITER_PRINTK) {
4397                trace_printk_start_stop_comm(enabled);
4398                trace_printk_control(enabled);
4399        }
4400
4401        return 0;
4402}
4403
4404static int trace_set_options(struct trace_array *tr, char *option)
4405{
4406        char *cmp;
4407        int neg = 0;
4408        int ret;
4409        size_t orig_len = strlen(option);
4410
4411        cmp = strstrip(option);
4412
4413        if (strncmp(cmp, "no", 2) == 0) {
4414                neg = 1;
4415                cmp += 2;
4416        }
4417
4418        mutex_lock(&trace_types_lock);
4419
4420        ret = match_string(trace_options, -1, cmp);
4421        /* If the name is not a core option, try the tracer-specific options */
4422        if (ret < 0)
4423                ret = set_tracer_option(tr, cmp, neg);
4424        else
4425                ret = set_tracer_flag(tr, 1 << ret, !neg);
4426
4427        mutex_unlock(&trace_types_lock);
4428
4429        /*
4430         * If the first trailing whitespace is replaced with '\0' by strstrip,
4431         * turn it back into a space.
4432         */
4433        if (orig_len > strlen(option))
4434                option[strlen(option)] = ' ';
4435
4436        return ret;
4437}
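/*
 * For example (available option names depend on the configuration):
 *
 *	# echo noprint-parent > trace_options
 *	# echo print-parent   > trace_options
 *
 * clears and then sets TRACE_ITER_PRINT_PARENT; names that are not core
 * options fall through to the current tracer's own flags.
 */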
4438
4439static void __init apply_trace_boot_options(void)
4440{
4441        char *buf = trace_boot_options_buf;
4442        char *option;
4443
4444        while (true) {
4445                option = strsep(&buf, ",");
4446
4447                if (!option)
4448                        break;
4449
4450                if (*option)
4451                        trace_set_options(&global_trace, option);
4452
4453                /* Put back the comma to allow this to be called again */
4454                if (buf)
4455                        *(buf - 1) = ',';
4456        }
4457}
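/*
 * For instance, booting with the trace_options= parameter (registered
 * elsewhere in this file), e.g.:
 *
 *	trace_options=nooverwrite,sym-addr
 *
 * ends up calling trace_set_options() once for "nooverwrite" and once
 * for "sym-addr".
 */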
4458
4459static ssize_t
4460tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4461                        size_t cnt, loff_t *ppos)
4462{
4463        struct seq_file *m = filp->private_data;
4464        struct trace_array *tr = m->private;
4465        char buf[64];
4466        int ret;
4467
4468        if (cnt >= sizeof(buf))
4469                return -EINVAL;
4470
4471        if (copy_from_user(buf, ubuf, cnt))
4472                return -EFAULT;
4473
4474        buf[cnt] = 0;
4475
4476        ret = trace_set_options(tr, buf);
4477        if (ret < 0)
4478                return ret;
4479
4480        *ppos += cnt;
4481
4482        return cnt;
4483}
4484
4485static int tracing_trace_options_open(struct inode *inode, struct file *file)
4486{
4487        struct trace_array *tr = inode->i_private;
4488        int ret;
4489
4490        if (tracing_disabled)
4491                return -ENODEV;
4492
4493        if (trace_array_get(tr) < 0)
4494                return -ENODEV;
4495
4496        ret = single_open(file, tracing_trace_options_show, inode->i_private);
4497        if (ret < 0)
4498                trace_array_put(tr);
4499
4500        return ret;
4501}
4502
4503static const struct file_operations tracing_iter_fops = {
4504        .open           = tracing_trace_options_open,
4505        .read           = seq_read,
4506        .llseek         = seq_lseek,
4507        .release        = tracing_single_release_tr,
4508        .write          = tracing_trace_options_write,
4509};
4510
4511static const char readme_msg[] =
4512        "tracing mini-HOWTO:\n\n"
4513        "# echo 0 > tracing_on : quick way to disable tracing\n"
4514        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4515        " Important files:\n"
4516        "  trace\t\t\t- The static contents of the buffer\n"
4517        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4518        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4519        "  current_tracer\t- function and latency tracers\n"
4520        "  available_tracers\t- list of configured tracers for current_tracer\n"
4521        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4522        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4523        "  trace_clock\t\t- change the clock used to order events\n"
4524        "       local:   Per cpu clock but may not be synced across CPUs\n"
4525        "      global:   Synced across CPUs but slows tracing down.\n"
4526        "     counter:   Not a clock, but just an increment\n"
4527        "      uptime:   Jiffy counter from time of boot\n"
4528        "        perf:   Same clock that perf events use\n"
4529#ifdef CONFIG_X86_64
4530        "     x86-tsc:   TSC cycle counter\n"
4531#endif
4532        "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4533        "       delta:   Delta difference against a buffer-wide timestamp\n"
4534        "    absolute:   Absolute (standalone) timestamp\n"
4535        "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
4536        "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4537        "  tracing_cpumask\t- Limit which CPUs to trace\n"
4538        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4539        "\t\t\t  Remove sub-buffer with rmdir\n"
4540        "  trace_options\t\t- Set format or modify how tracing happens\n"
4541        "\t\t\t  Disable an option by prefixing 'no' to the\n"
4542        "\t\t\t  option name\n"
4543        "  saved_cmdlines_size\t- echo the number of comm-pid pairs to cache in here\n"
4544#ifdef CONFIG_DYNAMIC_FTRACE
4545        "\n  available_filter_functions - list of functions that can be filtered on\n"
4546        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4547        "\t\t\t  functions\n"
4548        "\t     accepts: func_full_name or glob-matching-pattern\n"
4549        "\t     modules: Can select a group via module\n"
4550        "\t      Format: :mod:<module-name>\n"
4551        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4552        "\t    triggers: a command to perform when function is hit\n"
4553        "\t      Format: <function>:<trigger>[:count]\n"
4554        "\t     trigger: traceon, traceoff\n"
4555        "\t\t      enable_event:<system>:<event>\n"
4556        "\t\t      disable_event:<system>:<event>\n"
4557#ifdef CONFIG_STACKTRACE
4558        "\t\t      stacktrace\n"
4559#endif
4560#ifdef CONFIG_TRACER_SNAPSHOT
4561        "\t\t      snapshot\n"
4562#endif
4563        "\t\t      dump\n"
4564        "\t\t      cpudump\n"
4565        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4566        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4567        "\t     The first one will disable tracing every time do_fault is hit\n"
4568        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4569        "\t       The first time do_trap is hit and it disables tracing, the\n"
4570        "\t       counter will decrement to 2. If tracing is already disabled,\n"
4571        "\t       the counter will not decrement. It only decrements when the\n"
4572        "\t       trigger did work\n"
4573        "\t     To remove trigger without count:\n"
4574        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4575        "\t     To remove trigger with a count:\n"
4576        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4577        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4578        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4579        "\t    modules: Can select a group via module command :mod:\n"
4580        "\t    Does not accept triggers\n"
4581#endif /* CONFIG_DYNAMIC_FTRACE */
4582#ifdef CONFIG_FUNCTION_TRACER
4583        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4584        "\t\t    (function)\n"
4585#endif
4586#ifdef CONFIG_FUNCTION_GRAPH_TRACER
4587        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4588        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4589        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4590#endif
4591#ifdef CONFIG_TRACER_SNAPSHOT
4592        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4593        "\t\t\t  snapshot buffer. Read the contents for more\n"
4594        "\t\t\t  information\n"
4595#endif
4596#ifdef CONFIG_STACK_TRACER
4597        "  stack_trace\t\t- Shows the max stack trace when active\n"
4598        "  stack_max_size\t- Shows current max stack size that was traced\n"
4599        "\t\t\t  Write into this file to reset the max size (trigger a\n"
4600        "\t\t\t  new trace)\n"
4601#ifdef CONFIG_DYNAMIC_FTRACE
4602        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4603        "\t\t\t  traces\n"
4604#endif
4605#endif /* CONFIG_STACK_TRACER */
4606#ifdef CONFIG_KPROBE_EVENTS
4607        "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4608        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4609#endif
4610#ifdef CONFIG_UPROBE_EVENTS
4611        "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4612        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4613#endif
4614#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4615        "\t  accepts: event-definitions (one definition per line)\n"
4616        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4617        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4618        "\t           -:[<group>/]<event>\n"
4619#ifdef CONFIG_KPROBE_EVENTS
4620        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4621  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4622#endif
4623#ifdef CONFIG_UPROBE_EVENTS
4624        "\t    place: <path>:<offset>\n"
4625#endif
4626        "\t     args: <name>=fetcharg[:type]\n"
4627        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4628        "\t           $stack<index>, $stack, $retval, $comm\n"
4629        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4630        "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4631#endif
4632        "  events/\t\t- Directory containing all trace event subsystems:\n"
4633        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4634        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4635        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4636        "\t\t\t  events\n"
4637        "      filter\t\t- If set, only events passing filter are traced\n"
4638        "  events/<system>/<event>/\t- Directory containing control files for\n"
4639        "\t\t\t  <event>:\n"
4640        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4641        "      filter\t\t- If set, only events passing filter are traced\n"
4642        "      trigger\t\t- If set, a command to perform when event is hit\n"
4643        "\t    Format: <trigger>[:count][if <filter>]\n"
4644        "\t   trigger: traceon, traceoff\n"
4645        "\t            enable_event:<system>:<event>\n"
4646        "\t            disable_event:<system>:<event>\n"
4647#ifdef CONFIG_HIST_TRIGGERS
4648        "\t            enable_hist:<system>:<event>\n"
4649        "\t            disable_hist:<system>:<event>\n"
4650#endif
4651#ifdef CONFIG_STACKTRACE
4652        "\t\t    stacktrace\n"
4653#endif
4654#ifdef CONFIG_TRACER_SNAPSHOT
4655        "\t\t    snapshot\n"
4656#endif
4657#ifdef CONFIG_HIST_TRIGGERS
4658        "\t\t    hist (see below)\n"
4659#endif
4660        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4661        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4662        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4663        "\t                  events/block/block_unplug/trigger\n"
4664        "\t   The first disables tracing every time block_unplug is hit.\n"
4665        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4666        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4667        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4668        "\t   Like function triggers, the counter is only decremented if it\n"
4669        "\t    enabled or disabled tracing.\n"
4670        "\t   To remove a trigger without a count:\n"
4671        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4672        "\t   To remove a trigger with a count:\n"
4673        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4674        "\t   Filters can be ignored when removing a trigger.\n"
4675#ifdef CONFIG_HIST_TRIGGERS
4676        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4677        "\t    Format: hist:keys=<field1[,field2,...]>\n"
4678        "\t            [:values=<field1[,field2,...]>]\n"
4679        "\t            [:sort=<field1[,field2,...]>]\n"
4680        "\t            [:size=#entries]\n"
4681        "\t            [:pause][:continue][:clear]\n"
4682        "\t            [:name=histname1]\n"
4683        "\t            [if <filter>]\n\n"
4684        "\t    When a matching event is hit, an entry is added to a hash\n"
4685        "\t    table using the key(s) and value(s) named, and the value of a\n"
4686        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4687        "\t    correspond to fields in the event's format description.  Keys\n"
4688        "\t    can be any field, or the special string 'stacktrace'.\n"
4689        "\t    Compound keys consisting of up to two fields can be specified\n"
4690        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4691        "\t    fields.  Sort keys consisting of up to two fields can be\n"
4692        "\t    specified using the 'sort' keyword.  The sort direction can\n"
4693        "\t    be modified by appending '.descending' or '.ascending' to a\n"
4694        "\t    sort field.  The 'size' parameter can be used to specify more\n"
4695        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4696        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4697        "\t    its histogram data will be shared with other triggers of the\n"
4698        "\t    same name, and trigger hits will update this common data.\n\n"
4699        "\t    Reading the 'hist' file for the event will dump the hash\n"
4700        "\t    table in its entirety to stdout.  If there are multiple hist\n"
4701        "\t    triggers attached to an event, there will be a table for each\n"
4702        "\t    trigger in the output.  The table displayed for a named\n"
4703        "\t    trigger will be the same as any other instance having the\n"
4704        "\t    same name.  The default format used to display a given field\n"
4705        "\t    can be modified by appending any of the following modifiers\n"
4706        "\t    to the field name, as applicable:\n\n"
4707        "\t            .hex        display a number as a hex value\n"
4708        "\t            .sym        display an address as a symbol\n"
4709        "\t            .sym-offset display an address as a symbol and offset\n"
4710        "\t            .execname   display a common_pid as a program name\n"
4711        "\t            .syscall    display a syscall id as a syscall name\n"
4712        "\t            .log2       display log2 value rather than raw number\n"
4713        "\t            .usecs      display a common_timestamp in microseconds\n\n"
4714        "\t    The 'pause' parameter can be used to pause an existing hist\n"
4715        "\t    trigger or to start a hist trigger but not log any events\n"
4716        "\t    until told to do so.  'continue' can be used to start or\n"
4717        "\t    restart a paused hist trigger.\n\n"
4718        "\t    The 'clear' parameter will clear the contents of a running\n"
4719        "\t    hist trigger and leave its current paused/active state\n"
4720        "\t    unchanged.\n\n"
4721        "\t    The enable_hist and disable_hist triggers can be used to\n"
4722        "\t    have one event conditionally start and stop another event's\n"
4723        "\t    already-attached hist trigger.  The syntax is analogous to\n"
4724        "\t    the enable_event and disable_event triggers.\n"
4725#endif
4726;
4727
4728static ssize_t
4729tracing_readme_read(struct file *filp, char __user *ubuf,
4730                       size_t cnt, loff_t *ppos)
4731{
4732        return simple_read_from_buffer(ubuf, cnt, ppos,
4733                                        readme_msg, strlen(readme_msg));
4734}
4735
4736static const struct file_operations tracing_readme_fops = {
4737        .open           = tracing_open_generic,
4738        .read           = tracing_readme_read,
4739        .llseek         = generic_file_llseek,
4740};
4741
4742static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4743{
4744        int *ptr = v;
4745
4746        if (*pos || m->count)
4747                ptr++;
4748
4749        (*pos)++;
4750
4751        for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4752                if (trace_find_tgid(*ptr))
4753                        return ptr;
4754        }
4755
4756        return NULL;
4757}
4758
4759static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4760{
4761        void *v;
4762        loff_t l = 0;
4763
4764        if (!tgid_map)
4765                return NULL;
4766
4767        v = &tgid_map[0];
4768        while (l <= *pos) {
4769                v = saved_tgids_next(m, v, &l);
4770                if (!v)
4771                        return NULL;
4772        }
4773
4774        return v;
4775}
4776
4777static void saved_tgids_stop(struct seq_file *m, void *v)
4778{
4779}
4780
4781static int saved_tgids_show(struct seq_file *m, void *v)
4782{
4783        int pid = (int *)v - tgid_map;
4784
4785        seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4786        return 0;
4787}
4788
4789static const struct seq_operations tracing_saved_tgids_seq_ops = {
4790        .start          = saved_tgids_start,
4791        .stop           = saved_tgids_stop,
4792        .next           = saved_tgids_next,
4793        .show           = saved_tgids_show,
4794};
4795
4796static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4797{
4798        if (tracing_disabled)
4799                return -ENODEV;
4800
4801        return seq_open(filp, &tracing_saved_tgids_seq_ops);
4802}
4803
4804
4805static const struct file_operations tracing_saved_tgids_fops = {
4806        .open           = tracing_saved_tgids_open,
4807        .read           = seq_read,
4808        .llseek         = seq_lseek,
4809        .release        = seq_release,
4810};
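/*
 * The seq_ops above back the "saved_tgids" file, which emits one
 * "<pid> <tgid>" pair per line for every pid that has a cached tgid,
 * e.g. (values are illustrative only):
 *
 *   1023 1023
 *   1045 1023
 */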
4811
4812static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4813{
4814        unsigned int *ptr = v;
4815
4816        if (*pos || m->count)
4817                ptr++;
4818
4819        (*pos)++;
4820
4821        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4822             ptr++) {
4823                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4824                        continue;
4825
4826                return ptr;
4827        }
4828
4829        return NULL;
4830}
4831
4832static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4833{
4834        void *v;
4835        loff_t l = 0;
4836
4837        preempt_disable();
4838        arch_spin_lock(&trace_cmdline_lock);
4839
4840        v = &savedcmd->map_cmdline_to_pid[0];
4841        while (l <= *pos) {
4842                v = saved_cmdlines_next(m, v, &l);
4843                if (!v)
4844                        return NULL;
4845        }
4846
4847        return v;
4848}
4849
4850static void saved_cmdlines_stop(struct seq_file *m, void *v)
4851{
4852        arch_spin_unlock(&trace_cmdline_lock);
4853        preempt_enable();
4854}
4855
4856static int saved_cmdlines_show(struct seq_file *m, void *v)
4857{
4858        char buf[TASK_COMM_LEN];
4859        unsigned int *pid = v;
4860
4861        __trace_find_cmdline(*pid, buf);
4862        seq_printf(m, "%d %s\n", *pid, buf);
4863        return 0;
4864}
4865
4866static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4867        .start          = saved_cmdlines_start,
4868        .next           = saved_cmdlines_next,
4869        .stop           = saved_cmdlines_stop,
4870        .show           = saved_cmdlines_show,
4871};
4872
4873static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4874{
4875        if (tracing_disabled)
4876                return -ENODEV;
4877
4878        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4879}
4880
4881static const struct file_operations tracing_saved_cmdlines_fops = {
4882        .open           = tracing_saved_cmdlines_open,
4883        .read           = seq_read,
4884        .llseek         = seq_lseek,
4885        .release        = seq_release,
4886};
4887
4888static ssize_t
4889tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4890                                 size_t cnt, loff_t *ppos)
4891{
4892        char buf[64];
4893        int r;
4894
4895        arch_spin_lock(&trace_cmdline_lock);
4896        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4897        arch_spin_unlock(&trace_cmdline_lock);
4898
4899        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4900}
4901
4902static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4903{
4904        kfree(s->saved_cmdlines);
4905        kfree(s->map_cmdline_to_pid);
4906        kfree(s);
4907}
4908
4909static int tracing_resize_saved_cmdlines(unsigned int val)
4910{
4911        struct saved_cmdlines_buffer *s, *savedcmd_temp;
4912
4913        s = kmalloc(sizeof(*s), GFP_KERNEL);
4914        if (!s)
4915                return -ENOMEM;
4916
4917        if (allocate_cmdlines_buffer(val, s) < 0) {
4918                kfree(s);
4919                return -ENOMEM;
4920        }
4921
4922        arch_spin_lock(&trace_cmdline_lock);
4923        savedcmd_temp = savedcmd;
4924        savedcmd = s;
4925        arch_spin_unlock(&trace_cmdline_lock);
4926        free_saved_cmdlines_buffer(savedcmd_temp);
4927
4928        return 0;
4929}
4930
4931static ssize_t
4932tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4933                                  size_t cnt, loff_t *ppos)
4934{
4935        unsigned long val;
4936        int ret;
4937
4938        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4939        if (ret)
4940                return ret;
4941
4942        /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4943        if (!val || val > PID_MAX_DEFAULT)
4944                return -EINVAL;
4945
4946        ret = tracing_resize_saved_cmdlines((unsigned int)val);
4947        if (ret < 0)
4948                return ret;
4949
4950        *ppos += cnt;
4951
4952        return cnt;
4953}
4954
4955static const struct file_operations tracing_saved_cmdlines_size_fops = {
4956        .open           = tracing_open_generic,
4957        .read           = tracing_saved_cmdlines_size_read,
4958        .write          = tracing_saved_cmdlines_size_write,
4959};
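/*
 * Example usage of the file backed by the fops above (typically exposed
 * as "saved_cmdlines_size", assuming tracefs at /sys/kernel/tracing);
 * the accepted range is 1..PID_MAX_DEFAULT:
 *
 *   cat saved_cmdlines_size
 *   echo 4096 > saved_cmdlines_size
 *   cat saved_cmdlines
 */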
4960
4961#ifdef CONFIG_TRACE_EVAL_MAP_FILE
4962static union trace_eval_map_item *
4963update_eval_map(union trace_eval_map_item *ptr)
4964{
4965        if (!ptr->map.eval_string) {
4966                if (ptr->tail.next) {
4967                        ptr = ptr->tail.next;
4968                        /* Set ptr to the next real item (skip head) */
4969                        ptr++;
4970                } else
4971                        return NULL;
4972        }
4973        return ptr;
4974}
4975
4976static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4977{
4978        union trace_eval_map_item *ptr = v;
4979
4980        /*
4981         * Paranoid! If ptr points to end, we don't want to increment past it.
4982         * This really should never happen.
4983         */
4984        ptr = update_eval_map(ptr);
4985        if (WARN_ON_ONCE(!ptr))
4986                return NULL;
4987
4988        ptr++;
4989
4990        (*pos)++;
4991
4992        ptr = update_eval_map(ptr);
4993
4994        return ptr;
4995}
4996
4997static void *eval_map_start(struct seq_file *m, loff_t *pos)
4998{
4999        union trace_eval_map_item *v;
5000        loff_t l = 0;
5001
5002        mutex_lock(&trace_eval_mutex);
5003
5004        v = trace_eval_maps;
5005        if (v)
5006                v++;
5007
5008        while (v && l < *pos) {
5009                v = eval_map_next(m, v, &l);
5010        }
5011
5012        return v;
5013}
5014
5015static void eval_map_stop(struct seq_file *m, void *v)
5016{
5017        mutex_unlock(&trace_eval_mutex);
5018}
5019
5020static int eval_map_show(struct seq_file *m, void *v)
5021{
5022        union trace_eval_map_item *ptr = v;
5023
5024        seq_printf(m, "%s %ld (%s)\n",
5025                   ptr->map.eval_string, ptr->map.eval_value,
5026                   ptr->map.system);
5027
5028        return 0;
5029}
5030
5031static const struct seq_operations tracing_eval_map_seq_ops = {
5032        .start          = eval_map_start,
5033        .next           = eval_map_next,
5034        .stop           = eval_map_stop,
5035        .show           = eval_map_show,
5036};
5037
5038static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5039{
5040        if (tracing_disabled)
5041                return -ENODEV;
5042
5043        return seq_open(filp, &tracing_eval_map_seq_ops);
5044}
5045
5046static const struct file_operations tracing_eval_map_fops = {
5047        .open           = tracing_eval_map_open,
5048        .read           = seq_read,
5049        .llseek         = seq_lseek,
5050        .release        = seq_release,
5051};
5052
5053static inline union trace_eval_map_item *
5054trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5055{
5056        /* Return tail of array given the head */
5057        return ptr + ptr->head.length + 1;
5058}
5059
5060static void
5061trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5062                           int len)
5063{
5064        struct trace_eval_map **stop;
5065        struct trace_eval_map **map;
5066        union trace_eval_map_item *map_array;
5067        union trace_eval_map_item *ptr;
5068
5069        stop = start + len;
5070
5071        /*
5072         * The trace_eval_maps contains the map plus a head and tail item,
5073         * where the head holds the module and length of array, and the
5074         * tail holds a pointer to the next list.
5075         */
5076        map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5077        if (!map_array) {
5078                pr_warn("Unable to allocate trace eval mapping\n");
5079                return;
5080        }
5081
5082        mutex_lock(&trace_eval_mutex);
5083
5084        if (!trace_eval_maps)
5085                trace_eval_maps = map_array;
5086        else {
5087                ptr = trace_eval_maps;
5088                for (;;) {
5089                        ptr = trace_eval_jmp_to_tail(ptr);
5090                        if (!ptr->tail.next)
5091                                break;
5092                        ptr = ptr->tail.next;
5093
5094                }
5095                ptr->tail.next = map_array;
5096        }
5097        map_array->head.mod = mod;
5098        map_array->head.length = len;
5099        map_array++;
5100
5101        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5102                map_array->map = **map;
5103                map_array++;
5104        }
5105        memset(map_array, 0, sizeof(*map_array));
5106
5107        mutex_unlock(&trace_eval_mutex);
5108}
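/*
 * Layout of one map_array chunk allocated above, with len real entries
 * (the final element is zeroed and doubles as the tail):
 *
 *   map_array[0]          head  (mod, length = len)
 *   map_array[1..len]     map   (copies of the trace_eval_map entries)
 *   map_array[len + 1]    tail  (next pointer to the following chunk)
 *
 * which is why trace_eval_jmp_to_tail() returns ptr + length + 1.
 */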
5109
5110static void trace_create_eval_file(struct dentry *d_tracer)
5111{
5112        trace_create_file("eval_map", 0444, d_tracer,
5113                          NULL, &tracing_eval_map_fops);
5114}
5115
5116#else /* CONFIG_TRACE_EVAL_MAP_FILE */
5117static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5118static inline void trace_insert_eval_map_file(struct module *mod,
5119                              struct trace_eval_map **start, int len) { }
5120#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5121
5122static void trace_insert_eval_map(struct module *mod,
5123                                  struct trace_eval_map **start, int len)
5124{
5125        struct trace_eval_map **map;
5126
5127        if (len <= 0)
5128                return;
5129
5130        map = start;
5131
5132        trace_event_eval_update(map, len);
5133
5134        trace_insert_eval_map_file(mod, start, len);
5135}
5136
5137static ssize_t
5138tracing_set_trace_read(struct file *filp, char __user *ubuf,
5139                       size_t cnt, loff_t *ppos)
5140{
5141        struct trace_array *tr = filp->private_data;
5142        char buf[MAX_TRACER_SIZE+2];
5143        int r;
5144
5145        mutex_lock(&trace_types_lock);
5146        r = sprintf(buf, "%s\n", tr->current_trace->name);
5147        mutex_unlock(&trace_types_lock);
5148
5149        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5150}
5151
5152int tracer_init(struct tracer *t, struct trace_array *tr)
5153{
5154        tracing_reset_online_cpus(&tr->trace_buffer);
5155        return t->init(tr);
5156}
5157
5158static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5159{
5160        int cpu;
5161
5162        for_each_tracing_cpu(cpu)
5163                per_cpu_ptr(buf->data, cpu)->entries = val;
5164}
5165
5166#ifdef CONFIG_TRACER_MAX_TRACE
5167/* resize @trace_buf's buffer to the size of @size_buf's entries */
5168static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5169                                        struct trace_buffer *size_buf, int cpu_id)
5170{
5171        int cpu, ret = 0;
5172
5173        if (cpu_id == RING_BUFFER_ALL_CPUS) {
5174                for_each_tracing_cpu(cpu) {
5175                        ret = ring_buffer_resize(trace_buf->buffer,
5176                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5177                        if (ret < 0)
5178                                break;
5179                        per_cpu_ptr(trace_buf->data, cpu)->entries =
5180                                per_cpu_ptr(size_buf->data, cpu)->entries;
5181                }
5182        } else {
5183                ret = ring_buffer_resize(trace_buf->buffer,
5184                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5185                if (ret == 0)
5186                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5187                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
5188        }
5189
5190        return ret;
5191}
5192#endif /* CONFIG_TRACER_MAX_TRACE */
5193
5194static int __tracing_resize_ring_buffer(struct trace_array *tr,
5195                                        unsigned long size, int cpu)
5196{
5197        int ret;
5198
5199        /*
5200         * If the kernel or the user changes the size of the ring buffer,
5201         * we use the size that was given and can forget about expanding
5202         * it later.
5203         */
5204        ring_buffer_expanded = true;
5205
5206        /* May be called before buffers are initialized */
5207        if (!tr->trace_buffer.buffer)
5208                return 0;
5209
5210        ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5211        if (ret < 0)
5212                return ret;
5213
5214#ifdef CONFIG_TRACER_MAX_TRACE
5215        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5216            !tr->current_trace->use_max_tr)
5217                goto out;
5218
5219        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5220        if (ret < 0) {
5221                int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5222                                                     &tr->trace_buffer, cpu);
5223                if (r < 0) {
5224                        /*
5225                         * AARGH! We are left with a max buffer of a
5226                         * different size!
5227                         * The max buffer is our "snapshot" buffer.
5228                         * When a tracer needs a snapshot (one of the
5229                         * latency tracers), it swaps the max buffer
5230                         * with the saved snapshot. We succeeded in
5231                         * updating the size of the main buffer, but failed
5232                         * to update the size of the max buffer. And when we
5233                         * tried to reset the main buffer to its original
5234                         * size, we failed there too. This is very unlikely
5235                         * to happen, but if it does, warn and kill all
5236                         * tracing.
5237                         */
5238                        WARN_ON(1);
5239                        tracing_disabled = 1;
5240                }
5241                return ret;
5242        }
5243
5244        if (cpu == RING_BUFFER_ALL_CPUS)
5245                set_buffer_entries(&tr->max_buffer, size);
5246        else
5247                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5248
5249 out:
5250#endif /* CONFIG_TRACER_MAX_TRACE */
5251
5252        if (cpu == RING_BUFFER_ALL_CPUS)
5253                set_buffer_entries(&tr->trace_buffer, size);
5254        else
5255                per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5256
5257        return ret;
5258}
5259
5260static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5261                                          unsigned long size, int cpu_id)
5262{
5263        int ret = size;
5264
5265        mutex_lock(&trace_types_lock);
5266
5267        if (cpu_id != RING_BUFFER_ALL_CPUS) {
5268                /* make sure this cpu is enabled in the mask */
5269                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5270                        ret = -EINVAL;
5271                        goto out;
5272                }
5273        }
5274
5275        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5276        if (ret < 0)
5277                ret = -ENOMEM;
5278
5279out:
5280        mutex_unlock(&trace_types_lock);
5281
5282        return ret;
5283}
5284
5285
5286/**
5287 * tracing_update_buffers - used by tracing facility to expand ring buffers
5288 *
5289 * To save memory on systems where tracing is configured in but never
5290 * used, the ring buffers are set to a minimum size. But once a user
5291 * starts to use the tracing facility, they need to grow to their
5292 * default size.
5293 *
5294 * This function is to be called when a tracer is about to be used.
5295 */
5296int tracing_update_buffers(void)
5297{
5298        int ret = 0;
5299
5300        mutex_lock(&trace_types_lock);
5301        if (!ring_buffer_expanded)
5302                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5303                                                RING_BUFFER_ALL_CPUS);
5304        mutex_unlock(&trace_types_lock);
5305
5306        return ret;
5307}
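/*
 * A minimal caller sketch: expand the buffers before anything starts
 * generating events and bail out on failure (the same pattern is used
 * by tracing_snapshot_write() further down):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	(buffers are now at their full size)
 */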
5308
5309struct trace_option_dentry;
5310
5311static void
5312create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5313
5314/*
5315 * Used to clear out the tracer before deletion of an instance.
5316 * Must have trace_types_lock held.
5317 */
5318static void tracing_set_nop(struct trace_array *tr)
5319{
5320        if (tr->current_trace == &nop_trace)
5321                return;
5322
5323        tr->current_trace->enabled--;
5324
5325        if (tr->current_trace->reset)
5326                tr->current_trace->reset(tr);
5327
5328        tr->current_trace = &nop_trace;
5329}
5330
5331static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5332{
5333        /* Only enable if the directory has been created already. */
5334        if (!tr->dir)
5335                return;
5336
5337        create_trace_option_files(tr, t);
5338}
5339
5340static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5341{
5342        struct tracer *t;
5343#ifdef CONFIG_TRACER_MAX_TRACE
5344        bool had_max_tr;
5345#endif
5346        int ret = 0;
5347
5348        mutex_lock(&trace_types_lock);
5349
5350        if (!ring_buffer_expanded) {
5351                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5352                                                RING_BUFFER_ALL_CPUS);
5353                if (ret < 0)
5354                        goto out;
5355                ret = 0;
5356        }
5357
5358        for (t = trace_types; t; t = t->next) {
5359                if (strcmp(t->name, buf) == 0)
5360                        break;
5361        }
5362        if (!t) {
5363                ret = -EINVAL;
5364                goto out;
5365        }
5366        if (t == tr->current_trace)
5367                goto out;
5368
5369        /* Some tracers won't work if set from the kernel command line */
5370        if (system_state < SYSTEM_RUNNING && t->noboot) {
5371                pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5372                        t->name);
5373                goto out;
5374        }
5375
5376        /* Some tracers are only allowed for the top level buffer */
5377        if (!trace_ok_for_array(t, tr)) {
5378                ret = -EINVAL;
5379                goto out;
5380        }
5381
5382        /* If trace pipe files are being read, we can't change the tracer */
5383        if (tr->current_trace->ref) {
5384                ret = -EBUSY;
5385                goto out;
5386        }
5387
5388        trace_branch_disable();
5389
5390        tr->current_trace->enabled--;
5391
5392        if (tr->current_trace->reset)
5393                tr->current_trace->reset(tr);
5394
5395        /* Current trace needs to be nop_trace before synchronize_sched */
5396        tr->current_trace = &nop_trace;
5397
5398#ifdef CONFIG_TRACER_MAX_TRACE
5399        had_max_tr = tr->allocated_snapshot;
5400
5401        if (had_max_tr && !t->use_max_tr) {
5402                /*
5403                 * We need to make sure that the update_max_tr sees that
5404                 * current_trace changed to nop_trace to keep it from
5405                 * swapping the buffers after we resize it.
5406                 * update_max_tr() is called with interrupts disabled,
5407                 * so a synchronize_sched() is sufficient.
5408                 */
5409                synchronize_sched();
5410                free_snapshot(tr);
5411        }
5412#endif
5413
5414#ifdef CONFIG_TRACER_MAX_TRACE
5415        if (t->use_max_tr && !had_max_tr) {
5416                ret = tracing_alloc_snapshot_instance(tr);
5417                if (ret < 0)
5418                        goto out;
5419        }
5420#endif
5421
5422        if (t->init) {
5423                ret = tracer_init(t, tr);
5424                if (ret)
5425                        goto out;
5426        }
5427
5428        tr->current_trace = t;
5429        tr->current_trace->enabled++;
5430        trace_branch_enable(tr);
5431 out:
5432        mutex_unlock(&trace_types_lock);
5433
5434        return ret;
5435}
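/*
 * From user space this path is reached through the file typically
 * exposed as "current_tracer" (assuming tracefs at /sys/kernel/tracing):
 *
 *   cat available_tracers
 *   echo function > current_tracer
 *   cat current_tracer
 *
 * Trailing whitespace in the written string is stripped by
 * tracing_set_trace_write() below before tracing_set_tracer() runs.
 */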
5436
5437static ssize_t
5438tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5439                        size_t cnt, loff_t *ppos)
5440{
5441        struct trace_array *tr = filp->private_data;
5442        char buf[MAX_TRACER_SIZE+1];
5443        int i;
5444        size_t ret;
5445        int err;
5446
5447        ret = cnt;
5448
5449        if (cnt > MAX_TRACER_SIZE)
5450                cnt = MAX_TRACER_SIZE;
5451
5452        if (copy_from_user(buf, ubuf, cnt))
5453                return -EFAULT;
5454
5455        buf[cnt] = 0;
5456
5457        /* strip trailing whitespace. */
5458        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5459                buf[i] = 0;
5460
5461        err = tracing_set_tracer(tr, buf);
5462        if (err)
5463                return err;
5464
5465        *ppos += ret;
5466
5467        return ret;
5468}
5469
5470static ssize_t
5471tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5472                   size_t cnt, loff_t *ppos)
5473{
5474        char buf[64];
5475        int r;
5476
5477        r = snprintf(buf, sizeof(buf), "%ld\n",
5478                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5479        if (r > sizeof(buf))
5480                r = sizeof(buf);
5481        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5482}
5483
5484static ssize_t
5485tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5486                    size_t cnt, loff_t *ppos)
5487{
5488        unsigned long val;
5489        int ret;
5490
5491        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5492        if (ret)
5493                return ret;
5494
5495        *ptr = val * 1000;
5496
5497        return cnt;
5498}
5499
5500static ssize_t
5501tracing_thresh_read(struct file *filp, char __user *ubuf,
5502                    size_t cnt, loff_t *ppos)
5503{
5504        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5505}
5506
5507static ssize_t
5508tracing_thresh_write(struct file *filp, const char __user *ubuf,
5509                     size_t cnt, loff_t *ppos)
5510{
5511        struct trace_array *tr = filp->private_data;
5512        int ret;
5513
5514        mutex_lock(&trace_types_lock);
5515        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5516        if (ret < 0)
5517                goto out;
5518
5519        if (tr->current_trace->update_thresh) {
5520                ret = tr->current_trace->update_thresh(tr);
5521                if (ret < 0)
5522                        goto out;
5523        }
5524
5525        ret = cnt;
5526out:
5527        mutex_unlock(&trace_types_lock);
5528
5529        return ret;
5530}
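/*
 * tracing_nsecs_read()/tracing_nsecs_write() expose the value in
 * microseconds while storing nanoseconds internally (hence the
 * "val * 1000" above), e.g. for the "tracing_thresh" file:
 *
 *   echo 100 > tracing_thresh     (threshold of 100 usecs)
 *   cat tracing_thresh            (prints 100)
 */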
5531
5532#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5533
5534static ssize_t
5535tracing_max_lat_read(struct file *filp, char __user *ubuf,
5536                     size_t cnt, loff_t *ppos)
5537{
5538        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5539}
5540
5541static ssize_t
5542tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5543                      size_t cnt, loff_t *ppos)
5544{
5545        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5546}
5547
5548#endif
5549
5550static int tracing_open_pipe(struct inode *inode, struct file *filp)
5551{
5552        struct trace_array *tr = inode->i_private;
5553        struct trace_iterator *iter;
5554        int ret = 0;
5555
5556        if (tracing_disabled)
5557                return -ENODEV;
5558
5559        if (trace_array_get(tr) < 0)
5560                return -ENODEV;
5561
5562        mutex_lock(&trace_types_lock);
5563
5564        /* create a buffer to store the information to pass to userspace */
5565        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5566        if (!iter) {
5567                ret = -ENOMEM;
5568                __trace_array_put(tr);
5569                goto out;
5570        }
5571
5572        trace_seq_init(&iter->seq);
5573        iter->trace = tr->current_trace;
5574
5575        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5576                ret = -ENOMEM;
5577                goto fail;
5578        }
5579
5580        /* trace pipe does not show start of buffer */
5581        cpumask_setall(iter->started);
5582
5583        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5584                iter->iter_flags |= TRACE_FILE_LAT_FMT;
5585
5586        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5587        if (trace_clocks[tr->clock_id].in_ns)
5588                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5589
5590        iter->tr = tr;
5591        iter->trace_buffer = &tr->trace_buffer;
5592        iter->cpu_file = tracing_get_cpu(inode);
5593        mutex_init(&iter->mutex);
5594        filp->private_data = iter;
5595
5596        if (iter->trace->pipe_open)
5597                iter->trace->pipe_open(iter);
5598
5599        nonseekable_open(inode, filp);
5600
5601        tr->current_trace->ref++;
5602out:
5603        mutex_unlock(&trace_types_lock);
5604        return ret;
5605
5606fail:
5607        kfree(iter->trace);
5608        kfree(iter);
5609        __trace_array_put(tr);
5610        mutex_unlock(&trace_types_lock);
5611        return ret;
5612}
5613
5614static int tracing_release_pipe(struct inode *inode, struct file *file)
5615{
5616        struct trace_iterator *iter = file->private_data;
5617        struct trace_array *tr = inode->i_private;
5618
5619        mutex_lock(&trace_types_lock);
5620
5621        tr->current_trace->ref--;
5622
5623        if (iter->trace->pipe_close)
5624                iter->trace->pipe_close(iter);
5625
5626        mutex_unlock(&trace_types_lock);
5627
5628        free_cpumask_var(iter->started);
5629        mutex_destroy(&iter->mutex);
5630        kfree(iter);
5631
5632        trace_array_put(tr);
5633
5634        return 0;
5635}
5636
5637static __poll_t
5638trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5639{
5640        struct trace_array *tr = iter->tr;
5641
5642        /* Iterators are static; they should be either filled or empty */
5643        if (trace_buffer_iter(iter, iter->cpu_file))
5644                return EPOLLIN | EPOLLRDNORM;
5645
5646        if (tr->trace_flags & TRACE_ITER_BLOCK)
5647                /*
5648                 * Always select as readable when in blocking mode
5649                 */
5650                return EPOLLIN | EPOLLRDNORM;
5651        else
5652                return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5653                                             filp, poll_table);
5654}
5655
5656static __poll_t
5657tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5658{
5659        struct trace_iterator *iter = filp->private_data;
5660
5661        return trace_poll(iter, filp, poll_table);
5662}
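/*
 * trace_pipe consumers typically poll() before reading; a minimal
 * user-space sketch (path assumed, error handling omitted, needs
 * <fcntl.h>, <poll.h>, <unistd.h> and <stdio.h>):
 *
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	char buf[4096];
 *	ssize_t n;
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		n = read(fd, buf, sizeof(buf));
 *		if (n <= 0)
 *			break;
 *		fwrite(buf, 1, n, stdout);
 *	}
 */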
5663
5664/* Must be called with iter->mutex held. */
5665static int tracing_wait_pipe(struct file *filp)
5666{
5667        struct trace_iterator *iter = filp->private_data;
5668        int ret;
5669
5670        while (trace_empty(iter)) {
5671
5672                if ((filp->f_flags & O_NONBLOCK)) {
5673                        return -EAGAIN;
5674                }
5675
5676                /*
5677                 * We block until we have read something and tracing has been
5678                 * disabled. We still block if tracing is disabled but we have
5679                 * never read anything. This allows a user to cat this file,
5680                 * and then enable tracing. But after we have read something,
5681                 * we give an EOF when tracing is disabled again.
5682                 *
5683                 * iter->pos will be 0 if we haven't read anything.
5684                 */
5685                if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5686                        break;
5687
5688                mutex_unlock(&iter->mutex);
5689
5690                ret = wait_on_pipe(iter, false);
5691
5692                mutex_lock(&iter->mutex);
5693
5694                if (ret)
5695                        return ret;
5696        }
5697
5698        return 1;
5699}
5700
5701/*
5702 * Consumer reader.
5703 */
5704static ssize_t
5705tracing_read_pipe(struct file *filp, char __user *ubuf,
5706                  size_t cnt, loff_t *ppos)
5707{
5708        struct trace_iterator *iter = filp->private_data;
5709        ssize_t sret;
5710
5711        /*
5712         * Avoid more than one consumer on a single file descriptor.
5713         * This is just a matter of trace coherency; the ring buffer itself
5714         * is protected.
5715         */
5716        mutex_lock(&iter->mutex);
5717
5718        /* return any leftover data */
5719        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5720        if (sret != -EBUSY)
5721                goto out;
5722
5723        trace_seq_init(&iter->seq);
5724
5725        if (iter->trace->read) {
5726                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5727                if (sret)
5728                        goto out;
5729        }
5730
5731waitagain:
5732        sret = tracing_wait_pipe(filp);
5733        if (sret <= 0)
5734                goto out;
5735
5736        /* stop when tracing is finished */
5737        if (trace_empty(iter)) {
5738                sret = 0;
5739                goto out;
5740        }
5741
5742        if (cnt >= PAGE_SIZE)
5743                cnt = PAGE_SIZE - 1;
5744
5745        /* reset all but tr, trace, and overruns */
5746        memset(&iter->seq, 0,
5747               sizeof(struct trace_iterator) -
5748               offsetof(struct trace_iterator, seq));
5749        cpumask_clear(iter->started);
5750        iter->pos = -1;
5751
5752        trace_event_read_lock();
5753        trace_access_lock(iter->cpu_file);
5754        while (trace_find_next_entry_inc(iter) != NULL) {
5755                enum print_line_t ret;
5756                int save_len = iter->seq.seq.len;
5757
5758                ret = print_trace_line(iter);
5759                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5760                        /* don't print partial lines */
5761                        iter->seq.seq.len = save_len;
5762                        break;
5763                }
5764                if (ret != TRACE_TYPE_NO_CONSUME)
5765                        trace_consume(iter);
5766
5767                if (trace_seq_used(&iter->seq) >= cnt)
5768                        break;
5769
5770                /*
5771                 * Setting the full flag means we reached the trace_seq buffer
5772                 * size and should have left via the partial-output condition
5773                 * above; one of the trace_seq_* functions is not used properly.
5774                 */
5775                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5776                          iter->ent->type);
5777        }
5778        trace_access_unlock(iter->cpu_file);
5779        trace_event_read_unlock();
5780
5781        /* Now copy what we have to the user */
5782        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5783        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5784                trace_seq_init(&iter->seq);
5785
5786        /*
5787         * If there was nothing to send to user, in spite of consuming trace
5788         * entries, go back to wait for more entries.
5789         */
5790        if (sret == -EBUSY)
5791                goto waitagain;
5792
5793out:
5794        mutex_unlock(&iter->mutex);
5795
5796        return sret;
5797}
5798
5799static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5800                                     unsigned int idx)
5801{
5802        __free_page(spd->pages[idx]);
5803}
5804
5805static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5806        .can_merge              = 0,
5807        .confirm                = generic_pipe_buf_confirm,
5808        .release                = generic_pipe_buf_release,
5809        .steal                  = generic_pipe_buf_steal,
5810        .get                    = generic_pipe_buf_get,
5811};
5812
5813static size_t
5814tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5815{
5816        size_t count;
5817        int save_len;
5818        int ret;
5819
5820        /* Seq buffer is page-sized, exactly what we need. */
5821        for (;;) {
5822                save_len = iter->seq.seq.len;
5823                ret = print_trace_line(iter);
5824
5825                if (trace_seq_has_overflowed(&iter->seq)) {
5826                        iter->seq.seq.len = save_len;
5827                        break;
5828                }
5829
5830                /*
5831                 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5832                 * should only be returned if iter->seq overflowed. But
5833                 * check it anyway to be safe.
5834                 */
5835                if (ret == TRACE_TYPE_PARTIAL_LINE) {
5836                        iter->seq.seq.len = save_len;
5837                        break;
5838                }
5839
5840                count = trace_seq_used(&iter->seq) - save_len;
5841                if (rem < count) {
5842                        rem = 0;
5843                        iter->seq.seq.len = save_len;
5844                        break;
5845                }
5846
5847                if (ret != TRACE_TYPE_NO_CONSUME)
5848                        trace_consume(iter);
5849                rem -= count;
5850                if (!trace_find_next_entry_inc(iter))   {
5851                        rem = 0;
5852                        iter->ent = NULL;
5853                        break;
5854                }
5855        }
5856
5857        return rem;
5858}
5859
5860static ssize_t tracing_splice_read_pipe(struct file *filp,
5861                                        loff_t *ppos,
5862                                        struct pipe_inode_info *pipe,
5863                                        size_t len,
5864                                        unsigned int flags)
5865{
5866        struct page *pages_def[PIPE_DEF_BUFFERS];
5867        struct partial_page partial_def[PIPE_DEF_BUFFERS];
5868        struct trace_iterator *iter = filp->private_data;
5869        struct splice_pipe_desc spd = {
5870                .pages          = pages_def,
5871                .partial        = partial_def,
5872                .nr_pages       = 0, /* This gets updated below. */
5873                .nr_pages_max   = PIPE_DEF_BUFFERS,
5874                .ops            = &tracing_pipe_buf_ops,
5875                .spd_release    = tracing_spd_release_pipe,
5876        };
5877        ssize_t ret;
5878        size_t rem;
5879        unsigned int i;
5880
5881        if (splice_grow_spd(pipe, &spd))
5882                return -ENOMEM;
5883
5884        mutex_lock(&iter->mutex);
5885
5886        if (iter->trace->splice_read) {
5887                ret = iter->trace->splice_read(iter, filp,
5888                                               ppos, pipe, len, flags);
5889                if (ret)
5890                        goto out_err;
5891        }
5892
5893        ret = tracing_wait_pipe(filp);
5894        if (ret <= 0)
5895                goto out_err;
5896
5897        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5898                ret = -EFAULT;
5899                goto out_err;
5900        }
5901
5902        trace_event_read_lock();
5903        trace_access_lock(iter->cpu_file);
5904
5905        /* Fill as many pages as possible. */
5906        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5907                spd.pages[i] = alloc_page(GFP_KERNEL);
5908                if (!spd.pages[i])
5909                        break;
5910
5911                rem = tracing_fill_pipe_page(rem, iter);
5912
5913                /* Copy the data into the page, so we can start over. */
5914                ret = trace_seq_to_buffer(&iter->seq,
5915                                          page_address(spd.pages[i]),
5916                                          trace_seq_used(&iter->seq));
5917                if (ret < 0) {
5918                        __free_page(spd.pages[i]);
5919                        break;
5920                }
5921                spd.partial[i].offset = 0;
5922                spd.partial[i].len = trace_seq_used(&iter->seq);
5923
5924                trace_seq_init(&iter->seq);
5925        }
5926
5927        trace_access_unlock(iter->cpu_file);
5928        trace_event_read_unlock();
5929        mutex_unlock(&iter->mutex);
5930
5931        spd.nr_pages = i;
5932
5933        if (i)
5934                ret = splice_to_pipe(pipe, &spd);
5935        else
5936                ret = 0;
5937out:
5938        splice_shrink_spd(&spd);
5939        return ret;
5940
5941out_err:
5942        mutex_unlock(&iter->mutex);
5943        goto out;
5944}
5945
5946static ssize_t
5947tracing_entries_read(struct file *filp, char __user *ubuf,
5948                     size_t cnt, loff_t *ppos)
5949{
5950        struct inode *inode = file_inode(filp);
5951        struct trace_array *tr = inode->i_private;
5952        int cpu = tracing_get_cpu(inode);
5953        char buf[64];
5954        int r = 0;
5955        ssize_t ret;
5956
5957        mutex_lock(&trace_types_lock);
5958
5959        if (cpu == RING_BUFFER_ALL_CPUS) {
5960                int cpu, buf_size_same;
5961                unsigned long size;
5962
5963                size = 0;
5964                buf_size_same = 1;
5965                /* check if all cpu sizes are the same */
5966                for_each_tracing_cpu(cpu) {
5967                        /* fill in the size from the first enabled cpu */
5968                        if (size == 0)
5969                                size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5970                        if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5971                                buf_size_same = 0;
5972                                break;
5973                        }
5974                }
5975
5976                if (buf_size_same) {
5977                        if (!ring_buffer_expanded)
5978                                r = sprintf(buf, "%lu (expanded: %lu)\n",
5979                                            size >> 10,
5980                                            trace_buf_size >> 10);
5981                        else
5982                                r = sprintf(buf, "%lu\n", size >> 10);
5983                } else
5984                        r = sprintf(buf, "X\n");
5985        } else
5986                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5987
5988        mutex_unlock(&trace_types_lock);
5989
5990        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5991        return ret;
5992}
5993
5994static ssize_t
5995tracing_entries_write(struct file *filp, const char __user *ubuf,
5996                      size_t cnt, loff_t *ppos)
5997{
5998        struct inode *inode = file_inode(filp);
5999        struct trace_array *tr = inode->i_private;
6000        unsigned long val;
6001        int ret;
6002
6003        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6004        if (ret)
6005                return ret;
6006
6007        /* must have at least 1 entry */
6008        if (!val)
6009                return -EINVAL;
6010
6011        /* value is in KB */
6012        val <<= 10;
6013        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6014        if (ret < 0)
6015                return ret;
6016
6017        *ppos += cnt;
6018
6019        return cnt;
6020}
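/*
 * Example (assuming tracefs at /sys/kernel/tracing): the size is given
 * in KB, so the "val <<= 10" above converts it to bytes before the
 * resize. The file is typically exposed as "buffer_size_kb", with
 * per-cpu variants under per_cpu/:
 *
 *   echo 4096 > buffer_size_kb              (4 MB per cpu)
 *   echo 1408 > per_cpu/cpu0/buffer_size_kb (cpu0 only)
 *   cat buffer_size_kb
 */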
6021
6022static ssize_t
6023tracing_total_entries_read(struct file *filp, char __user *ubuf,
6024                                size_t cnt, loff_t *ppos)
6025{
6026        struct trace_array *tr = filp->private_data;
6027        char buf[64];
6028        int r, cpu;
6029        unsigned long size = 0, expanded_size = 0;
6030
6031        mutex_lock(&trace_types_lock);
6032        for_each_tracing_cpu(cpu) {
6033                size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6034                if (!ring_buffer_expanded)
6035                        expanded_size += trace_buf_size >> 10;
6036        }
6037        if (ring_buffer_expanded)
6038                r = sprintf(buf, "%lu\n", size);
6039        else
6040                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6041        mutex_unlock(&trace_types_lock);
6042
6043        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6044}
6045
6046static ssize_t
6047tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6048                          size_t cnt, loff_t *ppos)
6049{
6050        /*
6051         * There is no need to read what the user has written; this function
6052         * exists just to make sure that "echo" does not return an error.
6053         */
6054
6055        *ppos += cnt;
6056
6057        return cnt;
6058}
6059
6060static int
6061tracing_free_buffer_release(struct inode *inode, struct file *filp)
6062{
6063        struct trace_array *tr = inode->i_private;
6064
6065        /* disable tracing? */
6066        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6067                tracer_tracing_off(tr);
6068        /* resize the ring buffer to 0 */
6069        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6070
6071        trace_array_put(tr);
6072
6073        return 0;
6074}
6075
6076static ssize_t
6077tracing_mark_write(struct file *filp, const char __user *ubuf,
6078                                        size_t cnt, loff_t *fpos)
6079{
6080        struct trace_array *tr = filp->private_data;
6081        struct ring_buffer_event *event;
6082        enum event_trigger_type tt = ETT_NONE;
6083        struct ring_buffer *buffer;
6084        struct print_entry *entry;
6085        unsigned long irq_flags;
6086        const char faulted[] = "<faulted>";
6087        ssize_t written;
6088        int size;
6089        int len;
6090
6091/* Used in tracing_mark_raw_write() as well */
6092#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6093
6094        if (tracing_disabled)
6095                return -EINVAL;
6096
6097        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6098                return -EINVAL;
6099
6100        if (cnt > TRACE_BUF_SIZE)
6101                cnt = TRACE_BUF_SIZE;
6102
6103        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6104
6105        local_save_flags(irq_flags);
6106        size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6107
6108        /* If less than "<faulted>", then make sure we can still add that */
6109        if (cnt < FAULTED_SIZE)
6110                size += FAULTED_SIZE - cnt;
6111
6112        buffer = tr->trace_buffer.buffer;
6113        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6114                                            irq_flags, preempt_count());
6115        if (unlikely(!event))
6116                /* Ring buffer disabled, return as if not open for write */
6117                return -EBADF;
6118
6119        entry = ring_buffer_event_data(event);
6120        entry->ip = _THIS_IP_;
6121
6122        len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6123        if (len) {
6124                memcpy(&entry->buf, faulted, FAULTED_SIZE);
6125                cnt = FAULTED_SIZE;
6126                written = -EFAULT;
6127        } else
6128                written = cnt;
6129        len = cnt;
6130
6131        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6132                /* do not add \n before testing triggers, but add \0 */
6133                entry->buf[cnt] = '\0';
6134                tt = event_triggers_call(tr->trace_marker_file, entry, event);
6135        }
6136
6137        if (entry->buf[cnt - 1] != '\n') {
6138                entry->buf[cnt] = '\n';
6139                entry->buf[cnt + 1] = '\0';
6140        } else
6141                entry->buf[cnt] = '\0';
6142
6143        __buffer_unlock_commit(buffer, event);
6144
6145        if (tt)
6146                event_triggers_post_call(tr->trace_marker_file, tt);
6147
6148        if (written > 0)
6149                *fpos += written;
6150
6151        return written;
6152}
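/*
 * User space reaches this through the file typically exposed as
 * "trace_marker"; a minimal sketch (path assumed, error handling
 * omitted):
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	dprintf(fd, "hit checkpoint %d\n", 42);
 *
 * If the writer does not supply a trailing newline, one is appended by
 * the code above; writes larger than TRACE_BUF_SIZE are truncated.
 */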
6153
6154/* Limit it for now to 3K (including tag) */
6155#define RAW_DATA_MAX_SIZE (1024*3)
6156
6157static ssize_t
6158tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6159                                        size_t cnt, loff_t *fpos)
6160{
6161        struct trace_array *tr = filp->private_data;
6162        struct ring_buffer_event *event;
6163        struct ring_buffer *buffer;
6164        struct raw_data_entry *entry;
6165        const char faulted[] = "<faulted>";
6166        unsigned long irq_flags;
6167        ssize_t written;
6168        int size;
6169        int len;
6170
6171#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6172
6173        if (tracing_disabled)
6174                return -EINVAL;
6175
6176        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6177                return -EINVAL;
6178
6179        /* The marker must at least have a tag id */
6180        if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6181                return -EINVAL;
6182
6183        if (cnt > TRACE_BUF_SIZE)
6184                cnt = TRACE_BUF_SIZE;
6185
6186        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6187
6188        local_save_flags(irq_flags);
6189        size = sizeof(*entry) + cnt;
6190        if (cnt < FAULT_SIZE_ID)
6191                size += FAULT_SIZE_ID - cnt;
6192
6193        buffer = tr->trace_buffer.buffer;
6194        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6195                                            irq_flags, preempt_count());
6196        if (!event)
6197                /* Ring buffer disabled, return as if not open for write */
6198                return -EBADF;
6199
6200        entry = ring_buffer_event_data(event);
6201
6202        len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6203        if (len) {
6204                entry->id = -1;
6205                memcpy(&entry->buf, faulted, FAULTED_SIZE);
6206                written = -EFAULT;
6207        } else
6208                written = cnt;
6209
6210        __buffer_unlock_commit(buffer, event);
6211
6212        if (written > 0)
6213                *fpos += written;
6214
6215        return written;
6216}
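/*
 * The raw marker (typically exposed as "trace_marker_raw") expects a
 * binary payload whose first bytes are an unsigned int tag id, e.g.
 * (sketch, names and sizes assumed):
 *
 *	struct {
 *		unsigned int id;
 *		char payload[32];
 *	} rec = { .id = 1, .payload = "raw data" };
 *
 *	write(fd, &rec, sizeof(rec));
 */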
6217
6218static int tracing_clock_show(struct seq_file *m, void *v)
6219{
6220        struct trace_array *tr = m->private;
6221        int i;
6222
6223        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6224                seq_printf(m,
6225                        "%s%s%s%s", i ? " " : "",
6226                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6227                        i == tr->clock_id ? "]" : "");
6228        seq_putc(m, '\n');
6229
6230        return 0;
6231}
6232
6233int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6234{
6235        int i;
6236
6237        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6238                if (strcmp(trace_clocks[i].name, clockstr) == 0)
6239                        break;
6240        }
6241        if (i == ARRAY_SIZE(trace_clocks))
6242                return -EINVAL;
6243
6244        mutex_lock(&trace_types_lock);
6245
6246        tr->clock_id = i;
6247
6248        ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6249
6250        /*
6251         * New clock may not be consistent with the previous clock.
6252         * Reset the buffer so that it doesn't have incomparable timestamps.
6253         */
6254        tracing_reset_online_cpus(&tr->trace_buffer);
6255
6256#ifdef CONFIG_TRACER_MAX_TRACE
6257        if (tr->max_buffer.buffer)
6258                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6259        tracing_reset_online_cpus(&tr->max_buffer);
6260#endif
6261
6262        mutex_unlock(&trace_types_lock);
6263
6264        return 0;
6265}
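/*
 * Example (assuming tracefs at /sys/kernel/tracing): the "trace_clock"
 * file lists the available clocks with the current one in brackets, and
 * switching clocks resets the buffers as noted above:
 *
 *   cat trace_clock
 *   echo mono > trace_clock
 */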
6266
6267static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6268                                   size_t cnt, loff_t *fpos)
6269{
6270        struct seq_file *m = filp->private_data;
6271        struct trace_array *tr = m->private;
6272        char buf[64];
6273        const char *clockstr;
6274        int ret;
6275
6276        if (cnt >= sizeof(buf))
6277                return -EINVAL;
6278
6279        if (copy_from_user(buf, ubuf, cnt))
6280                return -EFAULT;
6281
6282        buf[cnt] = 0;
6283
6284        clockstr = strstrip(buf);
6285
6286        ret = tracing_set_clock(tr, clockstr);
6287        if (ret)
6288                return ret;
6289
6290        *fpos += cnt;
6291
6292        return cnt;
6293}
6294
6295static int tracing_clock_open(struct inode *inode, struct file *file)
6296{
6297        struct trace_array *tr = inode->i_private;
6298        int ret;
6299
6300        if (tracing_disabled)
6301                return -ENODEV;
6302
6303        if (trace_array_get(tr))
6304                return -ENODEV;
6305
6306        ret = single_open(file, tracing_clock_show, inode->i_private);
6307        if (ret < 0)
6308                trace_array_put(tr);
6309
6310        return ret;
6311}
6312
6313static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6314{
6315        struct trace_array *tr = m->private;
6316
6317        mutex_lock(&trace_types_lock);
6318
6319        if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6320                seq_puts(m, "delta [absolute]\n");
6321        else
6322                seq_puts(m, "[delta] absolute\n");
6323
6324        mutex_unlock(&trace_types_lock);
6325
6326        return 0;
6327}
6328
6329static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6330{
6331        struct trace_array *tr = inode->i_private;
6332        int ret;
6333
6334        if (tracing_disabled)
6335                return -ENODEV;
6336
6337        if (trace_array_get(tr))
6338                return -ENODEV;
6339
6340        ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6341        if (ret < 0)
6342                trace_array_put(tr);
6343
6344        return ret;
6345}
6346
6347int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6348{
6349        int ret = 0;
6350
6351        mutex_lock(&trace_types_lock);
6352
6353        if (abs && tr->time_stamp_abs_ref++)
6354                goto out;
6355
6356        if (!abs) {
6357                if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6358                        ret = -EINVAL;
6359                        goto out;
6360                }
6361
6362                if (--tr->time_stamp_abs_ref)
6363                        goto out;
6364        }
6365
6366        ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6367
6368#ifdef CONFIG_TRACER_MAX_TRACE
6369        if (tr->max_buffer.buffer)
6370                ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6371#endif
6372 out:
6373        mutex_unlock(&trace_types_lock);
6374
6375        return ret;
6376}
6377
6378struct ftrace_buffer_info {
6379        struct trace_iterator   iter;
6380        void                    *spare;
6381        unsigned int            spare_cpu;
6382        unsigned int            read;
6383};
6384
6385#ifdef CONFIG_TRACER_SNAPSHOT
6386static int tracing_snapshot_open(struct inode *inode, struct file *file)
6387{
6388        struct trace_array *tr = inode->i_private;
6389        struct trace_iterator *iter;
6390        struct seq_file *m;
6391        int ret = 0;
6392
6393        if (trace_array_get(tr) < 0)
6394                return -ENODEV;
6395
6396        if (file->f_mode & FMODE_READ) {
6397                iter = __tracing_open(inode, file, true);
6398                if (IS_ERR(iter))
6399                        ret = PTR_ERR(iter);
6400        } else {
6401                /* Writes still need the seq_file to hold the private data */
6402                ret = -ENOMEM;
6403                m = kzalloc(sizeof(*m), GFP_KERNEL);
6404                if (!m)
6405                        goto out;
6406                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6407                if (!iter) {
6408                        kfree(m);
6409                        goto out;
6410                }
6411                ret = 0;
6412
6413                iter->tr = tr;
6414                iter->trace_buffer = &tr->max_buffer;
6415                iter->cpu_file = tracing_get_cpu(inode);
6416                m->private = iter;
6417                file->private_data = m;
6418        }
6419out:
6420        if (ret < 0)
6421                trace_array_put(tr);
6422
6423        return ret;
6424}
6425
6426static ssize_t
6427tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6428                       loff_t *ppos)
6429{
6430        struct seq_file *m = filp->private_data;
6431        struct trace_iterator *iter = m->private;
6432        struct trace_array *tr = iter->tr;
6433        unsigned long val;
6434        int ret;
6435
6436        ret = tracing_update_buffers();
6437        if (ret < 0)
6438                return ret;
6439
6440        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6441        if (ret)
6442                return ret;
6443
6444        mutex_lock(&trace_types_lock);
6445
6446        if (tr->current_trace->use_max_tr) {
6447                ret = -EBUSY;
6448                goto out;
6449        }
6450
6451        switch (val) {
6452        case 0:
6453                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6454                        ret = -EINVAL;
6455                        break;
6456                }
6457                if (tr->allocated_snapshot)
6458                        free_snapshot(tr);
6459                break;
6460        case 1:
6461/* Only allow per-cpu swap if the ring buffer supports it */
6462#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6463                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6464                        ret = -EINVAL;
6465                        break;
6466                }
6467#endif
6468                if (!tr->allocated_snapshot) {
6469                        ret = tracing_alloc_snapshot_instance(tr);
6470                        if (ret < 0)
6471                                break;
6472                }
6473                local_irq_disable();
6474                /* Now, we're going to swap */
6475                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6476                        update_max_tr(tr, current, smp_processor_id());
6477                else
6478                        update_max_tr_single(tr, current, iter->cpu_file);
6479                local_irq_enable();
6480                break;
6481        default:
6482                if (tr->allocated_snapshot) {
6483                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6484                                tracing_reset_online_cpus(&tr->max_buffer);
6485                        else
6486                                tracing_reset(&tr->max_buffer, iter->cpu_file);
6487                }
6488                break;
6489        }
6490
6491        if (ret >= 0) {
6492                *ppos += cnt;
6493                ret = cnt;
6494        }
6495out:
6496        mutex_unlock(&trace_types_lock);
6497        return ret;
6498}
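/*
 * Editor's sketch (illustration only, not part of this file's build): the
 * value parsing above is what backs the tracefs "snapshot" file.  A minimal
 * user-space helper, assuming tracefs is mounted at /sys/kernel/tracing
 * (it may also appear under /sys/kernel/debug/tracing):
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: "0" frees the snapshot buffer, "1" allocates it if
 * needed and swaps it with the live buffer, anything larger clears it. */
static int snapshot_ctl(const char *val)
{
        int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, val, strlen(val));
        close(fd);
        return n < 0 ? -1 : 0;
}
#endif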
6499
6500static int tracing_snapshot_release(struct inode *inode, struct file *file)
6501{
6502        struct seq_file *m = file->private_data;
6503        int ret;
6504
6505        ret = tracing_release(inode, file);
6506
6507        if (file->f_mode & FMODE_READ)
6508                return ret;
6509
6510        /* If write only, the seq_file is just a stub */
6511        if (m)
6512                kfree(m->private);
6513        kfree(m);
6514
6515        return 0;
6516}
6517
6518static int tracing_buffers_open(struct inode *inode, struct file *filp);
6519static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6520                                    size_t count, loff_t *ppos);
6521static int tracing_buffers_release(struct inode *inode, struct file *file);
6522static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6523                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6524
6525static int snapshot_raw_open(struct inode *inode, struct file *filp)
6526{
6527        struct ftrace_buffer_info *info;
6528        int ret;
6529
6530        ret = tracing_buffers_open(inode, filp);
6531        if (ret < 0)
6532                return ret;
6533
6534        info = filp->private_data;
6535
6536        if (info->iter.trace->use_max_tr) {
6537                tracing_buffers_release(inode, filp);
6538                return -EBUSY;
6539        }
6540
6541        info->iter.snapshot = true;
6542        info->iter.trace_buffer = &info->iter.tr->max_buffer;
6543
6544        return ret;
6545}
6546
6547#endif /* CONFIG_TRACER_SNAPSHOT */
6548
6549
6550static const struct file_operations tracing_thresh_fops = {
6551        .open           = tracing_open_generic,
6552        .read           = tracing_thresh_read,
6553        .write          = tracing_thresh_write,
6554        .llseek         = generic_file_llseek,
6555};
6556
6557#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6558static const struct file_operations tracing_max_lat_fops = {
6559        .open           = tracing_open_generic,
6560        .read           = tracing_max_lat_read,
6561        .write          = tracing_max_lat_write,
6562        .llseek         = generic_file_llseek,
6563};
6564#endif
6565
6566static const struct file_operations set_tracer_fops = {
6567        .open           = tracing_open_generic,
6568        .read           = tracing_set_trace_read,
6569        .write          = tracing_set_trace_write,
6570        .llseek         = generic_file_llseek,
6571};
6572
6573static const struct file_operations tracing_pipe_fops = {
6574        .open           = tracing_open_pipe,
6575        .poll           = tracing_poll_pipe,
6576        .read           = tracing_read_pipe,
6577        .splice_read    = tracing_splice_read_pipe,
6578        .release        = tracing_release_pipe,
6579        .llseek         = no_llseek,
6580};
6581
6582static const struct file_operations tracing_entries_fops = {
6583        .open           = tracing_open_generic_tr,
6584        .read           = tracing_entries_read,
6585        .write          = tracing_entries_write,
6586        .llseek         = generic_file_llseek,
6587        .release        = tracing_release_generic_tr,
6588};
6589
6590static const struct file_operations tracing_total_entries_fops = {
6591        .open           = tracing_open_generic_tr,
6592        .read           = tracing_total_entries_read,
6593        .llseek         = generic_file_llseek,
6594        .release        = tracing_release_generic_tr,
6595};
6596
6597static const struct file_operations tracing_free_buffer_fops = {
6598        .open           = tracing_open_generic_tr,
6599        .write          = tracing_free_buffer_write,
6600        .release        = tracing_free_buffer_release,
6601};
6602
6603static const struct file_operations tracing_mark_fops = {
6604        .open           = tracing_open_generic_tr,
6605        .write          = tracing_mark_write,
6606        .llseek         = generic_file_llseek,
6607        .release        = tracing_release_generic_tr,
6608};
6609
6610static const struct file_operations tracing_mark_raw_fops = {
6611        .open           = tracing_open_generic_tr,
6612        .write          = tracing_mark_raw_write,
6613        .llseek         = generic_file_llseek,
6614        .release        = tracing_release_generic_tr,
6615};
6616
6617static const struct file_operations trace_clock_fops = {
6618        .open           = tracing_clock_open,
6619        .read           = seq_read,
6620        .llseek         = seq_lseek,
6621        .release        = tracing_single_release_tr,
6622        .write          = tracing_clock_write,
6623};
6624
6625static const struct file_operations trace_time_stamp_mode_fops = {
6626        .open           = tracing_time_stamp_mode_open,
6627        .read           = seq_read,
6628        .llseek         = seq_lseek,
6629        .release        = tracing_single_release_tr,
6630};
6631
6632#ifdef CONFIG_TRACER_SNAPSHOT
6633static const struct file_operations snapshot_fops = {
6634        .open           = tracing_snapshot_open,
6635        .read           = seq_read,
6636        .write          = tracing_snapshot_write,
6637        .llseek         = tracing_lseek,
6638        .release        = tracing_snapshot_release,
6639};
6640
6641static const struct file_operations snapshot_raw_fops = {
6642        .open           = snapshot_raw_open,
6643        .read           = tracing_buffers_read,
6644        .release        = tracing_buffers_release,
6645        .splice_read    = tracing_buffers_splice_read,
6646        .llseek         = no_llseek,
6647};
6648
6649#endif /* CONFIG_TRACER_SNAPSHOT */
6650
6651static int tracing_buffers_open(struct inode *inode, struct file *filp)
6652{
6653        struct trace_array *tr = inode->i_private;
6654        struct ftrace_buffer_info *info;
6655        int ret;
6656
6657        if (tracing_disabled)
6658                return -ENODEV;
6659
6660        if (trace_array_get(tr) < 0)
6661                return -ENODEV;
6662
6663        info = kzalloc(sizeof(*info), GFP_KERNEL);
6664        if (!info) {
6665                trace_array_put(tr);
6666                return -ENOMEM;
6667        }
6668
6669        mutex_lock(&trace_types_lock);
6670
6671        info->iter.tr           = tr;
6672        info->iter.cpu_file     = tracing_get_cpu(inode);
6673        info->iter.trace        = tr->current_trace;
6674        info->iter.trace_buffer = &tr->trace_buffer;
6675        info->spare             = NULL;
6676        /* Force reading the ring buffer on the first read */
6677        info->read              = (unsigned int)-1;
6678
6679        filp->private_data = info;
6680
6681        tr->current_trace->ref++;
6682
6683        mutex_unlock(&trace_types_lock);
6684
6685        ret = nonseekable_open(inode, filp);
6686        if (ret < 0)
6687                trace_array_put(tr);
6688
6689        return ret;
6690}
6691
6692static __poll_t
6693tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6694{
6695        struct ftrace_buffer_info *info = filp->private_data;
6696        struct trace_iterator *iter = &info->iter;
6697
6698        return trace_poll(iter, filp, poll_table);
6699}
6700
6701static ssize_t
6702tracing_buffers_read(struct file *filp, char __user *ubuf,
6703                     size_t count, loff_t *ppos)
6704{
6705        struct ftrace_buffer_info *info = filp->private_data;
6706        struct trace_iterator *iter = &info->iter;
6707        ssize_t ret = 0;
6708        ssize_t size;
6709
6710        if (!count)
6711                return 0;
6712
6713#ifdef CONFIG_TRACER_MAX_TRACE
6714        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6715                return -EBUSY;
6716#endif
6717
6718        if (!info->spare) {
6719                info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6720                                                          iter->cpu_file);
6721                if (IS_ERR(info->spare)) {
6722                        ret = PTR_ERR(info->spare);
6723                        info->spare = NULL;
6724                } else {
6725                        info->spare_cpu = iter->cpu_file;
6726                }
6727        }
6728        if (!info->spare)
6729                return ret;
6730
6731        /* Do we have previous read data to read? */
6732        if (info->read < PAGE_SIZE)
6733                goto read;
6734
6735 again:
6736        trace_access_lock(iter->cpu_file);
6737        ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6738                                    &info->spare,
6739                                    count,
6740                                    iter->cpu_file, 0);
6741        trace_access_unlock(iter->cpu_file);
6742
6743        if (ret < 0) {
6744                if (trace_empty(iter)) {
6745                        if ((filp->f_flags & O_NONBLOCK))
6746                                return -EAGAIN;
6747
6748                        ret = wait_on_pipe(iter, false);
6749                        if (ret)
6750                                return ret;
6751
6752                        goto again;
6753                }
6754                return 0;
6755        }
6756
6757        info->read = 0;
6758 read:
6759        size = PAGE_SIZE - info->read;
6760        if (size > count)
6761                size = count;
6762
6763        ret = copy_to_user(ubuf, info->spare + info->read, size);
6764        if (ret == size)
6765                return -EFAULT;
6766
6767        size -= ret;
6768
6769        *ppos += size;
6770        info->read += size;
6771
6772        return size;
6773}
6774
6775static int tracing_buffers_release(struct inode *inode, struct file *file)
6776{
6777        struct ftrace_buffer_info *info = file->private_data;
6778        struct trace_iterator *iter = &info->iter;
6779
6780        mutex_lock(&trace_types_lock);
6781
6782        iter->tr->current_trace->ref--;
6783
6784        __trace_array_put(iter->tr);
6785
6786        if (info->spare)
6787                ring_buffer_free_read_page(iter->trace_buffer->buffer,
6788                                           info->spare_cpu, info->spare);
6789        kfree(info);
6790
6791        mutex_unlock(&trace_types_lock);
6792
6793        return 0;
6794}
6795
6796struct buffer_ref {
6797        struct ring_buffer      *buffer;
6798        void                    *page;
6799        int                     cpu;
6800        int                     ref;
6801};
6802
6803static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6804                                    struct pipe_buffer *buf)
6805{
6806        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6807
6808        if (--ref->ref)
6809                return;
6810
6811        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6812        kfree(ref);
6813        buf->private = 0;
6814}
6815
6816static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6817                                struct pipe_buffer *buf)
6818{
6819        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6820
6821        ref->ref++;
6822}
6823
6824/* Pipe buffer operations for spliced ring buffer pages. */
6825static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6826        .can_merge              = 0,
6827        .confirm                = generic_pipe_buf_confirm,
6828        .release                = buffer_pipe_buf_release,
6829        .steal                  = generic_pipe_buf_steal,
6830        .get                    = buffer_pipe_buf_get,
6831};
6832
6833/*
6834 * Callback from splice_to_pipe(): release the pages still attached to
6835 * the spd if we errored out while filling the pipe.
6836 */
6837static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6838{
6839        struct buffer_ref *ref =
6840                (struct buffer_ref *)spd->partial[i].private;
6841
6842        if (--ref->ref)
6843                return;
6844
6845        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6846        kfree(ref);
6847        spd->partial[i].private = 0;
6848}
6849
6850static ssize_t
6851tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6852                            struct pipe_inode_info *pipe, size_t len,
6853                            unsigned int flags)
6854{
6855        struct ftrace_buffer_info *info = file->private_data;
6856        struct trace_iterator *iter = &info->iter;
6857        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6858        struct page *pages_def[PIPE_DEF_BUFFERS];
6859        struct splice_pipe_desc spd = {
6860                .pages          = pages_def,
6861                .partial        = partial_def,
6862                .nr_pages_max   = PIPE_DEF_BUFFERS,
6863                .ops            = &buffer_pipe_buf_ops,
6864                .spd_release    = buffer_spd_release,
6865        };
6866        struct buffer_ref *ref;
6867        int entries, i;
6868        ssize_t ret = 0;
6869
6870#ifdef CONFIG_TRACER_MAX_TRACE
6871        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6872                return -EBUSY;
6873#endif
6874
6875        if (*ppos & (PAGE_SIZE - 1))
6876                return -EINVAL;
6877
6878        if (len & (PAGE_SIZE - 1)) {
6879                if (len < PAGE_SIZE)
6880                        return -EINVAL;
6881                len &= PAGE_MASK;
6882        }
6883
6884        if (splice_grow_spd(pipe, &spd))
6885                return -ENOMEM;
6886
6887 again:
6888        trace_access_lock(iter->cpu_file);
6889        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6890
6891        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6892                struct page *page;
6893                int r;
6894
6895                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6896                if (!ref) {
6897                        ret = -ENOMEM;
6898                        break;
6899                }
6900
6901                ref->ref = 1;
6902                ref->buffer = iter->trace_buffer->buffer;
6903                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6904                if (IS_ERR(ref->page)) {
6905                        ret = PTR_ERR(ref->page);
6906                        ref->page = NULL;
6907                        kfree(ref);
6908                        break;
6909                }
6910                ref->cpu = iter->cpu_file;
6911
6912                r = ring_buffer_read_page(ref->buffer, &ref->page,
6913                                          len, iter->cpu_file, 1);
6914                if (r < 0) {
6915                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
6916                                                   ref->page);
6917                        kfree(ref);
6918                        break;
6919                }
6920
6921                page = virt_to_page(ref->page);
6922
6923                spd.pages[i] = page;
6924                spd.partial[i].len = PAGE_SIZE;
6925                spd.partial[i].offset = 0;
6926                spd.partial[i].private = (unsigned long)ref;
6927                spd.nr_pages++;
6928                *ppos += PAGE_SIZE;
6929
6930                entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6931        }
6932
6933        trace_access_unlock(iter->cpu_file);
6934        spd.nr_pages = i;
6935
6936        /* did we read anything? */
6937        if (!spd.nr_pages) {
6938                if (ret)
6939                        goto out;
6940
6941                ret = -EAGAIN;
6942                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6943                        goto out;
6944
6945                ret = wait_on_pipe(iter, true);
6946                if (ret)
6947                        goto out;
6948
6949                goto again;
6950        }
6951
6952        ret = splice_to_pipe(pipe, &spd);
6953out:
6954        splice_shrink_spd(&spd);
6955
6956        return ret;
6957}
6958
6959static const struct file_operations tracing_buffers_fops = {
6960        .open           = tracing_buffers_open,
6961        .read           = tracing_buffers_read,
6962        .poll           = tracing_buffers_poll,
6963        .release        = tracing_buffers_release,
6964        .splice_read    = tracing_buffers_splice_read,
6965        .llseek         = no_llseek,
6966};
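/*
 * Editor's sketch (illustration only, not part of this file's build):
 * tracing_buffers_fops above serves per_cpu/cpuN/trace_pipe_raw (created
 * further down in tracing_init_tracefs_percpu()).  The splice path hands out
 * whole ring-buffer pages, so requests must be page sized and the output fd
 * must be a pipe.  Assumes a 4096-byte page and tracefs mounted at
 * /sys/kernel/tracing.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Hypothetical helper: move one raw page of cpu0 trace data into pipe_fd. */
static ssize_t splice_one_raw_page(int pipe_fd)
{
        int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
                      O_RDONLY | O_NONBLOCK);
        ssize_t ret;

        if (fd < 0)
                return -1;
        ret = splice(fd, NULL, pipe_fd, NULL, 4096, SPLICE_F_NONBLOCK);
        close(fd);
        return ret;
}
#endif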
6967
6968static ssize_t
6969tracing_stats_read(struct file *filp, char __user *ubuf,
6970                   size_t count, loff_t *ppos)
6971{
6972        struct inode *inode = file_inode(filp);
6973        struct trace_array *tr = inode->i_private;
6974        struct trace_buffer *trace_buf = &tr->trace_buffer;
6975        int cpu = tracing_get_cpu(inode);
6976        struct trace_seq *s;
6977        unsigned long cnt;
6978        unsigned long long t;
6979        unsigned long usec_rem;
6980
6981        s = kmalloc(sizeof(*s), GFP_KERNEL);
6982        if (!s)
6983                return -ENOMEM;
6984
6985        trace_seq_init(s);
6986
6987        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6988        trace_seq_printf(s, "entries: %ld\n", cnt);
6989
6990        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6991        trace_seq_printf(s, "overrun: %ld\n", cnt);
6992
6993        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6994        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6995
6996        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6997        trace_seq_printf(s, "bytes: %ld\n", cnt);
6998
6999        if (trace_clocks[tr->clock_id].in_ns) {
7000                /* local or global for trace_clock */
7001                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7002                usec_rem = do_div(t, USEC_PER_SEC);
7003                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7004                                                                t, usec_rem);
7005
7006                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7007                usec_rem = do_div(t, USEC_PER_SEC);
7008                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7009        } else {
7010                /* counter or tsc mode for trace_clock */
7011                trace_seq_printf(s, "oldest event ts: %llu\n",
7012                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7013
7014                trace_seq_printf(s, "now ts: %llu\n",
7015                                ring_buffer_time_stamp(trace_buf->buffer, cpu));
7016        }
7017
7018        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7019        trace_seq_printf(s, "dropped events: %ld\n", cnt);
7020
7021        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7022        trace_seq_printf(s, "read events: %ld\n", cnt);
7023
7024        count = simple_read_from_buffer(ubuf, count, ppos,
7025                                        s->buffer, trace_seq_used(s));
7026
7027        kfree(s);
7028
7029        return count;
7030}
7031
7032static const struct file_operations tracing_stats_fops = {
7033        .open           = tracing_open_generic_tr,
7034        .read           = tracing_stats_read,
7035        .llseek         = generic_file_llseek,
7036        .release        = tracing_release_generic_tr,
7037};
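/*
 * Editor's sketch (illustration only, not part of this file's build):
 * reading back the per-cpu statistics rendered by tracing_stats_read()
 * above.  Assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <stdio.h>

/* Hypothetical helper: print "entries:", "overrun:", "oldest event ts:",
 * etc. for cpu0. */
static void dump_cpu0_stats(void)
{
        char line[256];
        FILE *f = fopen("/sys/kernel/tracing/per_cpu/cpu0/stats", "r");

        if (!f)
                return;
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
}
#endif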
7038
7039#ifdef CONFIG_DYNAMIC_FTRACE
7040
7041static ssize_t
7042tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7043                  size_t cnt, loff_t *ppos)
7044{
7045        unsigned long *p = filp->private_data;
7046        char buf[64]; /* Small enough to be safe on the stack */
7047        int r;
7048
7049        r = scnprintf(buf, 63, "%ld", *p);
7050        buf[r++] = '\n';
7051
7052        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7053}
7054
7055static const struct file_operations tracing_dyn_info_fops = {
7056        .open           = tracing_open_generic,
7057        .read           = tracing_read_dyn_info,
7058        .llseek         = generic_file_llseek,
7059};
7060#endif /* CONFIG_DYNAMIC_FTRACE */
7061
7062#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7063static void
7064ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7065                struct trace_array *tr, struct ftrace_probe_ops *ops,
7066                void *data)
7067{
7068        tracing_snapshot_instance(tr);
7069}
7070
7071static void
7072ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7073                      struct trace_array *tr, struct ftrace_probe_ops *ops,
7074                      void *data)
7075{
7076        struct ftrace_func_mapper *mapper = data;
7077        long *count = NULL;
7078
7079        if (mapper)
7080                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7081
7082        if (count) {
7083
7084                if (*count <= 0)
7085                        return;
7086
7087                (*count)--;
7088        }
7089
7090        tracing_snapshot_instance(tr);
7091}
7092
7093static int
7094ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7095                      struct ftrace_probe_ops *ops, void *data)
7096{
7097        struct ftrace_func_mapper *mapper = data;
7098        long *count = NULL;
7099
7100        seq_printf(m, "%ps:", (void *)ip);
7101
7102        seq_puts(m, "snapshot");
7103
7104        if (mapper)
7105                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7106
7107        if (count)
7108                seq_printf(m, ":count=%ld\n", *count);
7109        else
7110                seq_puts(m, ":unlimited\n");
7111
7112        return 0;
7113}
7114
7115static int
7116ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7117                     unsigned long ip, void *init_data, void **data)
7118{
7119        struct ftrace_func_mapper *mapper = *data;
7120
7121        if (!mapper) {
7122                mapper = allocate_ftrace_func_mapper();
7123                if (!mapper)
7124                        return -ENOMEM;
7125                *data = mapper;
7126        }
7127
7128        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7129}
7130
7131static void
7132ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7133                     unsigned long ip, void *data)
7134{
7135        struct ftrace_func_mapper *mapper = data;
7136
7137        if (!ip) {
7138                if (!mapper)
7139                        return;
7140                free_ftrace_func_mapper(mapper, NULL);
7141                return;
7142        }
7143
7144        ftrace_func_mapper_remove_ip(mapper, ip);
7145}
7146
7147static struct ftrace_probe_ops snapshot_probe_ops = {
7148        .func                   = ftrace_snapshot,
7149        .print                  = ftrace_snapshot_print,
7150};
7151
7152static struct ftrace_probe_ops snapshot_count_probe_ops = {
7153        .func                   = ftrace_count_snapshot,
7154        .print                  = ftrace_snapshot_print,
7155        .init                   = ftrace_snapshot_init,
7156        .free                   = ftrace_snapshot_free,
7157};
7158
7159static int
7160ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7161                               char *glob, char *cmd, char *param, int enable)
7162{
7163        struct ftrace_probe_ops *ops;
7164        void *count = (void *)-1;
7165        char *number;
7166        int ret;
7167
7168        if (!tr)
7169                return -ENODEV;
7170
7171        /* hash funcs only work with set_ftrace_filter */
7172        if (!enable)
7173                return -EINVAL;
7174
7175        ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7176
7177        if (glob[0] == '!')
7178                return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7179
7180        if (!param)
7181                goto out_reg;
7182
7183        number = strsep(&param, ":");
7184
7185        if (!strlen(number))
7186                goto out_reg;
7187
7188        /*
7189         * We use the callback data field (which is a pointer)
7190         * as our counter.
7191         */
7192        ret = kstrtoul(number, 0, (unsigned long *)&count);
7193        if (ret)
7194                return ret;
7195
7196 out_reg:
7197        ret = tracing_alloc_snapshot_instance(tr);
7198        if (ret < 0)
7199                goto out;
7200
7201        ret = register_ftrace_function_probe(glob, tr, ops, count);
7202
7203 out:
7204        return ret < 0 ? ret : 0;
7205}
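/*
 * Editor's sketch (illustration only, not part of this file's build): the
 * callback above parses "<glob>:snapshot[:count]" written into
 * set_ftrace_filter.  do_sys_open is just an example target function;
 * assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: snapshot the trace the first five times
 * do_sys_open() is hit.  Writing "!do_sys_open:snapshot:0" removes the
 * probe again. */
static int arm_snapshot_probe(void)
{
        static const char cmd[] = "do_sys_open:snapshot:5";
        int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, cmd, strlen(cmd));
        close(fd);
        return n < 0 ? -1 : 0;
}
#endif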
7206
7207static struct ftrace_func_command ftrace_snapshot_cmd = {
7208        .name                   = "snapshot",
7209        .func                   = ftrace_trace_snapshot_callback,
7210};
7211
7212static __init int register_snapshot_cmd(void)
7213{
7214        return register_ftrace_command(&ftrace_snapshot_cmd);
7215}
7216#else
7217static inline __init int register_snapshot_cmd(void) { return 0; }
7218#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7219
7220static struct dentry *tracing_get_dentry(struct trace_array *tr)
7221{
7222        if (WARN_ON(!tr->dir))
7223                return ERR_PTR(-ENODEV);
7224
7225        /* Top directory uses NULL as the parent */
7226        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7227                return NULL;
7228
7229        /* All sub buffers have a descriptor */
7230        return tr->dir;
7231}
7232
7233static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7234{
7235        struct dentry *d_tracer;
7236
7237        if (tr->percpu_dir)
7238                return tr->percpu_dir;
7239
7240        d_tracer = tracing_get_dentry(tr);
7241        if (IS_ERR(d_tracer))
7242                return NULL;
7243
7244        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7245
7246        WARN_ONCE(!tr->percpu_dir,
7247                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7248
7249        return tr->percpu_dir;
7250}
7251
7252static struct dentry *
7253trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7254                      void *data, long cpu, const struct file_operations *fops)
7255{
7256        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7257
7258        if (ret) /* See tracing_get_cpu() */
7259                d_inode(ret)->i_cdev = (void *)(cpu + 1);
7260        return ret;
7261}
7262
7263static void
7264tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7265{
7266        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7267        struct dentry *d_cpu;
7268        char cpu_dir[30]; /* 30 characters should be more than enough */
7269
7270        if (!d_percpu)
7271                return;
7272
7273        snprintf(cpu_dir, 30, "cpu%ld", cpu);
7274        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7275        if (!d_cpu) {
7276                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7277                return;
7278        }
7279
7280        /* per cpu trace_pipe */
7281        trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7282                                tr, cpu, &tracing_pipe_fops);
7283
7284        /* per cpu trace */
7285        trace_create_cpu_file("trace", 0644, d_cpu,
7286                                tr, cpu, &tracing_fops);
7287
7288        trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7289                                tr, cpu, &tracing_buffers_fops);
7290
7291        trace_create_cpu_file("stats", 0444, d_cpu,
7292                                tr, cpu, &tracing_stats_fops);
7293
7294        trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7295                                tr, cpu, &tracing_entries_fops);
7296
7297#ifdef CONFIG_TRACER_SNAPSHOT
7298        trace_create_cpu_file("snapshot", 0644, d_cpu,
7299                                tr, cpu, &snapshot_fops);
7300
7301        trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7302                                tr, cpu, &snapshot_raw_fops);
7303#endif
7304}
7305
7306#ifdef CONFIG_FTRACE_SELFTEST
7307/* Let selftest have access to static functions in this file */
7308#include "trace_selftest.c"
7309#endif
7310
7311static ssize_t
7312trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7313                        loff_t *ppos)
7314{
7315        struct trace_option_dentry *topt = filp->private_data;
7316        char *buf;
7317
7318        if (topt->flags->val & topt->opt->bit)
7319                buf = "1\n";
7320        else
7321                buf = "0\n";
7322
7323        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7324}
7325
7326static ssize_t
7327trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7328                         loff_t *ppos)
7329{
7330        struct trace_option_dentry *topt = filp->private_data;
7331        unsigned long val;
7332        int ret;
7333
7334        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7335        if (ret)
7336                return ret;
7337
7338        if (val != 0 && val != 1)
7339                return -EINVAL;
7340
7341        if (!!(topt->flags->val & topt->opt->bit) != val) {
7342                mutex_lock(&trace_types_lock);
7343                ret = __set_tracer_option(topt->tr, topt->flags,
7344                                          topt->opt, !val);
7345                mutex_unlock(&trace_types_lock);
7346                if (ret)
7347                        return ret;
7348        }
7349
7350        *ppos += cnt;
7351
7352        return cnt;
7353}
7354
7355
7356static const struct file_operations trace_options_fops = {
7357        .open = tracing_open_generic,
7358        .read = trace_options_read,
7359        .write = trace_options_write,
7360        .llseek = generic_file_llseek,
7361};
7362
7363/*
7364 * In order to pass in both the trace_array descriptor as well as the index
7365 * to the flag that the trace option file represents, the trace_array
7366 * has a character array of trace_flags_index[], which holds the index
7367 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7368 * The address of this character array is passed to the flag option file
7369 * read/write callbacks.
7370 *
7371 * In order to extract both the index and the trace_array descriptor,
7372 * get_tr_index() uses the following algorithm.
7373 *
7374 *   idx = *ptr;
7375 *
7376 * This works because the pointer is the address of index[idx], and the
7377 * value stored there is idx itself (remember, index[1] == 1).
7378 *
7379 * Then, to get the trace_array descriptor, subtracting that index from
7380 * the pointer takes us back to the start of the array:
7381 *
7382 *   ptr - idx == &index[0]
7383 *
7384 * Then a simple container_of() from that pointer gets us to the
7385 * trace_array descriptor.
7386 */
7387static void get_tr_index(void *data, struct trace_array **ptr,
7388                         unsigned int *pindex)
7389{
7390        *pindex = *(unsigned char *)data;
7391
7392        *ptr = container_of(data - *pindex, struct trace_array,
7393                            trace_flags_index);
7394}
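/*
 * Editor's sketch (illustration only, not part of this file's build): a
 * stand-alone model of the index trick documented above, with container_of()
 * spelled out as offsetof() arithmetic.  "struct demo" is hypothetical.
 */
#if 0
#include <stddef.h>

struct demo {
        int             flags;
        unsigned char   index[8];       /* index[i] == i */
};

/* Given a pointer to index[i], recover both i and the enclosing struct. */
static struct demo *demo_from_index(void *data, unsigned int *pindex)
{
        *pindex = *(unsigned char *)data;

        return (struct demo *)((char *)data - *pindex -
                               offsetof(struct demo, index));
}
#endif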
7395
7396static ssize_t
7397trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7398                        loff_t *ppos)
7399{
7400        void *tr_index = filp->private_data;
7401        struct trace_array *tr;
7402        unsigned int index;
7403        char *buf;
7404
7405        get_tr_index(tr_index, &tr, &index);
7406
7407        if (tr->trace_flags & (1 << index))
7408                buf = "1\n";
7409        else
7410                buf = "0\n";
7411
7412        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7413}
7414
7415static ssize_t
7416trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7417                         loff_t *ppos)
7418{
7419        void *tr_index = filp->private_data;
7420        struct trace_array *tr;
7421        unsigned int index;
7422        unsigned long val;
7423        int ret;
7424
7425        get_tr_index(tr_index, &tr, &index);
7426
7427        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7428        if (ret)
7429                return ret;
7430
7431        if (val != 0 && val != 1)
7432                return -EINVAL;
7433
7434        mutex_lock(&trace_types_lock);
7435        ret = set_tracer_flag(tr, 1 << index, val);
7436        mutex_unlock(&trace_types_lock);
7437
7438        if (ret < 0)
7439                return ret;
7440
7441        *ppos += cnt;
7442
7443        return cnt;
7444}
7445
7446static const struct file_operations trace_options_core_fops = {
7447        .open = tracing_open_generic,
7448        .read = trace_options_core_read,
7449        .write = trace_options_core_write,
7450        .llseek = generic_file_llseek,
7451};
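/*
 * Editor's sketch (illustration only, not part of this file's build): the
 * core trace flags are flipped through per-flag files under options/, which
 * land in trace_options_core_write() above.  "sym-offset" is just an example
 * option name; assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical helper: only "0" and "1" are accepted by the write handler. */
static int set_core_option(const char *opt, int on)
{
        char path[256];
        ssize_t n;
        int fd;

        snprintf(path, sizeof(path), "/sys/kernel/tracing/options/%s", opt);
        fd = open(path, O_WRONLY);
        if (fd < 0)
                return -1;
        n = write(fd, on ? "1" : "0", 1);
        close(fd);
        return n < 0 ? -1 : 0;
}
#endif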
7452
7453struct dentry *trace_create_file(const char *name,
7454                                 umode_t mode,
7455                                 struct dentry *parent,
7456                                 void *data,
7457                                 const struct file_operations *fops)
7458{
7459        struct dentry *ret;
7460
7461        ret = tracefs_create_file(name, mode, parent, data, fops);
7462        if (!ret)
7463                pr_warn("Could not create tracefs '%s' entry\n", name);
7464
7465        return ret;
7466}
7467
7468
7469static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7470{
7471        struct dentry *d_tracer;
7472
7473        if (tr->options)
7474                return tr->options;
7475
7476        d_tracer = tracing_get_dentry(tr);
7477        if (IS_ERR(d_tracer))
7478                return NULL;
7479
7480        tr->options = tracefs_create_dir("options", d_tracer);
7481        if (!tr->options) {
7482                pr_warn("Could not create tracefs directory 'options'\n");
7483                return NULL;
7484        }
7485
7486        return tr->options;
7487}
7488
7489static void
7490create_trace_option_file(struct trace_array *tr,
7491                         struct trace_option_dentry *topt,
7492                         struct tracer_flags *flags,
7493                         struct tracer_opt *opt)
7494{
7495        struct dentry *t_options;
7496
7497        t_options = trace_options_init_dentry(tr);
7498        if (!t_options)
7499                return;
7500
7501        topt->flags = flags;
7502        topt->opt = opt;
7503        topt->tr = tr;
7504
7505        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7506                                    &trace_options_fops);
7507
7508}
7509
7510static void
7511create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7512{
7513        struct trace_option_dentry *topts;
7514        struct trace_options *tr_topts;
7515        struct tracer_flags *flags;
7516        struct tracer_opt *opts;
7517        int cnt;
7518        int i;
7519
7520        if (!tracer)
7521                return;
7522
7523        flags = tracer->flags;
7524
7525        if (!flags || !flags->opts)
7526                return;
7527
7528        /*
7529         * If this is an instance, only create flags for tracers
7530         * the instance may have.
7531         */
7532        if (!trace_ok_for_array(tracer, tr))
7533                return;
7534
7535        for (i = 0; i < tr->nr_topts; i++) {
7536                /* Make sure there are no duplicate flags. */
7537                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7538                        return;
7539        }
7540
7541        opts = flags->opts;
7542
7543        for (cnt = 0; opts[cnt].name; cnt++)
7544                ;
7545
7546        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7547        if (!topts)
7548                return;
7549
7550        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7551                            GFP_KERNEL);
7552        if (!tr_topts) {
7553                kfree(topts);
7554                return;
7555        }
7556
7557        tr->topts = tr_topts;
7558        tr->topts[tr->nr_topts].tracer = tracer;
7559        tr->topts[tr->nr_topts].topts = topts;
7560        tr->nr_topts++;
7561
7562        for (cnt = 0; opts[cnt].name; cnt++) {
7563                create_trace_option_file(tr, &topts[cnt], flags,
7564                                         &opts[cnt]);
7565                WARN_ONCE(topts[cnt].entry == NULL,
7566                          "Failed to create trace option: %s",
7567                          opts[cnt].name);
7568        }
7569}
7570
7571static struct dentry *
7572create_trace_option_core_file(struct trace_array *tr,
7573                              const char *option, long index)
7574{
7575        struct dentry *t_options;
7576
7577        t_options = trace_options_init_dentry(tr);
7578        if (!t_options)
7579                return NULL;
7580
7581        return trace_create_file(option, 0644, t_options,
7582                                 (void *)&tr->trace_flags_index[index],
7583                                 &trace_options_core_fops);
7584}
7585
7586static void create_trace_options_dir(struct trace_array *tr)
7587{
7588        struct dentry *t_options;
7589        bool top_level = tr == &global_trace;
7590        int i;
7591
7592        t_options = trace_options_init_dentry(tr);
7593        if (!t_options)
7594                return;
7595
7596        for (i = 0; trace_options[i]; i++) {
7597                if (top_level ||
7598                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7599                        create_trace_option_core_file(tr, trace_options[i], i);
7600        }
7601}
7602
7603static ssize_t
7604rb_simple_read(struct file *filp, char __user *ubuf,
7605               size_t cnt, loff_t *ppos)
7606{
7607        struct trace_array *tr = filp->private_data;
7608        char buf[64];
7609        int r;
7610
7611        r = tracer_tracing_is_on(tr);
7612        r = sprintf(buf, "%d\n", r);
7613
7614        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7615}
7616
7617static ssize_t
7618rb_simple_write(struct file *filp, const char __user *ubuf,
7619                size_t cnt, loff_t *ppos)
7620{
7621        struct trace_array *tr = filp->private_data;
7622        struct ring_buffer *buffer = tr->trace_buffer.buffer;
7623        unsigned long val;
7624        int ret;
7625
7626        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7627        if (ret)
7628                return ret;
7629
7630        if (buffer) {
7631                mutex_lock(&trace_types_lock);
7632                if (!!val == tracer_tracing_is_on(tr)) {
7633                        val = 0; /* do nothing */
7634                } else if (val) {
7635                        tracer_tracing_on(tr);
7636                        if (tr->current_trace->start)
7637                                tr->current_trace->start(tr);
7638                } else {
7639                        tracer_tracing_off(tr);
7640                        if (tr->current_trace->stop)
7641                                tr->current_trace->stop(tr);
7642                }
7643                mutex_unlock(&trace_types_lock);
7644        }
7645
7646        (*ppos)++;
7647
7648        return cnt;
7649}
7650
7651static const struct file_operations rb_simple_fops = {
7652        .open           = tracing_open_generic_tr,
7653        .read           = rb_simple_read,
7654        .write          = rb_simple_write,
7655        .release        = tracing_release_generic_tr,
7656        .llseek         = default_llseek,
7657};
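/*
 * Editor's sketch (illustration only, not part of this file's build):
 * rb_simple_write() above handles writes to the "tracing_on" file.  Assumes
 * tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

/* Hypothetical helper: on=1 lets the ring buffer record again, on=0 stops
 * recording without tearing down the current tracer. */
static int set_tracing_on(int on)
{
        int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
        ssize_t n;

        if (fd < 0)
                return -1;
        n = write(fd, on ? "1" : "0", 1);
        close(fd);
        return n < 0 ? -1 : 0;
}
#endif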
7658
7659struct dentry *trace_instance_dir;
7660
7661static void
7662init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7663
7664static int
7665allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7666{
7667        enum ring_buffer_flags rb_flags;
7668
7669        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7670
7671        buf->tr = tr;
7672
7673        buf->buffer = ring_buffer_alloc(size, rb_flags);
7674        if (!buf->buffer)
7675                return -ENOMEM;
7676
7677        buf->data = alloc_percpu(struct trace_array_cpu);
7678        if (!buf->data) {
7679                ring_buffer_free(buf->buffer);
7680                buf->buffer = NULL;
7681                return -ENOMEM;
7682        }
7683
7684        /* Allocate the first page for all buffers */
7685        set_buffer_entries(&tr->trace_buffer,
7686                           ring_buffer_size(tr->trace_buffer.buffer, 0));
7687
7688        return 0;
7689}
7690
7691static int allocate_trace_buffers(struct trace_array *tr, int size)
7692{
7693        int ret;
7694
7695        ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7696        if (ret)
7697                return ret;
7698
7699#ifdef CONFIG_TRACER_MAX_TRACE
7700        ret = allocate_trace_buffer(tr, &tr->max_buffer,
7701                                    allocate_snapshot ? size : 1);
7702        if (WARN_ON(ret)) {
7703                ring_buffer_free(tr->trace_buffer.buffer);
7704                tr->trace_buffer.buffer = NULL;
7705                free_percpu(tr->trace_buffer.data);
7706                tr->trace_buffer.data = NULL;
7707                return -ENOMEM;
7708        }
7709        tr->allocated_snapshot = allocate_snapshot;
7710
7711        /*
7712         * Only the top level trace array gets its snapshot allocated
7713         * from the kernel command line.
7714         */
7715        allocate_snapshot = false;
7716#endif
7717        return 0;
7718}
7719
7720static void free_trace_buffer(struct trace_buffer *buf)
7721{
7722        if (buf->buffer) {
7723                ring_buffer_free(buf->buffer);
7724                buf->buffer = NULL;
7725                free_percpu(buf->data);
7726                buf->data = NULL;
7727        }
7728}
7729
7730static void free_trace_buffers(struct trace_array *tr)
7731{
7732        if (!tr)
7733                return;
7734
7735        free_trace_buffer(&tr->trace_buffer);
7736
7737#ifdef CONFIG_TRACER_MAX_TRACE
7738        free_trace_buffer(&tr->max_buffer);
7739#endif
7740}
7741
7742static void init_trace_flags_index(struct trace_array *tr)
7743{
7744        int i;
7745
7746        /* Used by the trace options files */
7747        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7748                tr->trace_flags_index[i] = i;
7749}
7750
7751static void __update_tracer_options(struct trace_array *tr)
7752{
7753        struct tracer *t;
7754
7755        for (t = trace_types; t; t = t->next)
7756                add_tracer_options(tr, t);
7757}
7758
7759static void update_tracer_options(struct trace_array *tr)
7760{
7761        mutex_lock(&trace_types_lock);
7762        __update_tracer_options(tr);
7763        mutex_unlock(&trace_types_lock);
7764}
7765
7766static int instance_mkdir(const char *name)
7767{
7768        struct trace_array *tr;
7769        int ret;
7770
7771        mutex_lock(&event_mutex);
7772        mutex_lock(&trace_types_lock);
7773
7774        ret = -EEXIST;
7775        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7776                if (tr->name && strcmp(tr->name, name) == 0)
7777                        goto out_unlock;
7778        }
7779
7780        ret = -ENOMEM;
7781        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7782        if (!tr)
7783                goto out_unlock;
7784
7785        tr->name = kstrdup(name, GFP_KERNEL);
7786        if (!tr->name)
7787                goto out_free_tr;
7788
7789        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7790                goto out_free_tr;
7791
7792        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7793
7794        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7795
7796        raw_spin_lock_init(&tr->start_lock);
7797
7798        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7799
7800        tr->current_trace = &nop_trace;
7801
7802        INIT_LIST_HEAD(&tr->systems);
7803        INIT_LIST_HEAD(&tr->events);
7804        INIT_LIST_HEAD(&tr->hist_vars);
7805
7806        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7807                goto out_free_tr;
7808
7809        tr->dir = tracefs_create_dir(name, trace_instance_dir);
7810        if (!tr->dir)
7811                goto out_free_tr;
7812
7813        ret = event_trace_add_tracer(tr->dir, tr);
7814        if (ret) {
7815                tracefs_remove_recursive(tr->dir);
7816                goto out_free_tr;
7817        }
7818
7819        ftrace_init_trace_array(tr);
7820
7821        init_tracer_tracefs(tr, tr->dir);
7822        init_trace_flags_index(tr);
7823        __update_tracer_options(tr);
7824
7825        list_add(&tr->list, &ftrace_trace_arrays);
7826
7827        mutex_unlock(&trace_types_lock);
7828        mutex_unlock(&event_mutex);
7829
7830        return 0;
7831
7832 out_free_tr:
7833        free_trace_buffers(tr);
7834        free_cpumask_var(tr->tracing_cpumask);
7835        kfree(tr->name);
7836        kfree(tr);
7837
7838 out_unlock:
7839        mutex_unlock(&trace_types_lock);
7840        mutex_unlock(&event_mutex);
7841
7842        return ret;
7843
7844}
7845
7846static int instance_rmdir(const char *name)
7847{
7848        struct trace_array *tr;
7849        int found = 0;
7850        int ret;
7851        int i;
7852
7853        mutex_lock(&event_mutex);
7854        mutex_lock(&trace_types_lock);
7855
7856        ret = -ENODEV;
7857        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7858                if (tr->name && strcmp(tr->name, name) == 0) {
7859                        found = 1;
7860                        break;
7861                }
7862        }
7863        if (!found)
7864                goto out_unlock;
7865
7866        ret = -EBUSY;
7867        if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7868                goto out_unlock;
7869
7870        list_del(&tr->list);
7871
7872        /* Disable all the flags that were enabled coming in */
7873        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7874                if ((1 << i) & ZEROED_TRACE_FLAGS)
7875                        set_tracer_flag(tr, 1 << i, 0);
7876        }
7877
7878        tracing_set_nop(tr);
7879        clear_ftrace_function_probes(tr);
7880        event_trace_del_tracer(tr);
7881        ftrace_clear_pids(tr);
7882        ftrace_destroy_function_files(tr);
7883        tracefs_remove_recursive(tr->dir);
7884        free_trace_buffers(tr);
7885
7886        for (i = 0; i < tr->nr_topts; i++) {
7887                kfree(tr->topts[i].topts);
7888        }
7889        kfree(tr->topts);
7890
7891        free_cpumask_var(tr->tracing_cpumask);
7892        kfree(tr->name);
7893        kfree(tr);
7894
7895        ret = 0;
7896
7897 out_unlock:
7898        mutex_unlock(&trace_types_lock);
7899        mutex_unlock(&event_mutex);
7900
7901        return ret;
7902}
7903
7904static __init void create_trace_instances(struct dentry *d_tracer)
7905{
7906        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7907                                                         instance_mkdir,
7908                                                         instance_rmdir);
7909        if (WARN_ON(!trace_instance_dir))
7910                return;
7911}
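/*
 * Editor's sketch (illustration only, not part of this file's build):
 * instance_mkdir()/instance_rmdir() above run when user space creates or
 * removes a directory under instances/; each instance gets its own buffers
 * and control files.  Assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Hypothetical helper: create a named trace instance (rmdir() on the same
 * path tears it down again, provided nothing holds it busy). */
static int make_instance(const char *name)
{
        char path[256];

        snprintf(path, sizeof(path),
                 "/sys/kernel/tracing/instances/%s", name);
        return mkdir(path, 0755);
}
#endif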
7912
7913static void
7914init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7915{
7916        struct trace_event_file *file;
7917        int cpu;
7918
7919        trace_create_file("available_tracers", 0444, d_tracer,
7920                        tr, &show_traces_fops);
7921
7922        trace_create_file("current_tracer", 0644, d_tracer,
7923                        tr, &set_tracer_fops);
7924
7925        trace_create_file("tracing_cpumask", 0644, d_tracer,
7926                          tr, &tracing_cpumask_fops);
7927
7928        trace_create_file("trace_options", 0644, d_tracer,
7929                          tr, &tracing_iter_fops);
7930
7931        trace_create_file("trace", 0644, d_tracer,
7932                          tr, &tracing_fops);
7933
7934        trace_create_file("trace_pipe", 0444, d_tracer,
7935                          tr, &tracing_pipe_fops);
7936
7937        trace_create_file("buffer_size_kb", 0644, d_tracer,
7938                          tr, &tracing_entries_fops);
7939
7940        trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7941                          tr, &tracing_total_entries_fops);
7942
7943        trace_create_file("free_buffer", 0200, d_tracer,
7944                          tr, &tracing_free_buffer_fops);
7945
7946        trace_create_file("trace_marker", 0220, d_tracer,
7947                          tr, &tracing_mark_fops);
7948
7949        file = __find_event_file(tr, "ftrace", "print");
7950        if (file && file->dir)
7951                trace_create_file("trigger", 0644, file->dir, file,
7952                                  &event_trigger_fops);
7953        tr->trace_marker_file = file;
7954
7955        trace_create_file("trace_marker_raw", 0220, d_tracer,
7956                          tr, &tracing_mark_raw_fops);
7957
7958        trace_create_file("trace_clock", 0644, d_tracer, tr,
7959                          &trace_clock_fops);
7960
7961        trace_create_file("tracing_on", 0644, d_tracer,
7962                          tr, &rb_simple_fops);
7963
7964        trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7965                          &trace_time_stamp_mode_fops);
7966
7967        create_trace_options_dir(tr);
7968
7969#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7970        trace_create_file("tracing_max_latency", 0644, d_tracer,
7971                        &tr->max_latency, &tracing_max_lat_fops);
7972#endif
7973
7974        if (ftrace_create_function_files(tr, d_tracer))
7975                WARN(1, "Could not allocate function filter files");
7976
7977#ifdef CONFIG_TRACER_SNAPSHOT
7978        trace_create_file("snapshot", 0644, d_tracer,
7979                          tr, &snapshot_fops);
7980#endif
7981
7982        for_each_tracing_cpu(cpu)
7983                tracing_init_tracefs_percpu(tr, cpu);
7984
7985        ftrace_init_tracefs(tr, d_tracer);
7986}
7987
7988static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7989{
7990        struct vfsmount *mnt;
7991        struct file_system_type *type;
7992
7993        /*
7994         * To maintain backward compatibility for tools that mount
7995         * debugfs to get to the tracing facility, tracefs is automatically
7996         * mounted to the debugfs/tracing directory.
7997         */
7998        type = get_fs_type("tracefs");
7999        if (!type)
8000                return NULL;
8001        mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8002        put_filesystem(type);
8003        if (IS_ERR(mnt))
8004                return NULL;
8005        mntget(mnt);
8006
8007        return mnt;
8008}
8009
8010/**
8011 * tracing_init_dentry - initialize top level trace array
8012 *
8013 * This is called when creating files or directories in the tracing
8014 * directory. It is called via fs_initcall() by any of the boot up code
8015 * and expects to return the dentry of the top level tracing directory.
8016 */
8017struct dentry *tracing_init_dentry(void)
8018{
8019        struct trace_array *tr = &global_trace;
8020
8021        /* The top level trace array uses NULL as parent */
8022        if (tr->dir)
8023                return NULL;
8024
8025        if (WARN_ON(!tracefs_initialized()) ||
8026                (IS_ENABLED(CONFIG_DEBUG_FS) &&
8027                 WARN_ON(!debugfs_initialized())))
8028                return ERR_PTR(-ENODEV);
8029
8030        /*
8031         * As there may still be users that expect the tracing
8032         * files to exist in debugfs/tracing, we must automount
8033         * the tracefs file system there, so older tools still
8034         * work with the newer kernel.
8035         */
8036        tr->dir = debugfs_create_automount("tracing", NULL,
8037                                           trace_automount, NULL);
8038        if (!tr->dir) {
8039                pr_warn_once("Could not create debugfs directory 'tracing'\n");
8040                return ERR_PTR(-ENOMEM);
8041        }
8042
8043        return NULL;
8044}
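/*
 * Editor's sketch (illustration only, not part of this file's build): besides
 * the debugfs automount set up above, tracefs can be mounted directly.
 * Assumes the /sys/kernel/tracing mount point exists.
 */
#if 0
#include <sys/mount.h>

/* Hypothetical helper: equivalent to
 * "mount -t tracefs nodev /sys/kernel/tracing". */
static int mount_tracefs(void)
{
        return mount("nodev", "/sys/kernel/tracing", "tracefs", 0, NULL);
}
#endif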
8045
8046extern struct trace_eval_map *__start_ftrace_eval_maps[];
8047extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8048
8049static void __init trace_eval_init(void)
8050{
8051        int len;
8052
8053        len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8054        trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8055}
8056
8057#ifdef CONFIG_MODULES
8058static void trace_module_add_evals(struct module *mod)
8059{
8060        if (!mod->num_trace_evals)
8061                return;
8062
8063        /*
8064         * Modules with bad taint do not have events created, do
8065         * not bother with enums either.
8066         */
8067        if (trace_module_has_bad_taint(mod))
8068                return;
8069
8070        trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8071}
8072
8073#ifdef CONFIG_TRACE_EVAL_MAP_FILE
8074static void trace_module_remove_evals(struct module *mod)
8075{
8076        union trace_eval_map_item *map;
8077        union trace_eval_map_item **last = &trace_eval_maps;
8078
8079        if (!mod->num_trace_evals)
8080                return;
8081
8082        mutex_lock(&trace_eval_mutex);
8083
8084        map = trace_eval_maps;
8085
8086        while (map) {
8087                if (map->head.mod == mod)
8088                        break;
8089                map = trace_eval_jmp_to_tail(map);
8090                last = &map->tail.next;
8091                map = map->tail.next;
8092        }
8093        if (!map)
8094                goto out;
8095
8096        *last = trace_eval_jmp_to_tail(map)->tail.next;
8097        kfree(map);
8098 out:
8099        mutex_unlock(&trace_eval_mutex);
8100}
8101#else
8102static inline void trace_module_remove_evals(struct module *mod) { }
8103#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8104
8105static int trace_module_notify(struct notifier_block *self,
8106                               unsigned long val, void *data)
8107{
8108        struct module *mod = data;
8109
8110        switch (val) {
8111        case MODULE_STATE_COMING:
8112                trace_module_add_evals(mod);
8113                break;
8114        case MODULE_STATE_GOING:
8115                trace_module_remove_evals(mod);
8116                break;
8117        }
8118
8119        return 0;
8120}
8121
8122static struct notifier_block trace_module_nb = {
8123        .notifier_call = trace_module_notify,
8124        .priority = 0,
8125};
8126#endif /* CONFIG_MODULES */
8127
8128static __init int tracer_init_tracefs(void)
8129{
8130        struct dentry *d_tracer;
8131
8132        trace_access_lock_init();
8133
8134        d_tracer = tracing_init_dentry();
8135        if (IS_ERR(d_tracer))
8136                return 0;
8137
8138        event_trace_init();
8139
8140        init_tracer_tracefs(&global_trace, d_tracer);
8141        ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8142
8143        trace_create_file("tracing_thresh", 0644, d_tracer,
8144                        &global_trace, &tracing_thresh_fops);
8145
8146        trace_create_file("README", 0444, d_tracer,
8147                        NULL, &tracing_readme_fops);
8148
8149        trace_create_file("saved_cmdlines", 0444, d_tracer,
8150                        NULL, &tracing_saved_cmdlines_fops);
8151
8152        trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8153                          NULL, &tracing_saved_cmdlines_size_fops);
8154
8155        trace_create_file("saved_tgids", 0444, d_tracer,
8156                        NULL, &tracing_saved_tgids_fops);
8157
8158        trace_eval_init();
8159
8160        trace_create_eval_file(d_tracer);
8161
8162#ifdef CONFIG_MODULES
8163        register_module_notifier(&trace_module_nb);
8164#endif
8165
8166#ifdef CONFIG_DYNAMIC_FTRACE
8167        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8168                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8169#endif
8170
8171        create_trace_instances(d_tracer);
8172
8173        update_tracer_options(&global_trace);
8174
8175        return 0;
8176}
8177
8178static int trace_panic_handler(struct notifier_block *this,
8179                               unsigned long event, void *unused)
8180{
8181        if (ftrace_dump_on_oops)
8182                ftrace_dump(ftrace_dump_on_oops);
8183        return NOTIFY_OK;
8184}
8185
8186static struct notifier_block trace_panic_notifier = {
8187        .notifier_call  = trace_panic_handler,
8188        .next           = NULL,
8189        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8190};
8191
8192static int trace_die_handler(struct notifier_block *self,
8193                             unsigned long val,
8194                             void *data)
8195{
8196        switch (val) {
8197        case DIE_OOPS:
8198                if (ftrace_dump_on_oops)
8199                        ftrace_dump(ftrace_dump_on_oops);
8200                break;
8201        default:
8202                break;
8203        }
8204        return NOTIFY_OK;
8205}
8206
8207static struct notifier_block trace_die_notifier = {
8208        .notifier_call = trace_die_handler,
8209        .priority = 200
8210};
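
/*
 * Usage note: the panic and die notifiers above only dump when
 * ftrace_dump_on_oops is set, which is typically done on the kernel
 * command line or at run time via the sysctl, e.g.:
 *
 *	ftrace_dump_on_oops		boot parameter, dump all CPUs
 *	ftrace_dump_on_oops=orig_cpu	boot parameter, dump only the CPU
 *					that triggered the oops
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * A dump can also be forced by hand with sysrq-z, which ends up calling
 * ftrace_dump() below.
 */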
8211
8212/*
8213 * printk() is limited to 1024 characters, and we really don't need it
8214 * that big: nothing should be printing 1000 characters anyway.
8215 */
8216#define TRACE_MAX_PRINT         1000
8217
8218/*
8219 * Define here KERN_TRACE so that we have one place to modify
8220 * it if we decide to change what log level the ftrace dump
8221 * should be at.
8222 */
8223#define KERN_TRACE              KERN_EMERG
8224
8225void
8226trace_printk_seq(struct trace_seq *s)
8227{
8228        /* Probably should print a warning here. */
8229        if (s->seq.len >= TRACE_MAX_PRINT)
8230                s->seq.len = TRACE_MAX_PRINT;
8231
8232        /*
8233         * More paranoid code. Although the buffer size is set to
8234         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8235         * an extra layer of protection.
8236         */
8237        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8238                s->seq.len = s->seq.size - 1;
8239
8240        /* should already be NUL terminated, but we are paranoid. */
8241        s->buffer[s->seq.len] = 0;
8242
8243        printk(KERN_TRACE "%s", s->buffer);
8244
8245        trace_seq_init(s);
8246}
8247
8248void trace_init_global_iter(struct trace_iterator *iter)
8249{
8250        iter->tr = &global_trace;
8251        iter->trace = iter->tr->current_trace;
8252        iter->cpu_file = RING_BUFFER_ALL_CPUS;
8253        iter->trace_buffer = &global_trace.trace_buffer;
8254
8255        if (iter->trace && iter->trace->open)
8256                iter->trace->open(iter);
8257
8258        /* Annotate start of buffers if we had overruns */
8259        if (ring_buffer_overruns(iter->trace_buffer->buffer))
8260                iter->iter_flags |= TRACE_FILE_ANNOTATE;
8261
8262        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8263        if (trace_clocks[iter->tr->clock_id].in_ns)
8264                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8265}
8266
8267void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8268{
8269        /* use static because iter can be a bit big for the stack */
8270        static struct trace_iterator iter;
8271        static atomic_t dump_running;
8272        struct trace_array *tr = &global_trace;
8273        unsigned int old_userobj;
8274        unsigned long flags;
8275        int cnt = 0, cpu;
8276
8277        /* Only allow one dump user at a time. */
8278        if (atomic_inc_return(&dump_running) != 1) {
8279                atomic_dec(&dump_running);
8280                return;
8281        }
8282
8283        /*
8284         * Always turn off tracing when we dump.
8285         * We don't need to show trace output of what happens
8286         * between multiple crashes.
8287         *
8288         * If the user does a sysrq-z, then they can re-enable
8289         * tracing with echo 1 > tracing_on.
8290         */
8291        tracing_off();
8292
8293        local_irq_save(flags);
8294        printk_nmi_direct_enter();
8295
8296        /* Simulate the iterator */
8297        trace_init_global_iter(&iter);
8298
8299        for_each_tracing_cpu(cpu) {
8300                atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8301        }
8302
8303        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8304
8305        /* don't look at user memory in panic mode */
8306        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8307
8308        switch (oops_dump_mode) {
8309        case DUMP_ALL:
8310                iter.cpu_file = RING_BUFFER_ALL_CPUS;
8311                break;
8312        case DUMP_ORIG:
8313                iter.cpu_file = raw_smp_processor_id();
8314                break;
8315        case DUMP_NONE:
8316                goto out_enable;
8317        default:
8318                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8319                iter.cpu_file = RING_BUFFER_ALL_CPUS;
8320        }
8321
8322        printk(KERN_TRACE "Dumping ftrace buffer:\n");
8323
8324        /* Did function tracer already get disabled? */
8325        if (ftrace_is_dead()) {
8326                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8327                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8328        }
8329
8330        /*
8331         * We need to stop all tracing on all CPUs to read
8332         * the next buffer. This is a bit expensive, but is
8333         * not done often. We print everything we can read,
8334         * and then release the locks again.
8335         */
8336
8337        while (!trace_empty(&iter)) {
8338
8339                if (!cnt)
8340                        printk(KERN_TRACE "---------------------------------\n");
8341
8342                cnt++;
8343
8344                /* reset all but tr, trace, and overruns */
8345                memset(&iter.seq, 0,
8346                       sizeof(struct trace_iterator) -
8347                       offsetof(struct trace_iterator, seq));
8348                iter.iter_flags |= TRACE_FILE_LAT_FMT;
8349                iter.pos = -1;
8350
8351                if (trace_find_next_entry_inc(&iter) != NULL) {
8352                        int ret;
8353
8354                        ret = print_trace_line(&iter);
8355                        if (ret != TRACE_TYPE_NO_CONSUME)
8356                                trace_consume(&iter);
8357                }
8358                touch_nmi_watchdog();
8359
8360                trace_printk_seq(&iter.seq);
8361        }
8362
8363        if (!cnt)
8364                printk(KERN_TRACE "   (ftrace buffer empty)\n");
8365        else
8366                printk(KERN_TRACE "---------------------------------\n");
8367
8368 out_enable:
8369        tr->trace_flags |= old_userobj;
8370
8371        for_each_tracing_cpu(cpu) {
8372                atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8373        }
8374        atomic_dec(&dump_running);
8375        printk_nmi_direct_exit();
8376        local_irq_restore(flags);
8377}
8378EXPORT_SYMBOL_GPL(ftrace_dump);
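
/*
 * Illustrative sketch (not part of the original source): ftrace_dump()
 * is exported, so a module can dump the trace buffers from its own
 * fatal-error path.  foo_fatal_error() is a hypothetical helper; only
 * ftrace_dump() and the DUMP_* modes come from the kernel:
 *
 *	#include <linux/kernel.h>	// ftrace_dump(), enum ftrace_dump_mode
 *
 *	static void foo_fatal_error(void)
 *	{
 *		pr_err("foo: fatal error, dumping trace buffers\n");
 *		ftrace_dump(DUMP_ALL);	// DUMP_ORIG dumps only this CPU
 *	}
 *
 * Note that ftrace_dump() calls tracing_off(), so tracing stays off
 * afterwards until re-enabled with "echo 1 > tracing_on".
 */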
8379
8380int trace_run_command(const char *buf, int (*createfn)(int, char **))
8381{
8382        char **argv;
8383        int argc, ret;
8384
8385        argc = 0;
8386        ret = 0;
8387        argv = argv_split(GFP_KERNEL, buf, &argc);
8388        if (!argv)
8389                return -ENOMEM;
8390
8391        if (argc)
8392                ret = createfn(argc, argv);
8393
8394        argv_free(argv);
8395
8396        return ret;
8397}
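
/*
 * Illustrative sketch (not part of the original source): trace_run_command()
 * splits @buf into an argv with argv_split() and hands it to @createfn.
 * A hypothetical callback that expects "<name> <value>" would look like:
 *
 *	static int foo_create(int argc, char **argv)	// hypothetical
 *	{
 *		if (argc != 2)
 *			return -EINVAL;
 *		pr_info("creating %s with value %s\n", argv[0], argv[1]);
 *		return 0;
 *	}
 *
 *	// err = trace_run_command("foo 42", foo_create);
 */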
8398
8399#define WRITE_BUFSIZE  4096
8400
8401ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8402                                size_t count, loff_t *ppos,
8403                                int (*createfn)(int, char **))
8404{
8405        char *kbuf, *buf, *tmp;
8406        int ret = 0;
8407        size_t done = 0;
8408        size_t size;
8409
8410        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8411        if (!kbuf)
8412                return -ENOMEM;
8413
8414        while (done < count) {
8415                size = count - done;
8416
8417                if (size >= WRITE_BUFSIZE)
8418                        size = WRITE_BUFSIZE - 1;
8419
8420                if (copy_from_user(kbuf, buffer + done, size)) {
8421                        ret = -EFAULT;
8422                        goto out;
8423                }
8424                kbuf[size] = '\0';
8425                buf = kbuf;
8426                do {
8427                        tmp = strchr(buf, '\n');
8428                        if (tmp) {
8429                                *tmp = '\0';
8430                                size = tmp - buf + 1;
8431                        } else {
8432                                size = strlen(buf);
8433                                if (done + size < count) {
8434                                        if (buf != kbuf)
8435                                                break;
8436                                        /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8437                                        pr_warn("Line length is too long: Should be less than %d\n",
8438                                                WRITE_BUFSIZE - 2);
8439                                        ret = -EINVAL;
8440                                        goto out;
8441                                }
8442                        }
8443                        done += size;
8444
8445                        /* Remove comments */
8446                        tmp = strchr(buf, '#');
8447
8448                        if (tmp)
8449                                *tmp = '\0';
8450
8451                        ret = trace_run_command(buf, createfn);
8452                        if (ret)
8453                                goto out;
8454                        buf += size;
8455
8456                } while (done < count);
8457        }
8458        ret = done;
8459
8460out:
8461        kfree(kbuf);
8462
8463        return ret;
8464}
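
/*
 * Usage sketch: trace_parse_run_command() is the write helper behind the
 * dynamic event control files.  The kprobe_events write handler of this
 * era, for instance, reduces to (simplified from
 * kernel/trace/trace_kprobe.c):
 *
 *	static ssize_t probes_write(struct file *file, const char __user *buffer,
 *				    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       create_trace_kprobe);
 *	}
 *
 * Each newline-separated line written to the file (with '#' comments
 * stripped) is split into argc/argv and passed to the create callback.
 */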
8465
8466__init static int tracer_alloc_buffers(void)
8467{
8468        int ring_buf_size;
8469        int ret = -ENOMEM;
8470
8471        /*
8472         * Make sure we don't accidentally add more trace options
8473         * than we have bits for.
8474         */
8475        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8476
8477        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8478                goto out;
8479
8480        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8481                goto out_free_buffer_mask;
8482
8483        /* Only allocate trace_printk buffers if a trace_printk exists */
8484        if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8485                /* Must be called before global_trace.buffer is allocated */
8486                trace_printk_init_buffers();
8487
8488        /* To save memory, keep the ring buffer size to its minimum */
8489        if (ring_buffer_expanded)
8490                ring_buf_size = trace_buf_size;
8491        else
8492                ring_buf_size = 1;
8493
8494        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8495        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8496
8497        raw_spin_lock_init(&global_trace.start_lock);
8498
8499        /*
8500         * The prepare callback allocates some memory for the ring buffer. We
8501         * don't free the buffer if the CPU goes down. If we were to free
8502         * the buffer, then the user would lose any trace that was in the
8503         * buffer. The memory will be removed once the "instance" is removed.
8504         */
8505        ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8506                                      "trace/RB:prepare", trace_rb_cpu_prepare,
8507                                      NULL);
8508        if (ret < 0)
8509                goto out_free_cpumask;
8510        /* Used for event triggers */
8511        ret = -ENOMEM;
8512        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8513        if (!temp_buffer)
8514                goto out_rm_hp_state;
8515
8516        if (trace_create_savedcmd() < 0)
8517                goto out_free_temp_buffer;
8518
8519        /* TODO: make the number of buffers hot pluggable with CPUs */
8520        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8521                printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8522                WARN_ON(1);
8523                goto out_free_savedcmd;
8524        }
8525
8526        if (global_trace.buffer_disabled)
8527                tracing_off();
8528
8529        if (trace_boot_clock) {
8530                ret = tracing_set_clock(&global_trace, trace_boot_clock);
8531                if (ret < 0)
8532                        pr_warn("Trace clock %s not defined, going back to default\n",
8533                                trace_boot_clock);
8534        }
8535
8536        /*
8537         * register_tracer() might reference current_trace, so it
8538         * needs to be set before we register anything. This is
8539         * just a bootstrap of current_trace anyway.
8540         */
8541        global_trace.current_trace = &nop_trace;
8542
8543        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8544
8545        ftrace_init_global_array_ops(&global_trace);
8546
8547        init_trace_flags_index(&global_trace);
8548
8549        register_tracer(&nop_trace);
8550
8551        /* Function tracing may start here (via kernel command line) */
8552        init_function_trace();
8553
8554        /* All seems OK, enable tracing */
8555        tracing_disabled = 0;
8556
8557        atomic_notifier_chain_register(&panic_notifier_list,
8558                                       &trace_panic_notifier);
8559
8560        register_die_notifier(&trace_die_notifier);
8561
8562        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8563
8564        INIT_LIST_HEAD(&global_trace.systems);
8565        INIT_LIST_HEAD(&global_trace.events);
8566        INIT_LIST_HEAD(&global_trace.hist_vars);
8567        list_add(&global_trace.list, &ftrace_trace_arrays);
8568
8569        apply_trace_boot_options();
8570
8571        register_snapshot_cmd();
8572
8573        return 0;
8574
8575out_free_savedcmd:
8576        free_saved_cmdlines_buffer(savedcmd);
8577out_free_temp_buffer:
8578        ring_buffer_free(temp_buffer);
8579out_rm_hp_state:
8580        cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8581out_free_cpumask:
8582        free_cpumask_var(global_trace.tracing_cpumask);
8583out_free_buffer_mask:
8584        free_cpumask_var(tracing_buffer_mask);
8585out:
8586        return ret;
8587}
8588
8589void __init early_trace_init(void)
8590{
8591        if (tracepoint_printk) {
8592                tracepoint_print_iter =
8593                        kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8594                if (WARN_ON(!tracepoint_print_iter))
8595                        tracepoint_printk = 0;
8596                else
8597                        static_key_enable(&tracepoint_printk_key.key);
8598        }
8599        tracer_alloc_buffers();
8600}
8601
8602void __init trace_init(void)
8603{
8604        trace_event_init();
8605}
8606
8607__init static int clear_boot_tracer(void)
8608{
8609        /*
8610         * The buffer holding the default bootup tracer name is in an
8611         * init section. This function is called at late init: if the
8612         * boot tracer was never registered, clear the pointer out to
8613         * prevent a later registration from accessing the buffer
8614         * that is about to be freed.
8615         */
8616        if (!default_bootup_tracer)
8617                return 0;
8618
8619        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8620               default_bootup_tracer);
8621        default_bootup_tracer = NULL;
8622
8623        return 0;
8624}
8625
8626fs_initcall(tracer_init_tracefs);
8627late_initcall_sync(clear_boot_tracer);
8628
8629#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8630__init static int tracing_set_default_clock(void)
8631{
8632        /* sched_clock_stable() is determined in late_initcall */
8633        if (!trace_boot_clock && !sched_clock_stable()) {
8634                printk(KERN_WARNING
8635                       "Unstable clock detected, switching default tracing clock to \"global\"\n"
8636                       "If you want to keep using the local clock, then add:\n"
8637                       "  \"trace_clock=local\"\n"
8638                       "on the kernel command line\n");
8639                tracing_set_clock(&global_trace, "global");
8640        }
8641
8642        return 0;
8643}
8644late_initcall_sync(tracing_set_default_clock);
8645#endif
8646