linux/kernel/trace/trace.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * ring buffer based function tracer
   4 *
   5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
   6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
   7 *
   8 * Originally taken from the RT patch by:
   9 *    Arnaldo Carvalho de Melo <acme@redhat.com>
  10 *
  11 * Based on code from the latency_tracer, that is:
  12 *  Copyright (C) 2004-2006 Ingo Molnar
  13 *  Copyright (C) 2004 Nadia Yvette Chambers
  14 */
  15#include <linux/ring_buffer.h>
  16#include <generated/utsrelease.h>
  17#include <linux/stacktrace.h>
  18#include <linux/writeback.h>
  19#include <linux/kallsyms.h>
  20#include <linux/security.h>
  21#include <linux/seq_file.h>
  22#include <linux/notifier.h>
  23#include <linux/irqflags.h>
  24#include <linux/debugfs.h>
  25#include <linux/tracefs.h>
  26#include <linux/pagemap.h>
  27#include <linux/hardirq.h>
  28#include <linux/linkage.h>
  29#include <linux/uaccess.h>
  30#include <linux/vmalloc.h>
  31#include <linux/ftrace.h>
  32#include <linux/module.h>
  33#include <linux/percpu.h>
  34#include <linux/splice.h>
  35#include <linux/kdebug.h>
  36#include <linux/string.h>
  37#include <linux/mount.h>
  38#include <linux/rwsem.h>
  39#include <linux/slab.h>
  40#include <linux/ctype.h>
  41#include <linux/init.h>
  42#include <linux/poll.h>
  43#include <linux/nmi.h>
  44#include <linux/fs.h>
  45#include <linux/trace.h>
  46#include <linux/sched/clock.h>
  47#include <linux/sched/rt.h>
  48#include <linux/fsnotify.h>
  49#include <linux/irq_work.h>
  50#include <linux/workqueue.h>
  51
  52#include "trace.h"
  53#include "trace_output.h"
  54
  55/*
  56 * On boot up, the ring buffer is set to the minimum size, so that
  57 * we do not waste memory on systems that are not using tracing.
  58 */
  59bool ring_buffer_expanded;
  60
  61/*
  62 * We need to change this state when a selftest is running.
   63 * A selftest will look into the ring-buffer to count the
   64 * entries inserted during the selftest, although some concurrent
   65 * insertions into the ring-buffer, such as trace_printk(), could occur
   66 * at the same time, giving false positive or negative results.
  67 */
  68static bool __read_mostly tracing_selftest_running;
  69
  70/*
  71 * If a tracer is running, we do not want to run SELFTEST.
  72 */
  73bool __read_mostly tracing_selftest_disabled;
  74
  75/* Pipe tracepoints to printk */
  76struct trace_iterator *tracepoint_print_iter;
  77int tracepoint_printk;
  78static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
  79
  80/* For tracers that don't implement custom flags */
  81static struct tracer_opt dummy_tracer_opt[] = {
  82        { }
  83};
  84
  85static int
  86dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
  87{
  88        return 0;
  89}
  90
  91/*
  92 * To prevent the comm cache from being overwritten when no
  93 * tracing is active, only save the comm when a trace event
  94 * occurred.
  95 */
  96static DEFINE_PER_CPU(bool, trace_taskinfo_save);
  97
  98/*
  99 * Kill all tracing for good (never come back).
 100 * It is initialized to 1 but will turn to zero if the initialization
 101 * of the tracer is successful. But that is the only place that sets
 102 * this back to zero.
 103 */
 104static int tracing_disabled = 1;
 105
 106cpumask_var_t __read_mostly     tracing_buffer_mask;
 107
 108/*
 109 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 110 *
 111 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 112 * is set, then ftrace_dump is called. This will output the contents
 113 * of the ftrace buffers to the console.  This is very useful for
  114 * capturing traces that lead to crashes and outputting them to a
 115 * serial console.
 116 *
  117 * It is off by default, but you can enable it either by specifying
  118 * "ftrace_dump_on_oops" on the kernel command line, or by setting
  119 * /proc/sys/kernel/ftrace_dump_on_oops.
  120 * Set it to 1 to dump the buffers of all CPUs.
  121 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
 122 */
 123
 124enum ftrace_dump_mode ftrace_dump_on_oops;
 125
 126/* When set, tracing will stop when a WARN*() is hit */
 127int __disable_trace_on_warning;
 128
 129#ifdef CONFIG_TRACE_EVAL_MAP_FILE
 130/* Map of enums to their values, for "eval_map" file */
 131struct trace_eval_map_head {
 132        struct module                   *mod;
 133        unsigned long                   length;
 134};
 135
 136union trace_eval_map_item;
 137
 138struct trace_eval_map_tail {
 139        /*
  140         * "end" points to NULL, as it must be different
  141         * from "mod" or "eval_string"
 142         */
 143        union trace_eval_map_item       *next;
 144        const char                      *end;   /* points to NULL */
 145};
 146
 147static DEFINE_MUTEX(trace_eval_mutex);
 148
 149/*
 150 * The trace_eval_maps are saved in an array with two extra elements,
 151 * one at the beginning, and one at the end. The beginning item contains
 152 * the count of the saved maps (head.length), and the module they
 153 * belong to if not built in (head.mod). The ending item contains a
 154 * pointer to the next array of saved eval_map items.
 155 */
 156union trace_eval_map_item {
 157        struct trace_eval_map           map;
 158        struct trace_eval_map_head      head;
 159        struct trace_eval_map_tail      tail;
 160};
 161
 162static union trace_eval_map_item *trace_eval_maps;
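
/*
 * Illustration of the layout described above: one saved block looks like
 * [head][map 0] ... [map N-1][tail]. A hypothetical walk over such a block
 * (the function below is illustrative only, not part of this file) could be:
 */
#if 0
static void walk_eval_block(union trace_eval_map_item *ptr)
{
        unsigned long len = ptr->head.length;   /* element 0 is the head */
        unsigned long i;

        for (i = 0; i < len; i++) {
                struct trace_eval_map *map = &ptr[i + 1].map;
                /* use map->eval_string / map->eval_value here */
        }
        /* ptr[len + 1].tail.next points to the next saved block, or NULL */
}
#endif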
 163#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 164
 165int tracing_set_tracer(struct trace_array *tr, const char *buf);
 166static void ftrace_trace_userstack(struct trace_buffer *buffer,
 167                                   unsigned long flags, int pc);
 168
 169#define MAX_TRACER_SIZE         100
 170static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 171static char *default_bootup_tracer;
 172
 173static bool allocate_snapshot;
 174
 175static int __init set_cmdline_ftrace(char *str)
 176{
 177        strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 178        default_bootup_tracer = bootup_tracer_buf;
 179        /* We are using ftrace early, expand it */
 180        ring_buffer_expanded = true;
 181        return 1;
 182}
 183__setup("ftrace=", set_cmdline_ftrace);
 184
 185static int __init set_ftrace_dump_on_oops(char *str)
 186{
 187        if (*str++ != '=' || !*str) {
 188                ftrace_dump_on_oops = DUMP_ALL;
 189                return 1;
 190        }
 191
 192        if (!strcmp("orig_cpu", str)) {
 193                ftrace_dump_on_oops = DUMP_ORIG;
 194                return 1;
 195        }
 196
 197        return 0;
 198}
 199__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 200
 201static int __init stop_trace_on_warning(char *str)
 202{
 203        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 204                __disable_trace_on_warning = 1;
 205        return 1;
 206}
 207__setup("traceoff_on_warning", stop_trace_on_warning);
 208
 209static int __init boot_alloc_snapshot(char *str)
 210{
 211        allocate_snapshot = true;
 212        /* We also need the main ring buffer expanded */
 213        ring_buffer_expanded = true;
 214        return 1;
 215}
 216__setup("alloc_snapshot", boot_alloc_snapshot);
 217
 218
 219static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
 220
 221static int __init set_trace_boot_options(char *str)
 222{
 223        strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 224        return 0;
 225}
 226__setup("trace_options=", set_trace_boot_options);
 227
 228static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 229static char *trace_boot_clock __initdata;
 230
 231static int __init set_trace_boot_clock(char *str)
 232{
 233        strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 234        trace_boot_clock = trace_boot_clock_buf;
 235        return 0;
 236}
 237__setup("trace_clock=", set_trace_boot_clock);
 238
 239static int __init set_tracepoint_printk(char *str)
 240{
 241        if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
 242                tracepoint_printk = 1;
 243        return 1;
 244}
 245__setup("tp_printk", set_tracepoint_printk);
 246
 247unsigned long long ns2usecs(u64 nsec)
 248{
 249        nsec += 500;
 250        do_div(nsec, 1000);
 251        return nsec;
 252}
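
/*
 * For example, ns2usecs(1500) == 2 and ns2usecs(1499) == 1: the +500
 * before the divide rounds to the nearest microsecond.
 */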
 253
 254/* trace_flags holds trace_options default values */
 255#define TRACE_DEFAULT_FLAGS                                             \
 256        (FUNCTION_DEFAULT_FLAGS |                                       \
 257         TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
 258         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
 259         TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
 260         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
 261
 262/* trace_options that are only supported by global_trace */
 263#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
 264               TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 265
 266/* trace_flags that are default zero for instances */
 267#define ZEROED_TRACE_FLAGS \
 268        (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
 269
 270/*
 271 * The global_trace is the descriptor that holds the top-level tracing
 272 * buffers for the live tracing.
 273 */
 274static struct trace_array global_trace = {
 275        .trace_flags = TRACE_DEFAULT_FLAGS,
 276};
 277
 278LIST_HEAD(ftrace_trace_arrays);
 279
 280int trace_array_get(struct trace_array *this_tr)
 281{
 282        struct trace_array *tr;
 283        int ret = -ENODEV;
 284
 285        mutex_lock(&trace_types_lock);
 286        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 287                if (tr == this_tr) {
 288                        tr->ref++;
 289                        ret = 0;
 290                        break;
 291                }
 292        }
 293        mutex_unlock(&trace_types_lock);
 294
 295        return ret;
 296}
 297
 298static void __trace_array_put(struct trace_array *this_tr)
 299{
 300        WARN_ON(!this_tr->ref);
 301        this_tr->ref--;
 302}
 303
 304/**
 305 * trace_array_put - Decrement the reference counter for this trace array.
 306 *
 307 * NOTE: Use this when we no longer need the trace array returned by
 308 * trace_array_get_by_name(). This ensures the trace array can be later
 309 * destroyed.
 310 *
 311 */
 312void trace_array_put(struct trace_array *this_tr)
 313{
 314        if (!this_tr)
 315                return;
 316
 317        mutex_lock(&trace_types_lock);
 318        __trace_array_put(this_tr);
 319        mutex_unlock(&trace_types_lock);
 320}
 321EXPORT_SYMBOL_GPL(trace_array_put);
 322
 323int tracing_check_open_get_tr(struct trace_array *tr)
 324{
 325        int ret;
 326
 327        ret = security_locked_down(LOCKDOWN_TRACEFS);
 328        if (ret)
 329                return ret;
 330
 331        if (tracing_disabled)
 332                return -ENODEV;
 333
 334        if (tr && trace_array_get(tr) < 0)
 335                return -ENODEV;
 336
 337        return 0;
 338}
 339
 340int call_filter_check_discard(struct trace_event_call *call, void *rec,
 341                              struct trace_buffer *buffer,
 342                              struct ring_buffer_event *event)
 343{
 344        if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 345            !filter_match_preds(call->filter, rec)) {
 346                __trace_event_discard_commit(buffer, event);
 347                return 1;
 348        }
 349
 350        return 0;
 351}
 352
 353void trace_free_pid_list(struct trace_pid_list *pid_list)
 354{
 355        vfree(pid_list->pids);
 356        kfree(pid_list);
 357}
 358
 359/**
 360 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 361 * @filtered_pids: The list of pids to check
 362 * @search_pid: The PID to find in @filtered_pids
 363 *
  364 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 365 */
 366bool
 367trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
 368{
 369        /*
 370         * If pid_max changed after filtered_pids was created, we
 371         * by default ignore all pids greater than the previous pid_max.
 372         */
 373        if (search_pid >= filtered_pids->pid_max)
 374                return false;
 375
 376        return test_bit(search_pid, filtered_pids->pids);
 377}
 378
 379/**
 380 * trace_ignore_this_task - should a task be ignored for tracing
 381 * @filtered_pids: The list of pids to check
 382 * @task: The task that should be ignored if not filtered
 383 *
 384 * Checks if @task should be traced or not from @filtered_pids.
 385 * Returns true if @task should *NOT* be traced.
 386 * Returns false if @task should be traced.
 387 */
 388bool
 389trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
 390{
 391        /*
 392         * Return false, because if filtered_pids does not exist,
 393         * all pids are good to trace.
 394         */
 395        if (!filtered_pids)
 396                return false;
 397
 398        return !trace_find_filtered_pid(filtered_pids, task->pid);
 399}
 400
 401/**
 402 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 403 * @pid_list: The list to modify
 404 * @self: The current task for fork or NULL for exit
 405 * @task: The task to add or remove
 406 *
  407 * When adding a task, if @self is defined, the task is only added if @self
 408 * is also included in @pid_list. This happens on fork and tasks should
 409 * only be added when the parent is listed. If @self is NULL, then the
 410 * @task pid will be removed from the list, which would happen on exit
 411 * of a task.
 412 */
 413void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
 414                                  struct task_struct *self,
 415                                  struct task_struct *task)
 416{
 417        if (!pid_list)
 418                return;
 419
 420        /* For forks, we only add if the forking task is listed */
 421        if (self) {
 422                if (!trace_find_filtered_pid(pid_list, self->pid))
 423                        return;
 424        }
 425
 426        /* Sorry, but we don't support pid_max changing after setting */
 427        if (task->pid >= pid_list->pid_max)
 428                return;
 429
 430        /* "self" is set for forks, and NULL for exits */
 431        if (self)
 432                set_bit(task->pid, pid_list->pids);
 433        else
 434                clear_bit(task->pid, pid_list->pids);
 435}
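
/*
 * Sketch of how the helper above is typically driven from fork and exit
 * events (my_pid_list and the hook names are illustrative only):
 */
#if 0
static struct trace_pid_list *my_pid_list;

static void my_fork_hook(struct task_struct *self, struct task_struct *child)
{
        /* the child is added only if the forking task is already listed */
        trace_filter_add_remove_task(my_pid_list, self, child);
}

static void my_exit_hook(struct task_struct *task)
{
        /* @self == NULL means remove @task from the list */
        trace_filter_add_remove_task(my_pid_list, NULL, task);
}
#endif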
 436
 437/**
 438 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 439 * @pid_list: The pid list to show
  440 * @v: The last pid that was shown (the actual pid plus one, so that zero can be displayed)
 441 * @pos: The position of the file
 442 *
 443 * This is used by the seq_file "next" operation to iterate the pids
 444 * listed in a trace_pid_list structure.
 445 *
 446 * Returns the pid+1 as we want to display pid of zero, but NULL would
 447 * stop the iteration.
 448 */
 449void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 450{
 451        unsigned long pid = (unsigned long)v;
 452
 453        (*pos)++;
 454
  455        /* pid already is +1 of the actual previous bit */
 456        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 457
 458        /* Return pid + 1 to allow zero to be represented */
 459        if (pid < pid_list->pid_max)
 460                return (void *)(pid + 1);
 461
 462        return NULL;
 463}
 464
 465/**
 466 * trace_pid_start - Used for seq_file to start reading pid lists
 467 * @pid_list: The pid list to show
 468 * @pos: The position of the file
 469 *
 470 * This is used by seq_file "start" operation to start the iteration
 471 * of listing pids.
 472 *
 473 * Returns the pid+1 as we want to display pid of zero, but NULL would
 474 * stop the iteration.
 475 */
 476void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
 477{
 478        unsigned long pid;
 479        loff_t l = 0;
 480
 481        pid = find_first_bit(pid_list->pids, pid_list->pid_max);
 482        if (pid >= pid_list->pid_max)
 483                return NULL;
 484
 485        /* Return pid + 1 so that zero can be the exit value */
 486        for (pid++; pid && l < *pos;
 487             pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
 488                ;
 489        return (void *)pid;
 490}
 491
 492/**
 493 * trace_pid_show - show the current pid in seq_file processing
 494 * @m: The seq_file structure to write into
 495 * @v: A void pointer of the pid (+1) value to display
 496 *
 497 * Can be directly used by seq_file operations to display the current
 498 * pid value.
 499 */
 500int trace_pid_show(struct seq_file *m, void *v)
 501{
 502        unsigned long pid = (unsigned long)v - 1;
 503
 504        seq_printf(m, "%lu\n", pid);
 505        return 0;
 506}
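
/*
 * The three helpers above are meant to back seq_file operations. A
 * hypothetical wiring (my_pid_list and the my_pids_* names are
 * illustrative only) might look like:
 */
#if 0
static void *my_pids_start(struct seq_file *m, loff_t *pos)
{
        return trace_pid_start(my_pid_list, pos);
}

static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
        return trace_pid_next(my_pid_list, v, pos);
}

static void my_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations my_pids_seq_ops = {
        .start  = my_pids_start,
        .next   = my_pids_next,
        .stop   = my_pids_stop,
        .show   = trace_pid_show,
};
#endif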
 507
 508/* 128 should be much more than enough */
 509#define PID_BUF_SIZE            127
 510
 511int trace_pid_write(struct trace_pid_list *filtered_pids,
 512                    struct trace_pid_list **new_pid_list,
 513                    const char __user *ubuf, size_t cnt)
 514{
 515        struct trace_pid_list *pid_list;
 516        struct trace_parser parser;
 517        unsigned long val;
 518        int nr_pids = 0;
 519        ssize_t read = 0;
 520        ssize_t ret = 0;
 521        loff_t pos;
 522        pid_t pid;
 523
 524        if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 525                return -ENOMEM;
 526
  527        /*
  528         * The write is an all or nothing operation: always create a
  529         * new array when the user adds new pids, rather than modify
  530         * the current one. If the operation fails, the current list
  531         * is left unmodified.
  532         */
 533        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
 534        if (!pid_list) {
 535                trace_parser_put(&parser);
 536                return -ENOMEM;
 537        }
 538
 539        pid_list->pid_max = READ_ONCE(pid_max);
 540
 541        /* Only truncating will shrink pid_max */
 542        if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
 543                pid_list->pid_max = filtered_pids->pid_max;
 544
 545        pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
 546        if (!pid_list->pids) {
 547                trace_parser_put(&parser);
 548                kfree(pid_list);
 549                return -ENOMEM;
 550        }
 551
 552        if (filtered_pids) {
 553                /* copy the current bits to the new max */
 554                for_each_set_bit(pid, filtered_pids->pids,
 555                                 filtered_pids->pid_max) {
 556                        set_bit(pid, pid_list->pids);
 557                        nr_pids++;
 558                }
 559        }
 560
 561        while (cnt > 0) {
 562
 563                pos = 0;
 564
 565                ret = trace_get_user(&parser, ubuf, cnt, &pos);
 566                if (ret < 0 || !trace_parser_loaded(&parser))
 567                        break;
 568
 569                read += ret;
 570                ubuf += ret;
 571                cnt -= ret;
 572
 573                ret = -EINVAL;
 574                if (kstrtoul(parser.buffer, 0, &val))
 575                        break;
 576                if (val >= pid_list->pid_max)
 577                        break;
 578
 579                pid = (pid_t)val;
 580
 581                set_bit(pid, pid_list->pids);
 582                nr_pids++;
 583
 584                trace_parser_clear(&parser);
 585                ret = 0;
 586        }
 587        trace_parser_put(&parser);
 588
 589        if (ret < 0) {
 590                trace_free_pid_list(pid_list);
 591                return ret;
 592        }
 593
 594        if (!nr_pids) {
 595                /* Cleared the list of pids */
 596                trace_free_pid_list(pid_list);
 597                read = ret;
 598                pid_list = NULL;
 599        }
 600
 601        *new_pid_list = pid_list;
 602
 603        return read;
 604}
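
/*
 * A hypothetical write handler built on trace_pid_write() (the my_* names
 * are illustrative; a real caller must also synchronize readers before
 * disposing of the old list):
 */
#if 0
static struct trace_pid_list *my_filtered_pids;

static ssize_t my_pids_write(struct file *filp, const char __user *ubuf,
                             size_t cnt, loff_t *ppos)
{
        struct trace_pid_list *new_list = NULL;
        ssize_t ret;

        ret = trace_pid_write(my_filtered_pids, &new_list, ubuf, cnt);
        if (ret < 0)
                return ret;     /* the current list is untouched on failure */

        /* publish new_list and free the old one here */
        my_filtered_pids = new_list;

        if (ret > 0)
                *ppos += ret;
        return ret;
}
#endif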
 605
 606static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
 607{
 608        u64 ts;
 609
 610        /* Early boot up does not have a buffer yet */
 611        if (!buf->buffer)
 612                return trace_clock_local();
 613
 614        ts = ring_buffer_time_stamp(buf->buffer, cpu);
 615        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 616
 617        return ts;
 618}
 619
 620u64 ftrace_now(int cpu)
 621{
 622        return buffer_ftrace_now(&global_trace.array_buffer, cpu);
 623}
 624
 625/**
 626 * tracing_is_enabled - Show if global_trace has been disabled
 627 *
  628 * Shows if the global trace has been enabled or not. It uses the
  629 * mirror flag "buffer_disabled" so that it can be checked in fast
  630 * paths such as the irqsoff tracer. But it may be inaccurate due to
  631 * races. If you need to know the accurate state, use tracing_is_on(),
  632 * which is a little slower, but accurate.
 633 */
 634int tracing_is_enabled(void)
 635{
 636        /*
 637         * For quick access (irqsoff uses this in fast path), just
 638         * return the mirror variable of the state of the ring buffer.
 639         * It's a little racy, but we don't really care.
 640         */
 641        smp_rmb();
 642        return !global_trace.buffer_disabled;
 643}
 644
 645/*
 646 * trace_buf_size is the size in bytes that is allocated
 647 * for a buffer. Note, the number of bytes is always rounded
 648 * to page size.
 649 *
  650 * This number is purposely set to a low number of 16384.
  651 * If a dump on oops happens, it is much appreciated not to
  652 * have to wait for all that output. Anyway, this is configurable
  653 * at both boot time and run time.
 654 */
 655#define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
 656
 657static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 658
 659/* trace_types holds a link list of available tracers. */
 660static struct tracer            *trace_types __read_mostly;
 661
 662/*
 663 * trace_types_lock is used to protect the trace_types list.
 664 */
 665DEFINE_MUTEX(trace_types_lock);
 666
 667/*
  668 * Serialize the access to the ring buffer
  669 *
  670 * The ring buffer serializes readers, but that is only low level protection.
  671 * The validity of the events (returned by ring_buffer_peek() etc.)
  672 * is not protected by the ring buffer.
  673 *
  674 * The content of events may become garbage if we allow other processes to
  675 * consume these events concurrently:
  676 *   A) the page of the consumed events may become a normal page
  677 *      (not a reader page) in the ring buffer, and this page will be
  678 *      rewritten by the event producer.
  679 *   B) the page of the consumed events may become a page for splice_read,
  680 *      and this page will be returned to the system.
  681 *
  682 * These primitives allow multiple processes to access different cpu
  683 * ring buffers concurrently.
  684 *
  685 * These primitives don't distinguish read-only and read-consume access.
  686 * Multiple read-only accesses are also serialized.
 687 */
 688
 689#ifdef CONFIG_SMP
 690static DECLARE_RWSEM(all_cpu_access_lock);
 691static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
 692
 693static inline void trace_access_lock(int cpu)
 694{
 695        if (cpu == RING_BUFFER_ALL_CPUS) {
 696                /* gain it for accessing the whole ring buffer. */
 697                down_write(&all_cpu_access_lock);
 698        } else {
 699                /* gain it for accessing a cpu ring buffer. */
 700
 701                /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
 702                down_read(&all_cpu_access_lock);
 703
 704                /* Secondly block other access to this @cpu ring buffer. */
 705                mutex_lock(&per_cpu(cpu_access_lock, cpu));
 706        }
 707}
 708
 709static inline void trace_access_unlock(int cpu)
 710{
 711        if (cpu == RING_BUFFER_ALL_CPUS) {
 712                up_write(&all_cpu_access_lock);
 713        } else {
 714                mutex_unlock(&per_cpu(cpu_access_lock, cpu));
 715                up_read(&all_cpu_access_lock);
 716        }
 717}
 718
 719static inline void trace_access_lock_init(void)
 720{
 721        int cpu;
 722
 723        for_each_possible_cpu(cpu)
 724                mutex_init(&per_cpu(cpu_access_lock, cpu));
 725}
 726
 727#else
 728
 729static DEFINE_MUTEX(access_lock);
 730
 731static inline void trace_access_lock(int cpu)
 732{
 733        (void)cpu;
 734        mutex_lock(&access_lock);
 735}
 736
 737static inline void trace_access_unlock(int cpu)
 738{
 739        (void)cpu;
 740        mutex_unlock(&access_lock);
 741}
 742
 743static inline void trace_access_lock_init(void)
 744{
 745}
 746
 747#endif
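
/*
 * Usage sketch for the primitives above (my_consume_* are illustrative):
 * a reader takes the per-cpu lock for a single buffer, or passes
 * RING_BUFFER_ALL_CPUS when it needs every cpu buffer at once.
 */
#if 0
static void my_consume_cpu(int cpu)
{
        trace_access_lock(cpu);
        /* ... read or consume events from this cpu's ring buffer ... */
        trace_access_unlock(cpu);
}

static void my_consume_all(void)
{
        trace_access_lock(RING_BUFFER_ALL_CPUS);
        /* ... operate on all cpu ring buffers ... */
        trace_access_unlock(RING_BUFFER_ALL_CPUS);
}
#endif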
 748
 749#ifdef CONFIG_STACKTRACE
 750static void __ftrace_trace_stack(struct trace_buffer *buffer,
 751                                 unsigned long flags,
 752                                 int skip, int pc, struct pt_regs *regs);
 753static inline void ftrace_trace_stack(struct trace_array *tr,
 754                                      struct trace_buffer *buffer,
 755                                      unsigned long flags,
 756                                      int skip, int pc, struct pt_regs *regs);
 757
 758#else
 759static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
 760                                        unsigned long flags,
 761                                        int skip, int pc, struct pt_regs *regs)
 762{
 763}
 764static inline void ftrace_trace_stack(struct trace_array *tr,
 765                                      struct trace_buffer *buffer,
 766                                      unsigned long flags,
 767                                      int skip, int pc, struct pt_regs *regs)
 768{
 769}
 770
 771#endif
 772
 773static __always_inline void
 774trace_event_setup(struct ring_buffer_event *event,
 775                  int type, unsigned long flags, int pc)
 776{
 777        struct trace_entry *ent = ring_buffer_event_data(event);
 778
 779        tracing_generic_entry_update(ent, type, flags, pc);
 780}
 781
 782static __always_inline struct ring_buffer_event *
 783__trace_buffer_lock_reserve(struct trace_buffer *buffer,
 784                          int type,
 785                          unsigned long len,
 786                          unsigned long flags, int pc)
 787{
 788        struct ring_buffer_event *event;
 789
 790        event = ring_buffer_lock_reserve(buffer, len);
 791        if (event != NULL)
 792                trace_event_setup(event, type, flags, pc);
 793
 794        return event;
 795}
 796
 797void tracer_tracing_on(struct trace_array *tr)
 798{
 799        if (tr->array_buffer.buffer)
 800                ring_buffer_record_on(tr->array_buffer.buffer);
 801        /*
 802         * This flag is looked at when buffers haven't been allocated
  803         * yet, or by some tracers (like irqsoff) that just want to
  804         * know if the ring buffer has been disabled, but they can handle
  805         * races where it gets disabled while we still do a record.
 806         * As the check is in the fast path of the tracers, it is more
 807         * important to be fast than accurate.
 808         */
 809        tr->buffer_disabled = 0;
 810        /* Make the flag seen by readers */
 811        smp_wmb();
 812}
 813
 814/**
 815 * tracing_on - enable tracing buffers
 816 *
 817 * This function enables tracing buffers that may have been
 818 * disabled with tracing_off.
 819 */
 820void tracing_on(void)
 821{
 822        tracer_tracing_on(&global_trace);
 823}
 824EXPORT_SYMBOL_GPL(tracing_on);
 825
 826
 827static __always_inline void
 828__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
 829{
 830        __this_cpu_write(trace_taskinfo_save, true);
 831
 832        /* If this is the temp buffer, we need to commit fully */
 833        if (this_cpu_read(trace_buffered_event) == event) {
 834                /* Length is in event->array[0] */
 835                ring_buffer_write(buffer, event->array[0], &event->array[1]);
 836                /* Release the temp buffer */
 837                this_cpu_dec(trace_buffered_event_cnt);
 838        } else
 839                ring_buffer_unlock_commit(buffer, event);
 840}
 841
 842/**
 843 * __trace_puts - write a constant string into the trace buffer.
 844 * @ip:    The address of the caller
 845 * @str:   The constant string to write
 846 * @size:  The size of the string.
 847 */
 848int __trace_puts(unsigned long ip, const char *str, int size)
 849{
 850        struct ring_buffer_event *event;
 851        struct trace_buffer *buffer;
 852        struct print_entry *entry;
 853        unsigned long irq_flags;
 854        int alloc;
 855        int pc;
 856
 857        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 858                return 0;
 859
 860        pc = preempt_count();
 861
 862        if (unlikely(tracing_selftest_running || tracing_disabled))
 863                return 0;
 864
 865        alloc = sizeof(*entry) + size + 2; /* possible \n added */
 866
 867        local_save_flags(irq_flags);
 868        buffer = global_trace.array_buffer.buffer;
 869        ring_buffer_nest_start(buffer);
  870        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
 871                                            irq_flags, pc);
 872        if (!event) {
 873                size = 0;
 874                goto out;
 875        }
 876
 877        entry = ring_buffer_event_data(event);
 878        entry->ip = ip;
 879
 880        memcpy(&entry->buf, str, size);
 881
 882        /* Add a newline if necessary */
 883        if (entry->buf[size - 1] != '\n') {
 884                entry->buf[size] = '\n';
 885                entry->buf[size + 1] = '\0';
 886        } else
 887                entry->buf[size] = '\0';
 888
 889        __buffer_unlock_commit(buffer, event);
 890        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 891 out:
 892        ring_buffer_nest_end(buffer);
 893        return size;
 894}
 895EXPORT_SYMBOL_GPL(__trace_puts);
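
/*
 * Note: callers normally go through the trace_puts() macro, which picks
 * __trace_bputs() for built-in constant strings and falls back to
 * __trace_puts() otherwise, e.g.:
 *
 *      trace_puts("reached the slow path\n");
 */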
 896
 897/**
 898 * __trace_bputs - write the pointer to a constant string into trace buffer
 899 * @ip:    The address of the caller
  900 * @str:   The constant string to write to the buffer
 901 */
 902int __trace_bputs(unsigned long ip, const char *str)
 903{
 904        struct ring_buffer_event *event;
 905        struct trace_buffer *buffer;
 906        struct bputs_entry *entry;
 907        unsigned long irq_flags;
 908        int size = sizeof(struct bputs_entry);
 909        int ret = 0;
 910        int pc;
 911
 912        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
 913                return 0;
 914
 915        pc = preempt_count();
 916
 917        if (unlikely(tracing_selftest_running || tracing_disabled))
 918                return 0;
 919
 920        local_save_flags(irq_flags);
 921        buffer = global_trace.array_buffer.buffer;
 922
 923        ring_buffer_nest_start(buffer);
 924        event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
 925                                            irq_flags, pc);
 926        if (!event)
 927                goto out;
 928
 929        entry = ring_buffer_event_data(event);
 930        entry->ip                       = ip;
 931        entry->str                      = str;
 932
 933        __buffer_unlock_commit(buffer, event);
 934        ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
 935
 936        ret = 1;
 937 out:
 938        ring_buffer_nest_end(buffer);
 939        return ret;
 940}
 941EXPORT_SYMBOL_GPL(__trace_bputs);
 942
 943#ifdef CONFIG_TRACER_SNAPSHOT
 944void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
 945{
 946        struct tracer *tracer = tr->current_trace;
 947        unsigned long flags;
 948
 949        if (in_nmi()) {
 950                internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
 951                internal_trace_puts("*** snapshot is being ignored        ***\n");
 952                return;
 953        }
 954
 955        if (!tr->allocated_snapshot) {
 956                internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
 957                internal_trace_puts("*** stopping trace here!   ***\n");
 958                tracing_off();
 959                return;
 960        }
 961
 962        /* Note, snapshot can not be used when the tracer uses it */
 963        if (tracer->use_max_tr) {
 964                internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
 965                internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
 966                return;
 967        }
 968
 969        local_irq_save(flags);
 970        update_max_tr(tr, current, smp_processor_id(), cond_data);
 971        local_irq_restore(flags);
 972}
 973
 974void tracing_snapshot_instance(struct trace_array *tr)
 975{
 976        tracing_snapshot_instance_cond(tr, NULL);
 977}
 978
 979/**
 980 * tracing_snapshot - take a snapshot of the current buffer.
 981 *
 982 * This causes a swap between the snapshot buffer and the current live
 983 * tracing buffer. You can use this to take snapshots of the live
 984 * trace when some condition is triggered, but continue to trace.
 985 *
 986 * Note, make sure to allocate the snapshot with either
  987 * tracing_snapshot_alloc(), or by doing it manually
 988 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 989 *
  990 * If the snapshot buffer is not allocated, this will stop tracing,
  991 * basically making a permanent snapshot.
 992 */
 993void tracing_snapshot(void)
 994{
 995        struct trace_array *tr = &global_trace;
 996
 997        tracing_snapshot_instance(tr);
 998}
 999EXPORT_SYMBOL_GPL(tracing_snapshot);
1000
1001/**
1002 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1003 * @tr:         The tracing instance to snapshot
1004 * @cond_data:  The data to be tested conditionally, and possibly saved
1005 *
1006 * This is the same as tracing_snapshot() except that the snapshot is
1007 * conditional - the snapshot will only happen if the
1008 * cond_snapshot.update() implementation receiving the cond_data
1009 * returns true, which means that the trace array's cond_snapshot
1010 * update() operation used the cond_data to determine whether the
1011 * snapshot should be taken, and if it was, presumably saved it along
1012 * with the snapshot.
1013 */
1014void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1015{
1016        tracing_snapshot_instance_cond(tr, cond_data);
1017}
1018EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1019
1020/**
1021 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1022 * @tr:         The tracing instance
1023 *
1024 * When the user enables a conditional snapshot using
1025 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1026 * with the snapshot.  This accessor is used to retrieve it.
1027 *
1028 * Should not be called from cond_snapshot.update(), since it takes
1029 * the tr->max_lock lock, which the code calling
 1030 * cond_snapshot.update() has already taken.
1031 *
1032 * Returns the cond_data associated with the trace array's snapshot.
1033 */
1034void *tracing_cond_snapshot_data(struct trace_array *tr)
1035{
1036        void *cond_data = NULL;
1037
1038        arch_spin_lock(&tr->max_lock);
1039
1040        if (tr->cond_snapshot)
1041                cond_data = tr->cond_snapshot->cond_data;
1042
1043        arch_spin_unlock(&tr->max_lock);
1044
1045        return cond_data;
1046}
1047EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1048
1049static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1050                                        struct array_buffer *size_buf, int cpu_id);
1051static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1052
1053int tracing_alloc_snapshot_instance(struct trace_array *tr)
1054{
1055        int ret;
1056
1057        if (!tr->allocated_snapshot) {
1058
1059                /* allocate spare buffer */
1060                ret = resize_buffer_duplicate_size(&tr->max_buffer,
1061                                   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1062                if (ret < 0)
1063                        return ret;
1064
1065                tr->allocated_snapshot = true;
1066        }
1067
1068        return 0;
1069}
1070
1071static void free_snapshot(struct trace_array *tr)
1072{
1073        /*
 1074         * We don't free the ring buffer; instead, we resize it because
 1075         * the max_tr ring buffer has some state (e.g. ring->clock) and
 1076         * we want to preserve it.
1077         */
1078        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1079        set_buffer_entries(&tr->max_buffer, 1);
1080        tracing_reset_online_cpus(&tr->max_buffer);
1081        tr->allocated_snapshot = false;
1082}
1083
1084/**
1085 * tracing_alloc_snapshot - allocate snapshot buffer.
1086 *
1087 * This only allocates the snapshot buffer if it isn't already
1088 * allocated - it doesn't also take a snapshot.
1089 *
1090 * This is meant to be used in cases where the snapshot buffer needs
1091 * to be set up for events that can't sleep but need to be able to
1092 * trigger a snapshot.
1093 */
1094int tracing_alloc_snapshot(void)
1095{
1096        struct trace_array *tr = &global_trace;
1097        int ret;
1098
1099        ret = tracing_alloc_snapshot_instance(tr);
1100        WARN_ON(ret < 0);
1101
1102        return ret;
1103}
1104EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1105
1106/**
1107 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1108 *
1109 * This is similar to tracing_snapshot(), but it will allocate the
1110 * snapshot buffer if it isn't already allocated. Use this only
1111 * where it is safe to sleep, as the allocation may sleep.
1112 *
1113 * This causes a swap between the snapshot buffer and the current live
1114 * tracing buffer. You can use this to take snapshots of the live
1115 * trace when some condition is triggered, but continue to trace.
1116 */
1117void tracing_snapshot_alloc(void)
1118{
1119        int ret;
1120
1121        ret = tracing_alloc_snapshot();
1122        if (ret < 0)
1123                return;
1124
1125        tracing_snapshot();
1126}
1127EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1128
1129/**
1130 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1131 * @tr:         The tracing instance
1132 * @cond_data:  User data to associate with the snapshot
1133 * @update:     Implementation of the cond_snapshot update function
1134 *
1135 * Check whether the conditional snapshot for the given instance has
1136 * already been enabled, or if the current tracer is already using a
1137 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1138 * save the cond_data and update function inside.
1139 *
1140 * Returns 0 if successful, error otherwise.
1141 */
1142int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1143                                 cond_update_fn_t update)
1144{
1145        struct cond_snapshot *cond_snapshot;
1146        int ret = 0;
1147
1148        cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1149        if (!cond_snapshot)
1150                return -ENOMEM;
1151
1152        cond_snapshot->cond_data = cond_data;
1153        cond_snapshot->update = update;
1154
1155        mutex_lock(&trace_types_lock);
1156
1157        ret = tracing_alloc_snapshot_instance(tr);
1158        if (ret)
1159                goto fail_unlock;
1160
1161        if (tr->current_trace->use_max_tr) {
1162                ret = -EBUSY;
1163                goto fail_unlock;
1164        }
1165
1166        /*
1167         * The cond_snapshot can only change to NULL without the
1168         * trace_types_lock. We don't care if we race with it going
1169         * to NULL, but we want to make sure that it's not set to
1170         * something other than NULL when we get here, which we can
1171         * do safely with only holding the trace_types_lock and not
1172         * having to take the max_lock.
1173         */
1174        if (tr->cond_snapshot) {
1175                ret = -EBUSY;
1176                goto fail_unlock;
1177        }
1178
1179        arch_spin_lock(&tr->max_lock);
1180        tr->cond_snapshot = cond_snapshot;
1181        arch_spin_unlock(&tr->max_lock);
1182
1183        mutex_unlock(&trace_types_lock);
1184
1185        return ret;
1186
1187 fail_unlock:
1188        mutex_unlock(&trace_types_lock);
1189        kfree(cond_snapshot);
1190        return ret;
1191}
1192EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
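
/*
 * Hypothetical user of the conditional snapshot API (struct my_state and
 * the my_* functions are illustrative only); the update callback returns
 * true when the snapshot should actually be taken:
 */
#if 0
struct my_state {
        bool hit_threshold;
};

static bool my_update(struct trace_array *tr, void *cond_data)
{
        struct my_state *state = cond_data;

        return state->hit_threshold;
}

static int my_enable(struct trace_array *tr, struct my_state *state)
{
        int ret;

        ret = tracing_snapshot_cond_enable(tr, state, my_update);
        if (ret)
                return ret;

        /* later, wherever the interesting condition may have been hit: */
        tracing_snapshot_cond(tr, state);
        return 0;
}
#endif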
1193
1194/**
1195 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1196 * @tr:         The tracing instance
1197 *
1198 * Check whether the conditional snapshot for the given instance is
1199 * enabled; if so, free the cond_snapshot associated with it,
1200 * otherwise return -EINVAL.
1201 *
1202 * Returns 0 if successful, error otherwise.
1203 */
1204int tracing_snapshot_cond_disable(struct trace_array *tr)
1205{
1206        int ret = 0;
1207
1208        arch_spin_lock(&tr->max_lock);
1209
1210        if (!tr->cond_snapshot)
1211                ret = -EINVAL;
1212        else {
1213                kfree(tr->cond_snapshot);
1214                tr->cond_snapshot = NULL;
1215        }
1216
1217        arch_spin_unlock(&tr->max_lock);
1218
1219        return ret;
1220}
1221EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1222#else
1223void tracing_snapshot(void)
1224{
1225        WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1226}
1227EXPORT_SYMBOL_GPL(tracing_snapshot);
1228void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1229{
1230        WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1231}
1232EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1233int tracing_alloc_snapshot(void)
1234{
1235        WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1236        return -ENODEV;
1237}
1238EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1239void tracing_snapshot_alloc(void)
1240{
1241        /* Give warning */
1242        tracing_snapshot();
1243}
1244EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1245void *tracing_cond_snapshot_data(struct trace_array *tr)
1246{
1247        return NULL;
1248}
1249EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1250int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1251{
1252        return -ENODEV;
1253}
1254EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1255int tracing_snapshot_cond_disable(struct trace_array *tr)
1256{
1257        return false;
1258}
1259EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1260#endif /* CONFIG_TRACER_SNAPSHOT */
1261
1262void tracer_tracing_off(struct trace_array *tr)
1263{
1264        if (tr->array_buffer.buffer)
1265                ring_buffer_record_off(tr->array_buffer.buffer);
1266        /*
1267         * This flag is looked at when buffers haven't been allocated
 1268         * yet, or by some tracers (like irqsoff) that just want to
 1269         * know if the ring buffer has been disabled, but they can handle
 1270         * races where it gets disabled while we still do a record.
1271         * As the check is in the fast path of the tracers, it is more
1272         * important to be fast than accurate.
1273         */
1274        tr->buffer_disabled = 1;
1275        /* Make the flag seen by readers */
1276        smp_wmb();
1277}
1278
1279/**
1280 * tracing_off - turn off tracing buffers
1281 *
1282 * This function stops the tracing buffers from recording data.
1283 * It does not disable any overhead the tracers themselves may
1284 * be causing. This function simply causes all recording to
1285 * the ring buffers to fail.
1286 */
1287void tracing_off(void)
1288{
1289        tracer_tracing_off(&global_trace);
1290}
1291EXPORT_SYMBOL_GPL(tracing_off);
1292
1293void disable_trace_on_warning(void)
1294{
1295        if (__disable_trace_on_warning)
1296                tracing_off();
1297}
1298
1299/**
 1300 * tracer_tracing_is_on - show the real state of the ring buffer
 1301 * @tr: the trace array whose ring buffer state is queried
 1302 *
 1303 * Shows whether the ring buffer of @tr is actually enabled or not.
1304 */
1305bool tracer_tracing_is_on(struct trace_array *tr)
1306{
1307        if (tr->array_buffer.buffer)
1308                return ring_buffer_record_is_on(tr->array_buffer.buffer);
1309        return !tr->buffer_disabled;
1310}
1311
1312/**
1313 * tracing_is_on - show state of ring buffers enabled
1314 */
1315int tracing_is_on(void)
1316{
1317        return tracer_tracing_is_on(&global_trace);
1318}
1319EXPORT_SYMBOL_GPL(tracing_is_on);
1320
1321static int __init set_buf_size(char *str)
1322{
1323        unsigned long buf_size;
1324
1325        if (!str)
1326                return 0;
1327        buf_size = memparse(str, &str);
1328        /* nr_entries can not be zero */
1329        if (buf_size == 0)
1330                return 0;
1331        trace_buf_size = buf_size;
1332        return 1;
1333}
1334__setup("trace_buf_size=", set_buf_size);
1335
1336static int __init set_tracing_thresh(char *str)
1337{
1338        unsigned long threshold;
1339        int ret;
1340
1341        if (!str)
1342                return 0;
1343        ret = kstrtoul(str, 0, &threshold);
1344        if (ret < 0)
1345                return 0;
1346        tracing_thresh = threshold * 1000;
1347        return 1;
1348}
1349__setup("tracing_thresh=", set_tracing_thresh);
1350
1351unsigned long nsecs_to_usecs(unsigned long nsecs)
1352{
1353        return nsecs / 1000;
1354}
1355
1356/*
1357 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1358 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1359 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1360 * of strings in the order that the evals (enum) were defined.
1361 */
1362#undef C
1363#define C(a, b) b
1364
 1365/* These must match the bit positions in trace_iterator_flags */
1366static const char *trace_options[] = {
1367        TRACE_FLAGS
1368        NULL
1369};
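
/*
 * For example, with entries such as C(PRINT_PARENT, "print-parent") and
 * C(SYM_OFFSET, "sym-offset") in TRACE_FLAGS, the definition above turns
 * the list into { "print-parent", "sym-offset", ..., NULL }, while trace.h
 * expands the same list with a different C() to build the matching
 * TRACE_ITER_* bits.
 */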
1370
1371static struct {
1372        u64 (*func)(void);
1373        const char *name;
1374        int in_ns;              /* is this clock in nanoseconds? */
1375} trace_clocks[] = {
1376        { trace_clock_local,            "local",        1 },
1377        { trace_clock_global,           "global",       1 },
1378        { trace_clock_counter,          "counter",      0 },
1379        { trace_clock_jiffies,          "uptime",       0 },
1380        { trace_clock,                  "perf",         1 },
1381        { ktime_get_mono_fast_ns,       "mono",         1 },
1382        { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1383        { ktime_get_boot_fast_ns,       "boot",         1 },
1384        ARCH_TRACE_CLOCKS
1385};
1386
1387bool trace_clock_in_ns(struct trace_array *tr)
1388{
1389        if (trace_clocks[tr->clock_id].in_ns)
1390                return true;
1391
1392        return false;
1393}
1394
1395/*
1396 * trace_parser_get_init - gets the buffer for trace parser
1397 */
1398int trace_parser_get_init(struct trace_parser *parser, int size)
1399{
1400        memset(parser, 0, sizeof(*parser));
1401
1402        parser->buffer = kmalloc(size, GFP_KERNEL);
1403        if (!parser->buffer)
1404                return 1;
1405
1406        parser->size = size;
1407        return 0;
1408}
1409
1410/*
1411 * trace_parser_put - frees the buffer for trace parser
1412 */
1413void trace_parser_put(struct trace_parser *parser)
1414{
1415        kfree(parser->buffer);
1416        parser->buffer = NULL;
1417}
1418
1419/*
 1420 * trace_get_user - reads the user input string separated by space
 1421 * (matched by isspace(ch))
 1422 *
 1423 * For each string found, the 'struct trace_parser' is updated,
1424 * and the function returns.
1425 *
1426 * Returns number of bytes read.
1427 *
1428 * See kernel/trace/trace.h for 'struct trace_parser' details.
1429 */
1430int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1431        size_t cnt, loff_t *ppos)
1432{
1433        char ch;
1434        size_t read = 0;
1435        ssize_t ret;
1436
1437        if (!*ppos)
1438                trace_parser_clear(parser);
1439
1440        ret = get_user(ch, ubuf++);
1441        if (ret)
1442                goto out;
1443
1444        read++;
1445        cnt--;
1446
 1447        /*
 1448         * If the parser is continuing from a previous write, keep reading
 1449         * the input without skipping spaces; otherwise skip white space.
 1450         */
1451        if (!parser->cont) {
1452                /* skip white space */
1453                while (cnt && isspace(ch)) {
1454                        ret = get_user(ch, ubuf++);
1455                        if (ret)
1456                                goto out;
1457                        read++;
1458                        cnt--;
1459                }
1460
1461                parser->idx = 0;
1462
1463                /* only spaces were written */
1464                if (isspace(ch) || !ch) {
1465                        *ppos += read;
1466                        ret = read;
1467                        goto out;
1468                }
1469        }
1470
1471        /* read the non-space input */
1472        while (cnt && !isspace(ch) && ch) {
1473                if (parser->idx < parser->size - 1)
1474                        parser->buffer[parser->idx++] = ch;
1475                else {
1476                        ret = -EINVAL;
1477                        goto out;
1478                }
1479                ret = get_user(ch, ubuf++);
1480                if (ret)
1481                        goto out;
1482                read++;
1483                cnt--;
1484        }
1485
1486        /* We either got finished input or we have to wait for another call. */
1487        if (isspace(ch) || !ch) {
1488                parser->buffer[parser->idx] = 0;
1489                parser->cont = false;
1490        } else if (parser->idx < parser->size - 1) {
1491                parser->cont = true;
1492                parser->buffer[parser->idx++] = ch;
1493                /* Make sure the parsed string always terminates with '\0'. */
1494                parser->buffer[parser->idx] = 0;
1495        } else {
1496                ret = -EINVAL;
1497                goto out;
1498        }
1499
1500        *ppos += read;
1501        ret = read;
1502
1503out:
1504        return ret;
1505}
1506
1507/* TODO add a seq_buf_to_buffer() */
1508static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1509{
1510        int len;
1511
1512        if (trace_seq_used(s) <= s->seq.readpos)
1513                return -EBUSY;
1514
1515        len = trace_seq_used(s) - s->seq.readpos;
1516        if (cnt > len)
1517                cnt = len;
1518        memcpy(buf, s->buffer + s->seq.readpos, cnt);
1519
1520        s->seq.readpos += cnt;
1521        return cnt;
1522}
1523
1524unsigned long __read_mostly     tracing_thresh;
1525static const struct file_operations tracing_max_lat_fops;
1526
1527#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1528        defined(CONFIG_FSNOTIFY)
1529
1530static struct workqueue_struct *fsnotify_wq;
1531
1532static void latency_fsnotify_workfn(struct work_struct *work)
1533{
1534        struct trace_array *tr = container_of(work, struct trace_array,
1535                                              fsnotify_work);
1536        fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
1537                 tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
1538}
1539
1540static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1541{
1542        struct trace_array *tr = container_of(iwork, struct trace_array,
1543                                              fsnotify_irqwork);
1544        queue_work(fsnotify_wq, &tr->fsnotify_work);
1545}
1546
1547static void trace_create_maxlat_file(struct trace_array *tr,
1548                                     struct dentry *d_tracer)
1549{
1550        INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1551        init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1552        tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1553                                              d_tracer, &tr->max_latency,
1554                                              &tracing_max_lat_fops);
1555}
1556
1557__init static int latency_fsnotify_init(void)
1558{
1559        fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1560                                      WQ_UNBOUND | WQ_HIGHPRI, 0);
1561        if (!fsnotify_wq) {
1562                pr_err("Unable to allocate tr_max_lat_wq\n");
1563                return -ENOMEM;
1564        }
1565        return 0;
1566}
1567
1568late_initcall_sync(latency_fsnotify_init);
1569
1570void latency_fsnotify(struct trace_array *tr)
1571{
1572        if (!fsnotify_wq)
1573                return;
1574        /*
1575         * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1576         * possible that we are called from __schedule() or do_idle(), which
1577         * could cause a deadlock.
1578         */
1579        irq_work_queue(&tr->fsnotify_irqwork);
1580}
1581
1582/*
1583 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1584 *  defined(CONFIG_FSNOTIFY)
1585 */
1586#else
1587
1588#define trace_create_maxlat_file(tr, d_tracer)                          \
1589        trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1590                          &tr->max_latency, &tracing_max_lat_fops)
1591
1592#endif
1593
1594#ifdef CONFIG_TRACER_MAX_TRACE
1595/*
1596 * Copy the new maximum trace into the separate maximum-trace
1597 * structure. (this way the maximum trace is permanently saved,
1598 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1599 */
1600static void
1601__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1602{
1603        struct array_buffer *trace_buf = &tr->array_buffer;
1604        struct array_buffer *max_buf = &tr->max_buffer;
1605        struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1606        struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1607
1608        max_buf->cpu = cpu;
1609        max_buf->time_start = data->preempt_timestamp;
1610
1611        max_data->saved_latency = tr->max_latency;
1612        max_data->critical_start = data->critical_start;
1613        max_data->critical_end = data->critical_end;
1614
1615        strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1616        max_data->pid = tsk->pid;
1617        /*
1618         * If tsk == current, then use current_uid(), as that does not use
1619         * RCU. The irq tracer can be called out of RCU scope.
1620         */
1621        if (tsk == current)
1622                max_data->uid = current_uid();
1623        else
1624                max_data->uid = task_uid(tsk);
1625
1626        max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1627        max_data->policy = tsk->policy;
1628        max_data->rt_priority = tsk->rt_priority;
1629
1630        /* record this tasks comm */
1631        tracing_record_cmdline(tsk);
1632        latency_fsnotify(tr);
1633}
1634
1635/**
1636 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1637 * @tr: tracer
1638 * @tsk: the task with the latency
1639 * @cpu: The cpu that initiated the trace.
1640 * @cond_data: User data associated with a conditional snapshot
1641 *
1642 * Flip the buffers between the @tr and the max_tr and record information
1643 * about which task was the cause of this latency.
1644 */
1645void
1646update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1647              void *cond_data)
1648{
1649        if (tr->stop_count)
1650                return;
1651
1652        WARN_ON_ONCE(!irqs_disabled());
1653
1654        if (!tr->allocated_snapshot) {
1655                /* Only the nop tracer should hit this when disabling */
1656                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1657                return;
1658        }
1659
1660        arch_spin_lock(&tr->max_lock);
1661
1662        /* Inherit the recordable setting from array_buffer */
1663        if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1664                ring_buffer_record_on(tr->max_buffer.buffer);
1665        else
1666                ring_buffer_record_off(tr->max_buffer.buffer);
1667
1668#ifdef CONFIG_TRACER_SNAPSHOT
1669        if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1670                goto out_unlock;
1671#endif
1672        swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1673
1674        __update_max_tr(tr, tsk, cpu);
1675
1676 out_unlock:
1677        arch_spin_unlock(&tr->max_lock);
1678}
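
/*
 * Illustrative sketch, not part of the original file: roughly how a latency
 * tracer is expected to use update_max_tr() when it observes a new worst
 * case. The function name and the delta argument are hypothetical; real
 * callers (e.g. the irqsoff or wakeup tracers) compute the latency
 * themselves and already run with interrupts disabled, as update_max_tr()
 * requires.
 */
static void example_report_max_latency(struct trace_array *tr, int cpu,
                                       unsigned long delta)
{
        if (delta <= tr->max_latency)
                return;

        /* New maximum: remember it and snapshot the buffers for later reading */
        tr->max_latency = delta;
        update_max_tr(tr, current, cpu, NULL);
}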
1679
1680/**
1681 * update_max_tr_single - only copy one trace over, and reset the rest
1682 * @tr: tracer
1683 * @tsk: task with the latency
1684 * @cpu: the cpu of the buffer to copy.
1685 *
1686 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1687 */
1688void
1689update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1690{
1691        int ret;
1692
1693        if (tr->stop_count)
1694                return;
1695
1696        WARN_ON_ONCE(!irqs_disabled());
1697        if (!tr->allocated_snapshot) {
1698                /* Only the nop tracer should hit this when disabling */
1699                WARN_ON_ONCE(tr->current_trace != &nop_trace);
1700                return;
1701        }
1702
1703        arch_spin_lock(&tr->max_lock);
1704
1705        ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1706
1707        if (ret == -EBUSY) {
1708                /*
1709                 * We failed to swap the buffer due to a commit taking
1710                 * place on this CPU. We fail to record, but we reset
1711                 * the max trace buffer (no one writes directly to it)
1712                 * and flag that it failed.
1713                 */
1714                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1715                        "Failed to swap buffers due to commit in progress\n");
1716        }
1717
1718        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1719
1720        __update_max_tr(tr, tsk, cpu);
1721        arch_spin_unlock(&tr->max_lock);
1722}
1723#endif /* CONFIG_TRACER_MAX_TRACE */
1724
1725static int wait_on_pipe(struct trace_iterator *iter, int full)
1726{
1727        /* Iterators are static, they should be filled or empty */
1728        if (trace_buffer_iter(iter, iter->cpu_file))
1729                return 0;
1730
1731        return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1732                                full);
1733}
1734
1735#ifdef CONFIG_FTRACE_STARTUP_TEST
1736static bool selftests_can_run;
1737
1738struct trace_selftests {
1739        struct list_head                list;
1740        struct tracer                   *type;
1741};
1742
1743static LIST_HEAD(postponed_selftests);
1744
1745static int save_selftest(struct tracer *type)
1746{
1747        struct trace_selftests *selftest;
1748
1749        selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1750        if (!selftest)
1751                return -ENOMEM;
1752
1753        selftest->type = type;
1754        list_add(&selftest->list, &postponed_selftests);
1755        return 0;
1756}
1757
1758static int run_tracer_selftest(struct tracer *type)
1759{
1760        struct trace_array *tr = &global_trace;
1761        struct tracer *saved_tracer = tr->current_trace;
1762        int ret;
1763
1764        if (!type->selftest || tracing_selftest_disabled)
1765                return 0;
1766
1767        /*
1768         * If a tracer registers early in boot up (before scheduling is
1769         * initialized and such), then do not run its selftests yet.
1770         * Instead, run it a little later in the boot process.
1771         */
1772        if (!selftests_can_run)
1773                return save_selftest(type);
1774
1775        /*
1776         * Run a selftest on this tracer.
1777         * Here we reset the trace buffer, and set the current
1778         * tracer to be this tracer. The tracer can then run some
1779         * internal tracing to verify that everything is in order.
1780         * If we fail, we do not register this tracer.
1781         */
1782        tracing_reset_online_cpus(&tr->array_buffer);
1783
1784        tr->current_trace = type;
1785
1786#ifdef CONFIG_TRACER_MAX_TRACE
1787        if (type->use_max_tr) {
1788                /* If we expanded the buffers, make sure the max is expanded too */
1789                if (ring_buffer_expanded)
1790                        ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1791                                           RING_BUFFER_ALL_CPUS);
1792                tr->allocated_snapshot = true;
1793        }
1794#endif
1795
1796        /* the test is responsible for initializing and enabling */
1797        pr_info("Testing tracer %s: ", type->name);
1798        ret = type->selftest(type, tr);
1799        /* the test is responsible for resetting too */
1800        tr->current_trace = saved_tracer;
1801        if (ret) {
1802                printk(KERN_CONT "FAILED!\n");
1803                /* Add the warning after printing 'FAILED' */
1804                WARN_ON(1);
1805                return -1;
1806        }
1807        /* Only reset on passing, to avoid touching corrupted buffers */
1808        tracing_reset_online_cpus(&tr->array_buffer);
1809
1810#ifdef CONFIG_TRACER_MAX_TRACE
1811        if (type->use_max_tr) {
1812                tr->allocated_snapshot = false;
1813
1814                /* Shrink the max buffer again */
1815                if (ring_buffer_expanded)
1816                        ring_buffer_resize(tr->max_buffer.buffer, 1,
1817                                           RING_BUFFER_ALL_CPUS);
1818        }
1819#endif
1820
1821        printk(KERN_CONT "PASSED\n");
1822        return 0;
1823}
1824
1825static __init int init_trace_selftests(void)
1826{
1827        struct trace_selftests *p, *n;
1828        struct tracer *t, **last;
1829        int ret;
1830
1831        selftests_can_run = true;
1832
1833        mutex_lock(&trace_types_lock);
1834
1835        if (list_empty(&postponed_selftests))
1836                goto out;
1837
1838        pr_info("Running postponed tracer tests:\n");
1839
1840        tracing_selftest_running = true;
1841        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1842                /* This loop can take minutes when sanitizers are enabled, so
1843                 * let's make sure we allow RCU processing.
1844                 */
1845                cond_resched();
1846                ret = run_tracer_selftest(p->type);
1847                /* If the test fails, then warn and remove from available_tracers */
1848                if (ret < 0) {
1849                        WARN(1, "tracer: %s failed selftest, disabling\n",
1850                             p->type->name);
1851                        last = &trace_types;
1852                        for (t = trace_types; t; t = t->next) {
1853                                if (t == p->type) {
1854                                        *last = t->next;
1855                                        break;
1856                                }
1857                                last = &t->next;
1858                        }
1859                }
1860                list_del(&p->list);
1861                kfree(p);
1862        }
1863        tracing_selftest_running = false;
1864
1865 out:
1866        mutex_unlock(&trace_types_lock);
1867
1868        return 0;
1869}
1870core_initcall(init_trace_selftests);
1871#else
1872static inline int run_tracer_selftest(struct tracer *type)
1873{
1874        return 0;
1875}
1876#endif /* CONFIG_FTRACE_STARTUP_TEST */
1877
1878static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1879
1880static void __init apply_trace_boot_options(void);
1881
1882/**
1883 * register_tracer - register a tracer with the ftrace system.
1884 * @type: the plugin for the tracer
1885 *
1886 * Register a new plugin tracer.
1887 */
1888int __init register_tracer(struct tracer *type)
1889{
1890        struct tracer *t;
1891        int ret = 0;
1892
1893        if (!type->name) {
1894                pr_info("Tracer must have a name\n");
1895                return -1;
1896        }
1897
1898        if (strlen(type->name) >= MAX_TRACER_SIZE) {
1899                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1900                return -1;
1901        }
1902
1903        if (security_locked_down(LOCKDOWN_TRACEFS)) {
1904                pr_warn("Can not register tracer %s due to lockdown\n",
1905                           type->name);
1906                return -EPERM;
1907        }
1908
1909        mutex_lock(&trace_types_lock);
1910
1911        tracing_selftest_running = true;
1912
1913        for (t = trace_types; t; t = t->next) {
1914                if (strcmp(type->name, t->name) == 0) {
1915                        /* already found */
1916                        pr_info("Tracer %s already registered\n",
1917                                type->name);
1918                        ret = -1;
1919                        goto out;
1920                }
1921        }
1922
1923        if (!type->set_flag)
1924                type->set_flag = &dummy_set_flag;
1925        if (!type->flags) {
1926                /*allocate a dummy tracer_flags*/
1927                type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1928                if (!type->flags) {
1929                        ret = -ENOMEM;
1930                        goto out;
1931                }
1932                type->flags->val = 0;
1933                type->flags->opts = dummy_tracer_opt;
1934        } else
1935                if (!type->flags->opts)
1936                        type->flags->opts = dummy_tracer_opt;
1937
1938        /* store the tracer for __set_tracer_option */
1939        type->flags->trace = type;
1940
1941        ret = run_tracer_selftest(type);
1942        if (ret < 0)
1943                goto out;
1944
1945        type->next = trace_types;
1946        trace_types = type;
1947        add_tracer_options(&global_trace, type);
1948
1949 out:
1950        tracing_selftest_running = false;
1951        mutex_unlock(&trace_types_lock);
1952
1953        if (ret || !default_bootup_tracer)
1954                goto out_unlock;
1955
1956        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1957                goto out_unlock;
1958
1959        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1960        /* Do we want this tracer to start on bootup? */
1961        tracing_set_tracer(&global_trace, type->name);
1962        default_bootup_tracer = NULL;
1963
1964        apply_trace_boot_options();
1965
1966        /* disable other selftests, since running this tracer will break them. */
1967        tracing_selftest_disabled = true;
1968#ifdef CONFIG_FTRACE_STARTUP_TEST
1969        printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1970               type->name);
1971#endif
1972
1973 out_unlock:
1974        return ret;
1975}
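
/*
 * Illustrative sketch, not part of the original file: the minimal shape of a
 * tracer registration. The tracer name and callbacks here are hypothetical;
 * see trace_nop.c for a real in-tree example. Registration must happen at
 * boot, which is why register_tracer() is __init and called from an initcall.
 */
static int example_tracer_init(struct trace_array *tr)
{
        return 0;       /* start tracing; nothing to set up in this sketch */
}

static void example_tracer_reset(struct trace_array *tr)
{
        /* stop tracing and undo whatever init did */
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int register_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(register_example_tracer);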
1976
1977static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1978{
1979        struct trace_buffer *buffer = buf->buffer;
1980
1981        if (!buffer)
1982                return;
1983
1984        ring_buffer_record_disable(buffer);
1985
1986        /* Make sure all commits have finished */
1987        synchronize_rcu();
1988        ring_buffer_reset_cpu(buffer, cpu);
1989
1990        ring_buffer_record_enable(buffer);
1991}
1992
1993void tracing_reset_online_cpus(struct array_buffer *buf)
1994{
1995        struct trace_buffer *buffer = buf->buffer;
1996        int cpu;
1997
1998        if (!buffer)
1999                return;
2000
2001        ring_buffer_record_disable(buffer);
2002
2003        /* Make sure all commits have finished */
2004        synchronize_rcu();
2005
2006        buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2007
2008        for_each_online_cpu(cpu)
2009                ring_buffer_reset_cpu(buffer, cpu);
2010
2011        ring_buffer_record_enable(buffer);
2012}
2013
2014/* Must have trace_types_lock held */
2015void tracing_reset_all_online_cpus(void)
2016{
2017        struct trace_array *tr;
2018
2019        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2020                if (!tr->clear_trace)
2021                        continue;
2022                tr->clear_trace = false;
2023                tracing_reset_online_cpus(&tr->array_buffer);
2024#ifdef CONFIG_TRACER_MAX_TRACE
2025                tracing_reset_online_cpus(&tr->max_buffer);
2026#endif
2027        }
2028}
2029
2030static int *tgid_map;
2031
2032#define SAVED_CMDLINES_DEFAULT 128
2033#define NO_CMDLINE_MAP UINT_MAX
2034static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2035struct saved_cmdlines_buffer {
2036        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2037        unsigned *map_cmdline_to_pid;
2038        unsigned cmdline_num;
2039        int cmdline_idx;
2040        char *saved_cmdlines;
2041};
2042static struct saved_cmdlines_buffer *savedcmd;
2043
2044/* temporary disable recording */
2045static atomic_t trace_record_taskinfo_disabled __read_mostly;
2046
2047static inline char *get_saved_cmdlines(int idx)
2048{
2049        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2050}
2051
2052static inline void set_cmdline(int idx, const char *cmdline)
2053{
2054        strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2055}
2056
2057static int allocate_cmdlines_buffer(unsigned int val,
2058                                    struct saved_cmdlines_buffer *s)
2059{
2060        s->map_cmdline_to_pid = kmalloc_array(val,
2061                                              sizeof(*s->map_cmdline_to_pid),
2062                                              GFP_KERNEL);
2063        if (!s->map_cmdline_to_pid)
2064                return -ENOMEM;
2065
2066        s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2067        if (!s->saved_cmdlines) {
2068                kfree(s->map_cmdline_to_pid);
2069                return -ENOMEM;
2070        }
2071
2072        s->cmdline_idx = 0;
2073        s->cmdline_num = val;
2074        memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2075               sizeof(s->map_pid_to_cmdline));
2076        memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2077               val * sizeof(*s->map_cmdline_to_pid));
2078
2079        return 0;
2080}
2081
2082static int trace_create_savedcmd(void)
2083{
2084        int ret;
2085
2086        savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2087        if (!savedcmd)
2088                return -ENOMEM;
2089
2090        ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2091        if (ret < 0) {
2092                kfree(savedcmd);
2093                savedcmd = NULL;
2094                return -ENOMEM;
2095        }
2096
2097        return 0;
2098}
2099
2100int is_tracing_stopped(void)
2101{
2102        return global_trace.stop_count;
2103}
2104
2105/**
2106 * tracing_start - quick start of the tracer
2107 *
2108 * If tracing is enabled but was stopped by tracing_stop,
2109 * this will start the tracer back up.
2110 */
2111void tracing_start(void)
2112{
2113        struct trace_buffer *buffer;
2114        unsigned long flags;
2115
2116        if (tracing_disabled)
2117                return;
2118
2119        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2120        if (--global_trace.stop_count) {
2121                if (global_trace.stop_count < 0) {
2122                        /* Someone screwed up their debugging */
2123                        WARN_ON_ONCE(1);
2124                        global_trace.stop_count = 0;
2125                }
2126                goto out;
2127        }
2128
2129        /* Prevent the buffers from switching */
2130        arch_spin_lock(&global_trace.max_lock);
2131
2132        buffer = global_trace.array_buffer.buffer;
2133        if (buffer)
2134                ring_buffer_record_enable(buffer);
2135
2136#ifdef CONFIG_TRACER_MAX_TRACE
2137        buffer = global_trace.max_buffer.buffer;
2138        if (buffer)
2139                ring_buffer_record_enable(buffer);
2140#endif
2141
2142        arch_spin_unlock(&global_trace.max_lock);
2143
2144 out:
2145        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2146}
2147
2148static void tracing_start_tr(struct trace_array *tr)
2149{
2150        struct trace_buffer *buffer;
2151        unsigned long flags;
2152
2153        if (tracing_disabled)
2154                return;
2155
2156        /* If global, we need to also start the max tracer */
2157        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2158                return tracing_start();
2159
2160        raw_spin_lock_irqsave(&tr->start_lock, flags);
2161
2162        if (--tr->stop_count) {
2163                if (tr->stop_count < 0) {
2164                        /* Someone screwed up their debugging */
2165                        WARN_ON_ONCE(1);
2166                        tr->stop_count = 0;
2167                }
2168                goto out;
2169        }
2170
2171        buffer = tr->array_buffer.buffer;
2172        if (buffer)
2173                ring_buffer_record_enable(buffer);
2174
2175 out:
2176        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2177}
2178
2179/**
2180 * tracing_stop - quick stop of the tracer
2181 *
2182 * Lightweight way to stop tracing. Use in conjunction with
2183 * tracing_start.
2184 */
2185void tracing_stop(void)
2186{
2187        struct trace_buffer *buffer;
2188        unsigned long flags;
2189
2190        raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2191        if (global_trace.stop_count++)
2192                goto out;
2193
2194        /* Prevent the buffers from switching */
2195        arch_spin_lock(&global_trace.max_lock);
2196
2197        buffer = global_trace.array_buffer.buffer;
2198        if (buffer)
2199                ring_buffer_record_disable(buffer);
2200
2201#ifdef CONFIG_TRACER_MAX_TRACE
2202        buffer = global_trace.max_buffer.buffer;
2203        if (buffer)
2204                ring_buffer_record_disable(buffer);
2205#endif
2206
2207        arch_spin_unlock(&global_trace.max_lock);
2208
2209 out:
2210        raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2211}
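
/*
 * Illustrative sketch, not part of the original file: tracing_stop() and
 * tracing_start() nest via stop_count, so a caller can freeze the buffers,
 * inspect them, and resume without worrying about other stoppers. The
 * callback-based wrapper below is hypothetical.
 */
static void example_with_tracing_stopped(void (*inspect)(void))
{
        tracing_stop();         /* recording stays off until the count drops */
        inspect();              /* read the frozen buffers here */
        tracing_start();        /* resumes only when stop_count reaches zero */
}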
2212
2213static void tracing_stop_tr(struct trace_array *tr)
2214{
2215        struct trace_buffer *buffer;
2216        unsigned long flags;
2217
2218        /* If global, we need to also stop the max tracer */
2219        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2220                return tracing_stop();
2221
2222        raw_spin_lock_irqsave(&tr->start_lock, flags);
2223        if (tr->stop_count++)
2224                goto out;
2225
2226        buffer = tr->array_buffer.buffer;
2227        if (buffer)
2228                ring_buffer_record_disable(buffer);
2229
2230 out:
2231        raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2232}
2233
2234static int trace_save_cmdline(struct task_struct *tsk)
2235{
2236        unsigned pid, idx;
2237
2238        /* treat recording of idle task as a success */
2239        if (!tsk->pid)
2240                return 1;
2241
2242        if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2243                return 0;
2244
2245        /*
2246         * It's not the end of the world if we don't get
2247         * the lock, but we also don't want to spin
2248         * nor do we want to disable interrupts,
2249         * so if we miss here, then better luck next time.
2250         */
2251        if (!arch_spin_trylock(&trace_cmdline_lock))
2252                return 0;
2253
2254        idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2255        if (idx == NO_CMDLINE_MAP) {
2256                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2257
2258                /*
2259                 * Check whether the cmdline buffer at idx has a pid
2260                 * mapped. We are going to overwrite that entry so we
2261                 * need to clear the map_pid_to_cmdline. Otherwise we
2262                 * would read the new comm for the old pid.
2263                 */
2264                pid = savedcmd->map_cmdline_to_pid[idx];
2265                if (pid != NO_CMDLINE_MAP)
2266                        savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2267
2268                savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2269                savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2270
2271                savedcmd->cmdline_idx = idx;
2272        }
2273
2274        set_cmdline(idx, tsk->comm);
2275
2276        arch_spin_unlock(&trace_cmdline_lock);
2277
2278        return 1;
2279}
2280
2281static void __trace_find_cmdline(int pid, char comm[])
2282{
2283        unsigned map;
2284
2285        if (!pid) {
2286                strcpy(comm, "<idle>");
2287                return;
2288        }
2289
2290        if (WARN_ON_ONCE(pid < 0)) {
2291                strcpy(comm, "<XXX>");
2292                return;
2293        }
2294
2295        if (pid > PID_MAX_DEFAULT) {
2296                strcpy(comm, "<...>");
2297                return;
2298        }
2299
2300        map = savedcmd->map_pid_to_cmdline[pid];
2301        if (map != NO_CMDLINE_MAP)
2302                strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2303        else
2304                strcpy(comm, "<...>");
2305}
2306
2307void trace_find_cmdline(int pid, char comm[])
2308{
2309        preempt_disable();
2310        arch_spin_lock(&trace_cmdline_lock);
2311
2312        __trace_find_cmdline(pid, comm);
2313
2314        arch_spin_unlock(&trace_cmdline_lock);
2315        preempt_enable();
2316}
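
/*
 * Illustrative sketch, not part of the original file: how output code turns
 * a recorded pid back into a comm string, in the style of trace_output.c.
 * The helper name is hypothetical.
 */
static void example_print_task(struct trace_seq *s, int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        trace_seq_printf(s, "%16s-%-5d", comm, pid);
}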
2317
2318int trace_find_tgid(int pid)
2319{
2320        if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2321                return 0;
2322
2323        return tgid_map[pid];
2324}
2325
2326static int trace_save_tgid(struct task_struct *tsk)
2327{
2328        /* treat recording of idle task as a success */
2329        if (!tsk->pid)
2330                return 1;
2331
2332        if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2333                return 0;
2334
2335        tgid_map[tsk->pid] = tsk->tgid;
2336        return 1;
2337}
2338
2339static bool tracing_record_taskinfo_skip(int flags)
2340{
2341        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2342                return true;
2343        if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2344                return true;
2345        if (!__this_cpu_read(trace_taskinfo_save))
2346                return true;
2347        return false;
2348}
2349
2350/**
2351 * tracing_record_taskinfo - record the task info of a task
2352 *
2353 * @task:  task to record
2354 * @flags: TRACE_RECORD_CMDLINE for recording comm
2355 *         TRACE_RECORD_TGID for recording tgid
2356 */
2357void tracing_record_taskinfo(struct task_struct *task, int flags)
2358{
2359        bool done;
2360
2361        if (tracing_record_taskinfo_skip(flags))
2362                return;
2363
2364        /*
2365         * Record as much task information as possible. If some fail, continue
2366         * to try to record the others.
2367         */
2368        done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2369        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2370
2371        /* If recording any information failed, retry again soon. */
2372        if (!done)
2373                return;
2374
2375        __this_cpu_write(trace_taskinfo_save, false);
2376}
2377
2378/**
2379 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2380 *
2381 * @prev: previous task during sched_switch
2382 * @next: next task during sched_switch
2383 * @flags: TRACE_RECORD_CMDLINE for recording comm
2384 *         TRACE_RECORD_TGID for recording tgid
2385 */
2386void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2387                                          struct task_struct *next, int flags)
2388{
2389        bool done;
2390
2391        if (tracing_record_taskinfo_skip(flags))
2392                return;
2393
2394        /*
2395         * Record as much task information as possible. If some fail, continue
2396         * to try to record the others.
2397         */
2398        done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2399        done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2400        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2401        done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2402
2403        /* If recording any information failed, retry again soon. */
2404        if (!done)
2405                return;
2406
2407        __this_cpu_write(trace_taskinfo_save, false);
2408}
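
/*
 * Illustrative sketch, not part of the original file: a sched_switch probe,
 * modelled loosely on trace_sched_switch.c, recording both comm and tgid for
 * the two tasks involved in a context switch. The probe name and its exact
 * tracepoint signature are assumptions of this sketch.
 */
static void example_sched_switch_probe(void *ignore, bool preempt,
                                       struct task_struct *prev,
                                       struct task_struct *next)
{
        tracing_record_taskinfo_sched_switch(prev, next,
                                             TRACE_RECORD_CMDLINE |
                                             TRACE_RECORD_TGID);
}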
2409
2410/* Helpers to record a specific task information */
2411void tracing_record_cmdline(struct task_struct *task)
2412{
2413        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2414}
2415
2416void tracing_record_tgid(struct task_struct *task)
2417{
2418        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2419}
2420
2421/*
2422 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2423 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2424 * simplifies those functions and keeps them in sync.
2425 */
2426enum print_line_t trace_handle_return(struct trace_seq *s)
2427{
2428        return trace_seq_has_overflowed(s) ?
2429                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2430}
2431EXPORT_SYMBOL_GPL(trace_handle_return);
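
/*
 * Illustrative sketch, not part of the original file: an event's output
 * callback typically ends with trace_handle_return(), so an overflowed
 * trace_seq is reported as TRACE_TYPE_PARTIAL_LINE. The event itself is
 * hypothetical; only the pid from the generic trace_entry is printed.
 */
static enum print_line_t
example_event_print_line(struct trace_iterator *iter, int flags,
                         struct trace_event *event)
{
        struct trace_seq *s = &iter->seq;

        trace_seq_printf(s, "example event from pid %d\n", iter->ent->pid);

        return trace_handle_return(s);
}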
2432
2433void
2434tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2435                             unsigned long flags, int pc)
2436{
2437        struct task_struct *tsk = current;
2438
2439        entry->preempt_count            = pc & 0xff;
2440        entry->pid                      = (tsk) ? tsk->pid : 0;
2441        entry->type                     = type;
2442        entry->flags =
2443#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2444                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2445#else
2446                TRACE_FLAG_IRQS_NOSUPPORT |
2447#endif
2448                ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2449                ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2450                ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2451                (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2452                (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2453}
2454EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2455
2456struct ring_buffer_event *
2457trace_buffer_lock_reserve(struct trace_buffer *buffer,
2458                          int type,
2459                          unsigned long len,
2460                          unsigned long flags, int pc)
2461{
2462        return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2463}
2464
2465DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2466DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2467static int trace_buffered_event_ref;
2468
2469/**
2470 * trace_buffered_event_enable - enable buffering events
2471 *
2472 * When events are being filtered, it is quicker to use a temporary
2473 * buffer to write the event data into if there's a likely chance
2474 * that it will not be committed. Discarding an event from the ring
2475 * buffer is not as fast as committing it, and is much slower than
2476 * copying the data and then committing that copy.
2477 *
2478 * When an event is to be filtered, allocate per cpu buffers to
2479 * write the event data into, and if the event is filtered and discarded
2480 * it is simply dropped, otherwise, the entire data is to be committed
2481 * in one shot.
2482 */
2483void trace_buffered_event_enable(void)
2484{
2485        struct ring_buffer_event *event;
2486        struct page *page;
2487        int cpu;
2488
2489        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2490
2491        if (trace_buffered_event_ref++)
2492                return;
2493
2494        for_each_tracing_cpu(cpu) {
2495                page = alloc_pages_node(cpu_to_node(cpu),
2496                                        GFP_KERNEL | __GFP_NORETRY, 0);
2497                if (!page)
2498                        goto failed;
2499
2500                event = page_address(page);
2501                memset(event, 0, sizeof(*event));
2502
2503                per_cpu(trace_buffered_event, cpu) = event;
2504
2505                preempt_disable();
2506                if (cpu == smp_processor_id() &&
2507                    this_cpu_read(trace_buffered_event) !=
2508                    per_cpu(trace_buffered_event, cpu))
2509                        WARN_ON_ONCE(1);
2510                preempt_enable();
2511        }
2512
2513        return;
2514 failed:
2515        trace_buffered_event_disable();
2516}
2517
2518static void enable_trace_buffered_event(void *data)
2519{
2520        /* Probably not needed, but do it anyway */
2521        smp_rmb();
2522        this_cpu_dec(trace_buffered_event_cnt);
2523}
2524
2525static void disable_trace_buffered_event(void *data)
2526{
2527        this_cpu_inc(trace_buffered_event_cnt);
2528}
2529
2530/**
2531 * trace_buffered_event_disable - disable buffering events
2532 *
2533 * When a filter is removed, it is faster to not use the buffered
2534 * events, and to commit directly into the ring buffer. Free up
2535 * the temp buffers when there are no more users. This requires
2536 * special synchronization with current events.
2537 */
2538void trace_buffered_event_disable(void)
2539{
2540        int cpu;
2541
2542        WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2543
2544        if (WARN_ON_ONCE(!trace_buffered_event_ref))
2545                return;
2546
2547        if (--trace_buffered_event_ref)
2548                return;
2549
2550        preempt_disable();
2551        /* For each CPU, set the buffer as used. */
2552        smp_call_function_many(tracing_buffer_mask,
2553                               disable_trace_buffered_event, NULL, 1);
2554        preempt_enable();
2555
2556        /* Wait for all current users to finish */
2557        synchronize_rcu();
2558
2559        for_each_tracing_cpu(cpu) {
2560                free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2561                per_cpu(trace_buffered_event, cpu) = NULL;
2562        }
2563        /*
2564         * Make sure trace_buffered_event is NULL before clearing
2565         * trace_buffered_event_cnt.
2566         */
2567        smp_wmb();
2568
2569        preempt_disable();
2570        /* Do the work on each cpu */
2571        smp_call_function_many(tracing_buffer_mask,
2572                               enable_trace_buffered_event, NULL, 1);
2573        preempt_enable();
2574}
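
/*
 * Illustrative sketch, not part of the original file: the event filter code
 * takes a reference on the buffered events for as long as a filter exists,
 * always under event_mutex (which the WARN_ON_ONCE() checks above enforce).
 * The helper names below are hypothetical.
 */
static void example_filter_attach(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();  /* first user allocates per-cpu pages */
        /* ... install the filter on the event file ... */
        mutex_unlock(&event_mutex);
}

static void example_filter_detach(void)
{
        mutex_lock(&event_mutex);
        /* ... remove the filter from the event file ... */
        trace_buffered_event_disable(); /* last user frees the per-cpu pages */
        mutex_unlock(&event_mutex);
}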
2575
2576static struct trace_buffer *temp_buffer;
2577
2578struct ring_buffer_event *
2579trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2580                          struct trace_event_file *trace_file,
2581                          int type, unsigned long len,
2582                          unsigned long flags, int pc)
2583{
2584        struct ring_buffer_event *entry;
2585        int val;
2586
2587        *current_rb = trace_file->tr->array_buffer.buffer;
2588
2589        if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2590             (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2591            (entry = this_cpu_read(trace_buffered_event))) {
2592                /* Try to use the per cpu buffer first */
2593                val = this_cpu_inc_return(trace_buffered_event_cnt);
2594                if (val == 1) {
2595                        trace_event_setup(entry, type, flags, pc);
2596                        entry->array[0] = len;
2597                        return entry;
2598                }
2599                this_cpu_dec(trace_buffered_event_cnt);
2600        }
2601
2602        entry = __trace_buffer_lock_reserve(*current_rb,
2603                                            type, len, flags, pc);
2604        /*
2605         * If tracing is off, but we have triggers enabled
2606         * we still need to look at the event data. Use the temp_buffer
2607         * to store the trace event for the trigger to use. It's recursion
2608         * safe and will not be recorded anywhere.
2609         */
2610        if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2611                *current_rb = temp_buffer;
2612                entry = __trace_buffer_lock_reserve(*current_rb,
2613                                                    type, len, flags, pc);
2614        }
2615        return entry;
2616}
2617EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2618
2619static DEFINE_SPINLOCK(tracepoint_iter_lock);
2620static DEFINE_MUTEX(tracepoint_printk_mutex);
2621
2622static void output_printk(struct trace_event_buffer *fbuffer)
2623{
2624        struct trace_event_call *event_call;
2625        struct trace_event_file *file;
2626        struct trace_event *event;
2627        unsigned long flags;
2628        struct trace_iterator *iter = tracepoint_print_iter;
2629
2630        /* We should never get here if iter is NULL */
2631        if (WARN_ON_ONCE(!iter))
2632                return;
2633
2634        event_call = fbuffer->trace_file->event_call;
2635        if (!event_call || !event_call->event.funcs ||
2636            !event_call->event.funcs->trace)
2637                return;
2638
2639        file = fbuffer->trace_file;
2640        if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2641            (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2642             !filter_match_preds(file->filter, fbuffer->entry)))
2643                return;
2644
2645        event = &fbuffer->trace_file->event_call->event;
2646
2647        spin_lock_irqsave(&tracepoint_iter_lock, flags);
2648        trace_seq_init(&iter->seq);
2649        iter->ent = fbuffer->entry;
2650        event_call->event.funcs->trace(iter, 0, event);
2651        trace_seq_putc(&iter->seq, 0);
2652        printk("%s", iter->seq.buffer);
2653
2654        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2655}
2656
2657int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2658                             void __user *buffer, size_t *lenp,
2659                             loff_t *ppos)
2660{
2661        int save_tracepoint_printk;
2662        int ret;
2663
2664        mutex_lock(&tracepoint_printk_mutex);
2665        save_tracepoint_printk = tracepoint_printk;
2666
2667        ret = proc_dointvec(table, write, buffer, lenp, ppos);
2668
2669        /*
2670         * This will force exiting early, as tracepoint_printk
2671         * is always zero when tracepoint_print_iter is not allocated
2672         */
2673        if (!tracepoint_print_iter)
2674                tracepoint_printk = 0;
2675
2676        if (save_tracepoint_printk == tracepoint_printk)
2677                goto out;
2678
2679        if (tracepoint_printk)
2680                static_key_enable(&tracepoint_printk_key.key);
2681        else
2682                static_key_disable(&tracepoint_printk_key.key);
2683
2684 out:
2685        mutex_unlock(&tracepoint_printk_mutex);
2686
2687        return ret;
2688}
2689
2690void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2691{
2692        if (static_key_false(&tracepoint_printk_key.key))
2693                output_printk(fbuffer);
2694
2695        event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2696                                    fbuffer->event, fbuffer->entry,
2697                                    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2698}
2699EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
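
/*
 * Illustrative sketch, not part of the original file: the reserve/commit
 * flow that generated TRACE_EVENT() code follows, written out by hand. The
 * struct example_entry layout is hypothetical; trace_event_buffer_reserve()
 * ends up in trace_event_buffer_lock_reserve() above and
 * trace_event_buffer_commit() finishes the event.
 */
struct example_entry {
        struct trace_entry      ent;
        int                     value;
};

static void example_emit_event(struct trace_event_file *trace_file, int value)
{
        struct trace_event_buffer fbuffer;
        struct example_entry *entry;

        entry = trace_event_buffer_reserve(&fbuffer, trace_file,
                                           sizeof(*entry));
        if (!entry)
                return;

        entry->value = value;
        trace_event_buffer_commit(&fbuffer);
}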
2700
2701/*
2702 * Skip 3:
2703 *
2704 *   trace_buffer_unlock_commit_regs()
2705 *   trace_event_buffer_commit()
2706 *   trace_event_raw_event_xxx()
2707 */
2708# define STACK_SKIP 3
2709
2710void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2711                                     struct trace_buffer *buffer,
2712                                     struct ring_buffer_event *event,
2713                                     unsigned long flags, int pc,
2714                                     struct pt_regs *regs)
2715{
2716        __buffer_unlock_commit(buffer, event);
2717
2718        /*
2719         * If regs is not set, then skip the necessary functions.
2720         * Note, we can still get here via blktrace, wakeup tracer
2721         * and mmiotrace, but that's ok if they lose a function or
2722         * two. They are not that meaningful.
2723         */
2724        ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2725        ftrace_trace_userstack(buffer, flags, pc);
2726}
2727
2728/*
2729 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2730 */
2731void
2732trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2733                                   struct ring_buffer_event *event)
2734{
2735        __buffer_unlock_commit(buffer, event);
2736}
2737
2738static void
2739trace_process_export(struct trace_export *export,
2740               struct ring_buffer_event *event)
2741{
2742        struct trace_entry *entry;
2743        unsigned int size = 0;
2744
2745        entry = ring_buffer_event_data(event);
2746        size = ring_buffer_event_length(event);
2747        export->write(export, entry, size);
2748}
2749
2750static DEFINE_MUTEX(ftrace_export_lock);
2751
2752static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2753
2754static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2755
2756static inline void ftrace_exports_enable(void)
2757{
2758        static_branch_enable(&ftrace_exports_enabled);
2759}
2760
2761static inline void ftrace_exports_disable(void)
2762{
2763        static_branch_disable(&ftrace_exports_enabled);
2764}
2765
2766static void ftrace_exports(struct ring_buffer_event *event)
2767{
2768        struct trace_export *export;
2769
2770        preempt_disable_notrace();
2771
2772        export = rcu_dereference_raw_check(ftrace_exports_list);
2773        while (export) {
2774                trace_process_export(export, event);
2775                export = rcu_dereference_raw_check(export->next);
2776        }
2777
2778        preempt_enable_notrace();
2779}
2780
2781static inline void
2782add_trace_export(struct trace_export **list, struct trace_export *export)
2783{
2784        rcu_assign_pointer(export->next, *list);
2785        /*
2786         * We are entering export into the list but another
2787         * CPU might be walking that list. We need to make sure
2788         * the export->next pointer is valid before another CPU sees
2789         * the export pointer included into the list.
2790         */
2791        rcu_assign_pointer(*list, export);
2792}
2793
2794static inline int
2795rm_trace_export(struct trace_export **list, struct trace_export *export)
2796{
2797        struct trace_export **p;
2798
2799        for (p = list; *p != NULL; p = &(*p)->next)
2800                if (*p == export)
2801                        break;
2802
2803        if (*p != export)
2804                return -1;
2805
2806        rcu_assign_pointer(*p, (*p)->next);
2807
2808        return 0;
2809}
2810
2811static inline void
2812add_ftrace_export(struct trace_export **list, struct trace_export *export)
2813{
2814        if (*list == NULL)
2815                ftrace_exports_enable();
2816
2817        add_trace_export(list, export);
2818}
2819
2820static inline int
2821rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2822{
2823        int ret;
2824
2825        ret = rm_trace_export(list, export);
2826        if (*list == NULL)
2827                ftrace_exports_disable();
2828
2829        return ret;
2830}
2831
2832int register_ftrace_export(struct trace_export *export)
2833{
2834        if (WARN_ON_ONCE(!export->write))
2835                return -1;
2836
2837        mutex_lock(&ftrace_export_lock);
2838
2839        add_ftrace_export(&ftrace_exports_list, export);
2840
2841        mutex_unlock(&ftrace_export_lock);
2842
2843        return 0;
2844}
2845EXPORT_SYMBOL_GPL(register_ftrace_export);
2846
2847int unregister_ftrace_export(struct trace_export *export)
2848{
2849        int ret;
2850
2851        mutex_lock(&ftrace_export_lock);
2852
2853        ret = rm_ftrace_export(&ftrace_exports_list, export);
2854
2855        mutex_unlock(&ftrace_export_lock);
2856
2857        return ret;
2858}
2859EXPORT_SYMBOL_GPL(unregister_ftrace_export);
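
/*
 * Illustrative sketch, not part of the original file: a minimal trace export
 * that is handed every function-trace entry by ftrace_exports(). The write
 * callback body and the pr_debug() destination are hypothetical; a real
 * export forwards the raw entry to another transport.
 */
static void example_export_write(struct trace_export *export,
                                 const void *entry, unsigned int size)
{
        pr_debug("ftrace export: %u byte entry\n", size);
}

static struct trace_export example_export = {
        .write  = example_export_write,
};

/*
 * register_ftrace_export(&example_export) adds it to ftrace_exports_list;
 * unregister_ftrace_export(&example_export) removes it again.
 */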
2860
2861void
2862trace_function(struct trace_array *tr,
2863               unsigned long ip, unsigned long parent_ip, unsigned long flags,
2864               int pc)
2865{
2866        struct trace_event_call *call = &event_function;
2867        struct trace_buffer *buffer = tr->array_buffer.buffer;
2868        struct ring_buffer_event *event;
2869        struct ftrace_entry *entry;
2870
2871        event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2872                                            flags, pc);
2873        if (!event)
2874                return;
2875        entry   = ring_buffer_event_data(event);
2876        entry->ip                       = ip;
2877        entry->parent_ip                = parent_ip;
2878
2879        if (!call_filter_check_discard(call, entry, buffer, event)) {
2880                if (static_branch_unlikely(&ftrace_exports_enabled))
2881                        ftrace_exports(event);
2882                __buffer_unlock_commit(buffer, event);
2883        }
2884}
2885
2886#ifdef CONFIG_STACKTRACE
2887
2888/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2889#define FTRACE_KSTACK_NESTING   4
2890
2891#define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2892
2893struct ftrace_stack {
2894        unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2895};
2896
2897
2898struct ftrace_stacks {
2899        struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2900};
2901
2902static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2903static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2904
2905static void __ftrace_trace_stack(struct trace_buffer *buffer,
2906                                 unsigned long flags,
2907                                 int skip, int pc, struct pt_regs *regs)
2908{
2909        struct trace_event_call *call = &event_kernel_stack;
2910        struct ring_buffer_event *event;
2911        unsigned int size, nr_entries;
2912        struct ftrace_stack *fstack;
2913        struct stack_entry *entry;
2914        int stackidx;
2915
2916        /*
2917         * Add one, for this function and the call to save_stack_trace().
2918         * If regs is set, then these functions will not be in the way.
2919         */
2920#ifndef CONFIG_UNWINDER_ORC
2921        if (!regs)
2922                skip++;
2923#endif
2924
2925        /*
2926         * Since events can happen in NMIs there's no safe way to
2927         * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2928         * or NMI comes in, it will just have to use the default
2929         * FTRACE_STACK_SIZE.
2930         */
2931        preempt_disable_notrace();
2932
2933        stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2934
2935        /* This should never happen. If it does, yell once and skip */
2936        if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2937                goto out;
2938
2939        /*
2940         * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2941         * interrupt will either see the value pre increment or post
2942         * increment. If the interrupt happens pre increment it will have
2943         * restored the counter when it returns.  We just need a barrier to
2944         * keep gcc from moving things around.
2945         */
2946        barrier();
2947
2948        fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2949        size = ARRAY_SIZE(fstack->calls);
2950
2951        if (regs) {
2952                nr_entries = stack_trace_save_regs(regs, fstack->calls,
2953                                                   size, skip);
2954        } else {
2955                nr_entries = stack_trace_save(fstack->calls, size, skip);
2956        }
2957
2958        size = nr_entries * sizeof(unsigned long);
2959        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2960                                            sizeof(*entry) + size, flags, pc);
2961        if (!event)
2962                goto out;
2963        entry = ring_buffer_event_data(event);
2964
2965        memcpy(&entry->caller, fstack->calls, size);
2966        entry->size = nr_entries;
2967
2968        if (!call_filter_check_discard(call, entry, buffer, event))
2969                __buffer_unlock_commit(buffer, event);
2970
2971 out:
2972        /* Again, don't let gcc optimize things here */
2973        barrier();
2974        __this_cpu_dec(ftrace_stack_reserve);
2975        preempt_enable_notrace();
2976
2977}
2978
2979static inline void ftrace_trace_stack(struct trace_array *tr,
2980                                      struct trace_buffer *buffer,
2981                                      unsigned long flags,
2982                                      int skip, int pc, struct pt_regs *regs)
2983{
2984        if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2985                return;
2986
2987        __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2988}
2989
2990void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2991                   int pc)
2992{
2993        struct trace_buffer *buffer = tr->array_buffer.buffer;
2994
2995        if (rcu_is_watching()) {
2996                __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2997                return;
2998        }
2999
3000        /*
3001         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3002         * but if the above rcu_is_watching() failed, then the NMI
3003         * triggered someplace critical, and rcu_irq_enter() should
3004         * not be called from NMI.
3005         */
3006        if (unlikely(in_nmi()))
3007                return;
3008
3009        rcu_irq_enter_irqson();
3010        __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3011        rcu_irq_exit_irqson();
3012}
3013
3014/**
3015 * trace_dump_stack - record a stack back trace in the trace buffer
3016 * @skip: Number of functions to skip (helper handlers)
3017 */
3018void trace_dump_stack(int skip)
3019{
3020        unsigned long flags;
3021
3022        if (tracing_disabled || tracing_selftest_running)
3023                return;
3024
3025        local_save_flags(flags);
3026
3027#ifndef CONFIG_UNWINDER_ORC
3028        /* Skip 1 to skip this function. */
3029        skip++;
3030#endif
3031        __ftrace_trace_stack(global_trace.array_buffer.buffer,
3032                             flags, skip, preempt_count(), NULL);
3033}
3034EXPORT_SYMBOL_GPL(trace_dump_stack);
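
/*
 * Illustrative sketch, not part of the original file: recording a backtrace
 * into the trace buffer from an unexpected code path, instead of spamming
 * dmesg with dump_stack(). The surrounding check is hypothetical.
 */
static void example_unexpected_path(bool broken)
{
        if (broken)
                trace_dump_stack(0);    /* 0: do not skip any extra callers */
}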
3035
3036#ifdef CONFIG_USER_STACKTRACE_SUPPORT
3037static DEFINE_PER_CPU(int, user_stack_count);
3038
3039static void
3040ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3041{
3042        struct trace_event_call *call = &event_user_stack;
3043        struct ring_buffer_event *event;
3044        struct userstack_entry *entry;
3045
3046        if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3047                return;
3048
3049        /*
3050         * NMIs can not handle page faults, even with fix ups.
3051         * The save user stack can (and often does) fault.
3052         */
3053        if (unlikely(in_nmi()))
3054                return;
3055
3056        /*
3057         * prevent recursion, since the user stack tracing may
3058         * trigger other kernel events.
3059         */
3060        preempt_disable();
3061        if (__this_cpu_read(user_stack_count))
3062                goto out;
3063
3064        __this_cpu_inc(user_stack_count);
3065
3066        event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3067                                            sizeof(*entry), flags, pc);
3068        if (!event)
3069                goto out_drop_count;
3070        entry   = ring_buffer_event_data(event);
3071
3072        entry->tgid             = current->tgid;
3073        memset(&entry->caller, 0, sizeof(entry->caller));
3074
3075        stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3076        if (!call_filter_check_discard(call, entry, buffer, event))
3077                __buffer_unlock_commit(buffer, event);
3078
3079 out_drop_count:
3080        __this_cpu_dec(user_stack_count);
3081 out:
3082        preempt_enable();
3083}
3084#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3085static void ftrace_trace_userstack(struct trace_buffer *buffer,
3086                                   unsigned long flags, int pc)
3087{
3088}
3089#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3090
3091#endif /* CONFIG_STACKTRACE */
3092
3093/* created for use with alloc_percpu */
3094struct trace_buffer_struct {
3095        int nesting;
3096        char buffer[4][TRACE_BUF_SIZE];
3097};
3098
3099static struct trace_buffer_struct *trace_percpu_buffer;
3100
3101/*
3102 * This allows for lockless recording.  If we're nested too deeply, then
3103 * this returns NULL.
3104 */
3105static char *get_trace_buf(void)
3106{
3107        struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3108
3109        if (!buffer || buffer->nesting >= 4)
3110                return NULL;
3111
3112        buffer->nesting++;
3113
3114        /* Interrupts must see nesting incremented before we use the buffer */
3115        barrier();
3116        return &buffer->buffer[buffer->nesting][0];
3117}
3118
3119static void put_trace_buf(void)
3120{
3121        /* Don't let the decrement of nesting leak before this */
3122        barrier();
3123        this_cpu_dec(trace_percpu_buffer->nesting);
3124}
3125
3126static int alloc_percpu_trace_buffer(void)
3127{
3128        struct trace_buffer_struct *buffers;
3129
3130        buffers = alloc_percpu(struct trace_buffer_struct);
3131        if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3132                return -ENOMEM;
3133
3134        trace_percpu_buffer = buffers;
3135        return 0;
3136}
3137
3138static int buffers_allocated;
3139
3140void trace_printk_init_buffers(void)
3141{
3142        if (buffers_allocated)
3143                return;
3144
3145        if (alloc_percpu_trace_buffer())
3146                return;
3147
3148        /* trace_printk() is for debug use only. Don't use it in production. */
3149
3150        pr_warn("\n");
3151        pr_warn("**********************************************************\n");
3152        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3153        pr_warn("**                                                      **\n");
3154        pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3155        pr_warn("**                                                      **\n");
3156        pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3157        pr_warn("** unsafe for production use.                           **\n");
3158        pr_warn("**                                                      **\n");
3159        pr_warn("** If you see this message and you are not debugging    **\n");
3160        pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3161        pr_warn("**                                                      **\n");
3162        pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3163        pr_warn("**********************************************************\n");
3164
3165        /* Expand the buffers to set size */
3166        tracing_update_buffers();
3167
3168        buffers_allocated = 1;
3169
3170        /*
3171         * trace_printk_init_buffers() can be called by modules.
3172         * If that happens, then we need to start cmdline recording
3173         * directly here. If the global_trace.buffer is already
3174         * allocated here, then this was called by module code.
3175         */
3176        if (global_trace.array_buffer.buffer)
3177                tracing_start_cmdline_record();
3178}
3179EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3180
3181void trace_printk_start_comm(void)
3182{
3183        /* Start tracing comms if trace printk is set */
3184        if (!buffers_allocated)
3185                return;
3186        tracing_start_cmdline_record();
3187}
3188
3189static void trace_printk_start_stop_comm(int enabled)
3190{
3191        if (!buffers_allocated)
3192                return;
3193
3194        if (enabled)
3195                tracing_start_cmdline_record();
3196        else
3197                tracing_stop_cmdline_record();
3198}
3199
3200/**
3201 * trace_vbprintk - write binary msg to tracing buffer
3202 * @ip:    The address of the caller
3203 * @fmt:   The string format to write to the buffer
3204 * @args:  Arguments for @fmt
3205 */
3206int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3207{
3208        struct trace_event_call *call = &event_bprint;
3209        struct ring_buffer_event *event;
3210        struct trace_buffer *buffer;
3211        struct trace_array *tr = &global_trace;
3212        struct bprint_entry *entry;
3213        unsigned long flags;
3214        char *tbuffer;
3215        int len = 0, size, pc;
3216
3217        if (unlikely(tracing_selftest_running || tracing_disabled))
3218                return 0;
3219
3220        /* Don't pollute graph traces with trace_vprintk internals */
3221        pause_graph_tracing();
3222
3223        pc = preempt_count();
3224        preempt_disable_notrace();
3225
3226        tbuffer = get_trace_buf();
3227        if (!tbuffer) {
3228                len = 0;
3229                goto out_nobuffer;
3230        }
3231
3232        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3233
3234        if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3235                goto out_put;
3236
3237        local_save_flags(flags);
3238        size = sizeof(*entry) + sizeof(u32) * len;
3239        buffer = tr->array_buffer.buffer;
3240        ring_buffer_nest_start(buffer);
3241        event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3242                                            flags, pc);
3243        if (!event)
3244                goto out;
3245        entry = ring_buffer_event_data(event);
3246        entry->ip                       = ip;
3247        entry->fmt                      = fmt;
3248
3249        memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3250        if (!call_filter_check_discard(call, entry, buffer, event)) {
3251                __buffer_unlock_commit(buffer, event);
3252                ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3253        }
3254
3255out:
3256        ring_buffer_nest_end(buffer);
3257out_put:
3258        put_trace_buf();
3259
3260out_nobuffer:
3261        preempt_enable_notrace();
3262        unpause_graph_tracing();
3263
3264        return len;
3265}
3266EXPORT_SYMBOL_GPL(trace_vbprintk);
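
/*
 * Illustrative sketch, not part of the original file: a trace_printk() call
 * with a compile-time constant format string is what ultimately lands in
 * trace_vbprintk() (via __trace_bprintk()), storing only the format pointer
 * and the binary arguments. The calling function is hypothetical.
 */
static void example_debug_path(int irq, u64 delta_ns)
{
        trace_printk("irq %d handled in %llu ns\n", irq, delta_ns);
}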
3267
3268__printf(3, 0)
3269static int
3270__trace_array_vprintk(struct trace_buffer *buffer,
3271                      unsigned long ip, const char *fmt, va_list args)
3272{
3273        struct trace_event_call *call = &event_print;
3274        struct ring_buffer_event *event;
3275        int len = 0, size, pc;
3276        struct print_entry *entry;
3277        unsigned long flags;
3278        char *tbuffer;
3279
3280        if (tracing_disabled || tracing_selftest_running)
3281                return 0;
3282
3283        /* Don't pollute graph traces with trace_vprintk internals */
3284        pause_graph_tracing();
3285
3286        pc = preempt_count();
3287        preempt_disable_notrace();
3288
3289
3290        tbuffer = get_trace_buf();
3291        if (!tbuffer) {
3292                len = 0;
3293                goto out_nobuffer;
3294        }
3295
3296        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3297
3298        local_save_flags(flags);
3299        size = sizeof(*entry) + len + 1;
3300        ring_buffer_nest_start(buffer);
3301        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3302                                            flags, pc);
3303        if (!event)
3304                goto out;
3305        entry = ring_buffer_event_data(event);
3306        entry->ip = ip;
3307
3308        memcpy(&entry->buf, tbuffer, len + 1);
3309        if (!call_filter_check_discard(call, entry, buffer, event)) {
3310                __buffer_unlock_commit(buffer, event);
3311                ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3312        }
3313
3314out:
3315        ring_buffer_nest_end(buffer);
3316        put_trace_buf();
3317
3318out_nobuffer:
3319        preempt_enable_notrace();
3320        unpause_graph_tracing();
3321
3322        return len;
3323}
3324
3325__printf(3, 0)
3326int trace_array_vprintk(struct trace_array *tr,
3327                        unsigned long ip, const char *fmt, va_list args)
3328{
3329        return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3330}
3331
3332__printf(3, 0)
3333int trace_array_printk(struct trace_array *tr,
3334                       unsigned long ip, const char *fmt, ...)
3335{
3336        int ret;
3337        va_list ap;
3338
3339        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3340                return 0;
3341
3342        if (!tr)
3343                return -ENOENT;
3344
3345        va_start(ap, fmt);
3346        ret = trace_array_vprintk(tr, ip, fmt, ap);
3347        va_end(ap);
3348        return ret;
3349}
3350EXPORT_SYMBOL_GPL(trace_array_printk);
3351
3352__printf(3, 4)
3353int trace_array_printk_buf(struct trace_buffer *buffer,
3354                           unsigned long ip, const char *fmt, ...)
3355{
3356        int ret;
3357        va_list ap;
3358
3359        if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3360                return 0;
3361
3362        va_start(ap, fmt);
3363        ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3364        va_end(ap);
3365        return ret;
3366}
3367
3368__printf(2, 0)
3369int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3370{
3371        return trace_array_vprintk(&global_trace, ip, fmt, args);
3372}
3373EXPORT_SYMBOL_GPL(trace_vprintk);
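
/*
 * Usage sketch (illustrative only, with hypothetical values): callers
 * normally use the varargs wrappers rather than the va_list functions
 * above.  Assuming the caller already has a struct trace_array *tr for
 * an instance:
 *
 *	trace_printk("hit %d\n", count);
 *	trace_array_printk(tr, _THIS_IP_, "instance msg: %d\n", count);
 *
 * trace_array_printk() bails out early unless the global "printk" trace
 * option is set (see the TRACE_ITER_PRINTK check above) and returns
 * -ENOENT when no trace array is supplied.
 */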
3374
3375static void trace_iterator_increment(struct trace_iterator *iter)
3376{
3377        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3378
3379        iter->idx++;
3380        if (buf_iter)
3381                ring_buffer_read(buf_iter, NULL);
3382}
3383
3384static struct trace_entry *
3385peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3386                unsigned long *lost_events)
3387{
3388        struct ring_buffer_event *event;
3389        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3390
3391        if (buf_iter)
3392                event = ring_buffer_iter_peek(buf_iter, ts);
3393        else
3394                event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3395                                         lost_events);
3396
3397        if (event) {
3398                iter->ent_size = ring_buffer_event_length(event);
3399                return ring_buffer_event_data(event);
3400        }
3401        iter->ent_size = 0;
3402        return NULL;
3403}
3404
3405static struct trace_entry *
3406__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3407                  unsigned long *missing_events, u64 *ent_ts)
3408{
3409        struct trace_buffer *buffer = iter->array_buffer->buffer;
3410        struct trace_entry *ent, *next = NULL;
3411        unsigned long lost_events = 0, next_lost = 0;
3412        int cpu_file = iter->cpu_file;
3413        u64 next_ts = 0, ts;
3414        int next_cpu = -1;
3415        int next_size = 0;
3416        int cpu;
3417
3418        /*
3419         * If we are in a per_cpu trace file, don't bother iterating over
3420         * all CPUs; just peek at that CPU directly.
3421         */
3422        if (cpu_file > RING_BUFFER_ALL_CPUS) {
3423                if (ring_buffer_empty_cpu(buffer, cpu_file))
3424                        return NULL;
3425                ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3426                if (ent_cpu)
3427                        *ent_cpu = cpu_file;
3428
3429                return ent;
3430        }
3431
3432        for_each_tracing_cpu(cpu) {
3433
3434                if (ring_buffer_empty_cpu(buffer, cpu))
3435                        continue;
3436
3437                ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3438
3439                /*
3440                 * Pick the entry with the smallest timestamp:
3441                 */
3442                if (ent && (!next || ts < next_ts)) {
3443                        next = ent;
3444                        next_cpu = cpu;
3445                        next_ts = ts;
3446                        next_lost = lost_events;
3447                        next_size = iter->ent_size;
3448                }
3449        }
3450
3451        iter->ent_size = next_size;
3452
3453        if (ent_cpu)
3454                *ent_cpu = next_cpu;
3455
3456        if (ent_ts)
3457                *ent_ts = next_ts;
3458
3459        if (missing_events)
3460                *missing_events = next_lost;
3461
3462        return next;
3463}
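
/*
 * Worked example of the merge above (illustrative numbers): if CPU0's
 * next entry has ts = 1005 and CPU1's has ts = 1002, the CPU1 entry is
 * returned first (next_cpu = 1, next_ts = 1002); the following call
 * compares CPU0's ts = 1005 against whatever now heads CPU1's buffer.
 * iter->ent_size is saved and restored because each peek_next_entry()
 * call overwrites it with the size of the entry it peeked at.
 */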
3464
3465/* Find the next real entry, without updating the iterator itself */
3466struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3467                                          int *ent_cpu, u64 *ent_ts)
3468{
3469        return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3470}
3471
3472/* Find the next real entry, and increment the iterator to the next entry */
3473void *trace_find_next_entry_inc(struct trace_iterator *iter)
3474{
3475        iter->ent = __find_next_entry(iter, &iter->cpu,
3476                                      &iter->lost_events, &iter->ts);
3477
3478        if (iter->ent)
3479                trace_iterator_increment(iter);
3480
3481        return iter->ent ? iter : NULL;
3482}
3483
3484static void trace_consume(struct trace_iterator *iter)
3485{
3486        ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3487                            &iter->lost_events);
3488}
3489
3490static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3491{
3492        struct trace_iterator *iter = m->private;
3493        int i = (int)*pos;
3494        void *ent;
3495
3496        WARN_ON_ONCE(iter->leftover);
3497
3498        (*pos)++;
3499
3500        /* can't go backwards */
3501        if (iter->idx > i)
3502                return NULL;
3503
3504        if (iter->idx < 0)
3505                ent = trace_find_next_entry_inc(iter);
3506        else
3507                ent = iter;
3508
3509        while (ent && iter->idx < i)
3510                ent = trace_find_next_entry_inc(iter);
3511
3512        iter->pos = *pos;
3513
3514        return ent;
3515}
3516
3517void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3518{
3519        struct ring_buffer_event *event;
3520        struct ring_buffer_iter *buf_iter;
3521        unsigned long entries = 0;
3522        u64 ts;
3523
3524        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3525
3526        buf_iter = trace_buffer_iter(iter, cpu);
3527        if (!buf_iter)
3528                return;
3529
3530        ring_buffer_iter_reset(buf_iter);
3531
3532        /*
3533         * With the max latency tracers, a reset may never have taken
3534         * place on a cpu. This is evident when an entry's timestamp is
3535         * before the start of the buffer.
3536         */
3537        while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3538                if (ts >= iter->array_buffer->time_start)
3539                        break;
3540                entries++;
3541                ring_buffer_read(buf_iter, NULL);
3542        }
3543
3544        per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3545}
3546
3547/*
3548 * The current tracer is copied to avoid taking a global lock
3549 * all around.
3550 */
3551static void *s_start(struct seq_file *m, loff_t *pos)
3552{
3553        struct trace_iterator *iter = m->private;
3554        struct trace_array *tr = iter->tr;
3555        int cpu_file = iter->cpu_file;
3556        void *p = NULL;
3557        loff_t l = 0;
3558        int cpu;
3559
3560        /*
3561         * Copy the tracer to avoid using a global lock all around.
3562         * iter->trace is a copy of current_trace; the name pointer can
3563         * be compared instead of using strcmp(), as iter->trace->name
3564         * will point to the same string as current_trace->name.
3565         */
3566        mutex_lock(&trace_types_lock);
3567        if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3568                *iter->trace = *tr->current_trace;
3569        mutex_unlock(&trace_types_lock);
3570
3571#ifdef CONFIG_TRACER_MAX_TRACE
3572        if (iter->snapshot && iter->trace->use_max_tr)
3573                return ERR_PTR(-EBUSY);
3574#endif
3575
3576        if (!iter->snapshot)
3577                atomic_inc(&trace_record_taskinfo_disabled);
3578
3579        if (*pos != iter->pos) {
3580                iter->ent = NULL;
3581                iter->cpu = 0;
3582                iter->idx = -1;
3583
3584                if (cpu_file == RING_BUFFER_ALL_CPUS) {
3585                        for_each_tracing_cpu(cpu)
3586                                tracing_iter_reset(iter, cpu);
3587                } else
3588                        tracing_iter_reset(iter, cpu_file);
3589
3590                iter->leftover = 0;
3591                for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3592                        ;
3593
3594        } else {
3595                /*
3596                 * If we overflowed the seq_file before, then we want
3597                 * to just reuse the trace_seq buffer again.
3598                 */
3599                if (iter->leftover)
3600                        p = iter;
3601                else {
3602                        l = *pos - 1;
3603                        p = s_next(m, p, &l);
3604                }
3605        }
3606
3607        trace_event_read_lock();
3608        trace_access_lock(cpu_file);
3609        return p;
3610}
3611
3612static void s_stop(struct seq_file *m, void *p)
3613{
3614        struct trace_iterator *iter = m->private;
3615
3616#ifdef CONFIG_TRACER_MAX_TRACE
3617        if (iter->snapshot && iter->trace->use_max_tr)
3618                return;
3619#endif
3620
3621        if (!iter->snapshot)
3622                atomic_dec(&trace_record_taskinfo_disabled);
3623
3624        trace_access_unlock(iter->cpu_file);
3625        trace_event_read_unlock();
3626}
3627
3628static void
3629get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3630                      unsigned long *entries, int cpu)
3631{
3632        unsigned long count;
3633
3634        count = ring_buffer_entries_cpu(buf->buffer, cpu);
3635        /*
3636         * If this buffer has skipped entries, then we hold all
3637         * entries for the trace and we need to ignore the
3638         * ones before the time stamp.
3639         */
3640        if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3641                count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3642                /* total is the same as the entries */
3643                *total = count;
3644        } else
3645                *total = count +
3646                        ring_buffer_overrun_cpu(buf->buffer, cpu);
3647        *entries = count;
3648}
3649
3650static void
3651get_total_entries(struct array_buffer *buf,
3652                  unsigned long *total, unsigned long *entries)
3653{
3654        unsigned long t, e;
3655        int cpu;
3656
3657        *total = 0;
3658        *entries = 0;
3659
3660        for_each_tracing_cpu(cpu) {
3661                get_total_entries_cpu(buf, &t, &e, cpu);
3662                *total += t;
3663                *entries += e;
3664        }
3665}
3666
3667unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3668{
3669        unsigned long total, entries;
3670
3671        if (!tr)
3672                tr = &global_trace;
3673
3674        get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3675
3676        return entries;
3677}
3678
3679unsigned long trace_total_entries(struct trace_array *tr)
3680{
3681        unsigned long total, entries;
3682
3683        if (!tr)
3684                tr = &global_trace;
3685
3686        get_total_entries(&tr->array_buffer, &total, &entries);
3687
3688        return entries;
3689}
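
/*
 * Worked example (illustrative numbers): if 1000 events were written to
 * a per-cpu buffer that could only hold 800, ring_buffer_overrun_cpu()
 * reports 200, so get_total_entries_cpu() yields entries = 800 and
 * total = 1000.  This is what print_event_info() below reports as
 * "entries-in-buffer/entries-written: 800/1000".
 */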
3690
3691static void print_lat_help_header(struct seq_file *m)
3692{
3693        seq_puts(m, "#                  _------=> CPU#            \n"
3694                    "#                 / _-----=> irqs-off        \n"
3695                    "#                | / _----=> need-resched    \n"
3696                    "#                || / _---=> hardirq/softirq \n"
3697                    "#                ||| / _--=> preempt-depth   \n"
3698                    "#                |||| /     delay            \n"
3699                    "#  cmd     pid   ||||| time  |   caller      \n"
3700                    "#     \\   /      |||||  \\    |   /         \n");
3701}
3702
3703static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3704{
3705        unsigned long total;
3706        unsigned long entries;
3707
3708        get_total_entries(buf, &total, &entries);
3709        seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3710                   entries, total, num_online_cpus());
3711        seq_puts(m, "#\n");
3712}
3713
3714static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3715                                   unsigned int flags)
3716{
3717        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3718
3719        print_event_info(buf, m);
3720
3721        seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3722        seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3723}
3724
3725static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3726                                       unsigned int flags)
3727{
3728        bool tgid = flags & TRACE_ITER_RECORD_TGID;
3729        const char *space = "          ";
3730        int prec = tgid ? 10 : 2;
3731
3732        print_event_info(buf, m);
3733
3734        seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3735        seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3736        seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3737        seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3738        seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3739        seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3740        seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3741}
3742
3743void
3744print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3745{
3746        unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3747        struct array_buffer *buf = iter->array_buffer;
3748        struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3749        struct tracer *type = iter->trace;
3750        unsigned long entries;
3751        unsigned long total;
3752        const char *name = "preemption";
3753
3754        name = type->name;
3755
3756        get_total_entries(buf, &total, &entries);
3757
3758        seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3759                   name, UTS_RELEASE);
3760        seq_puts(m, "# -----------------------------------"
3761                 "---------------------------------\n");
3762        seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3763                   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3764                   nsecs_to_usecs(data->saved_latency),
3765                   entries,
3766                   total,
3767                   buf->cpu,
3768#if defined(CONFIG_PREEMPT_NONE)
3769                   "server",
3770#elif defined(CONFIG_PREEMPT_VOLUNTARY)
3771                   "desktop",
3772#elif defined(CONFIG_PREEMPT)
3773                   "preempt",
3774#elif defined(CONFIG_PREEMPT_RT)
3775                   "preempt_rt",
3776#else
3777                   "unknown",
3778#endif
3779                   /* These are reserved for later use */
3780                   0, 0, 0, 0);
3781#ifdef CONFIG_SMP
3782        seq_printf(m, " #P:%d)\n", num_online_cpus());
3783#else
3784        seq_puts(m, ")\n");
3785#endif
3786        seq_puts(m, "#    -----------------\n");
3787        seq_printf(m, "#    | task: %.16s-%d "
3788                   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3789                   data->comm, data->pid,
3790                   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3791                   data->policy, data->rt_priority);
3792        seq_puts(m, "#    -----------------\n");
3793
3794        if (data->critical_start) {
3795                seq_puts(m, "#  => started at: ");
3796                seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3797                trace_print_seq(m, &iter->seq);
3798                seq_puts(m, "\n#  => ended at:   ");
3799                seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3800                trace_print_seq(m, &iter->seq);
3801                seq_puts(m, "\n#\n");
3802        }
3803
3804        seq_puts(m, "#\n");
3805}
3806
3807static void test_cpu_buff_start(struct trace_iterator *iter)
3808{
3809        struct trace_seq *s = &iter->seq;
3810        struct trace_array *tr = iter->tr;
3811
3812        if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3813                return;
3814
3815        if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3816                return;
3817
3818        if (cpumask_available(iter->started) &&
3819            cpumask_test_cpu(iter->cpu, iter->started))
3820                return;
3821
3822        if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3823                return;
3824
3825        if (cpumask_available(iter->started))
3826                cpumask_set_cpu(iter->cpu, iter->started);
3827
3828        /* Don't print started cpu buffer for the first entry of the trace */
3829        if (iter->idx > 1)
3830                trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3831                                iter->cpu);
3832}
3833
3834static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3835{
3836        struct trace_array *tr = iter->tr;
3837        struct trace_seq *s = &iter->seq;
3838        unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3839        struct trace_entry *entry;
3840        struct trace_event *event;
3841
3842        entry = iter->ent;
3843
3844        test_cpu_buff_start(iter);
3845
3846        event = ftrace_find_event(entry->type);
3847
3848        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3849                if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3850                        trace_print_lat_context(iter);
3851                else
3852                        trace_print_context(iter);
3853        }
3854
3855        if (trace_seq_has_overflowed(s))
3856                return TRACE_TYPE_PARTIAL_LINE;
3857
3858        if (event)
3859                return event->funcs->trace(iter, sym_flags, event);
3860
3861        trace_seq_printf(s, "Unknown type %d\n", entry->type);
3862
3863        return trace_handle_return(s);
3864}
3865
3866static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3867{
3868        struct trace_array *tr = iter->tr;
3869        struct trace_seq *s = &iter->seq;
3870        struct trace_entry *entry;
3871        struct trace_event *event;
3872
3873        entry = iter->ent;
3874
3875        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3876                trace_seq_printf(s, "%d %d %llu ",
3877                                 entry->pid, iter->cpu, iter->ts);
3878
3879        if (trace_seq_has_overflowed(s))
3880                return TRACE_TYPE_PARTIAL_LINE;
3881
3882        event = ftrace_find_event(entry->type);
3883        if (event)
3884                return event->funcs->raw(iter, 0, event);
3885
3886        trace_seq_printf(s, "%d ?\n", entry->type);
3887
3888        return trace_handle_return(s);
3889}
3890
3891static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3892{
3893        struct trace_array *tr = iter->tr;
3894        struct trace_seq *s = &iter->seq;
3895        unsigned char newline = '\n';
3896        struct trace_entry *entry;
3897        struct trace_event *event;
3898
3899        entry = iter->ent;
3900
3901        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3902                SEQ_PUT_HEX_FIELD(s, entry->pid);
3903                SEQ_PUT_HEX_FIELD(s, iter->cpu);
3904                SEQ_PUT_HEX_FIELD(s, iter->ts);
3905                if (trace_seq_has_overflowed(s))
3906                        return TRACE_TYPE_PARTIAL_LINE;
3907        }
3908
3909        event = ftrace_find_event(entry->type);
3910        if (event) {
3911                enum print_line_t ret = event->funcs->hex(iter, 0, event);
3912                if (ret != TRACE_TYPE_HANDLED)
3913                        return ret;
3914        }
3915
3916        SEQ_PUT_FIELD(s, newline);
3917
3918        return trace_handle_return(s);
3919}
3920
3921static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3922{
3923        struct trace_array *tr = iter->tr;
3924        struct trace_seq *s = &iter->seq;
3925        struct trace_entry *entry;
3926        struct trace_event *event;
3927
3928        entry = iter->ent;
3929
3930        if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3931                SEQ_PUT_FIELD(s, entry->pid);
3932                SEQ_PUT_FIELD(s, iter->cpu);
3933                SEQ_PUT_FIELD(s, iter->ts);
3934                if (trace_seq_has_overflowed(s))
3935                        return TRACE_TYPE_PARTIAL_LINE;
3936        }
3937
3938        event = ftrace_find_event(entry->type);
3939        return event ? event->funcs->binary(iter, 0, event) :
3940                TRACE_TYPE_HANDLED;
3941}
3942
3943int trace_empty(struct trace_iterator *iter)
3944{
3945        struct ring_buffer_iter *buf_iter;
3946        int cpu;
3947
3948        /* If we are looking at one CPU buffer, only check that one */
3949        if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3950                cpu = iter->cpu_file;
3951                buf_iter = trace_buffer_iter(iter, cpu);
3952                if (buf_iter) {
3953                        if (!ring_buffer_iter_empty(buf_iter))
3954                                return 0;
3955                } else {
3956                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3957                                return 0;
3958                }
3959                return 1;
3960        }
3961
3962        for_each_tracing_cpu(cpu) {
3963                buf_iter = trace_buffer_iter(iter, cpu);
3964                if (buf_iter) {
3965                        if (!ring_buffer_iter_empty(buf_iter))
3966                                return 0;
3967                } else {
3968                        if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3969                                return 0;
3970                }
3971        }
3972
3973        return 1;
3974}
3975
3976/*  Called with trace_event_read_lock() held. */
3977enum print_line_t print_trace_line(struct trace_iterator *iter)
3978{
3979        struct trace_array *tr = iter->tr;
3980        unsigned long trace_flags = tr->trace_flags;
3981        enum print_line_t ret;
3982
3983        if (iter->lost_events) {
3984                trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3985                                 iter->cpu, iter->lost_events);
3986                if (trace_seq_has_overflowed(&iter->seq))
3987                        return TRACE_TYPE_PARTIAL_LINE;
3988        }
3989
3990        if (iter->trace && iter->trace->print_line) {
3991                ret = iter->trace->print_line(iter);
3992                if (ret != TRACE_TYPE_UNHANDLED)
3993                        return ret;
3994        }
3995
3996        if (iter->ent->type == TRACE_BPUTS &&
3997                        trace_flags & TRACE_ITER_PRINTK &&
3998                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3999                return trace_print_bputs_msg_only(iter);
4000
4001        if (iter->ent->type == TRACE_BPRINT &&
4002                        trace_flags & TRACE_ITER_PRINTK &&
4003                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4004                return trace_print_bprintk_msg_only(iter);
4005
4006        if (iter->ent->type == TRACE_PRINT &&
4007                        trace_flags & TRACE_ITER_PRINTK &&
4008                        trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4009                return trace_print_printk_msg_only(iter);
4010
4011        if (trace_flags & TRACE_ITER_BIN)
4012                return print_bin_fmt(iter);
4013
4014        if (trace_flags & TRACE_ITER_HEX)
4015                return print_hex_fmt(iter);
4016
4017        if (trace_flags & TRACE_ITER_RAW)
4018                return print_raw_fmt(iter);
4019
4020        return print_trace_fmt(iter);
4021}
4022
4023void trace_latency_header(struct seq_file *m)
4024{
4025        struct trace_iterator *iter = m->private;
4026        struct trace_array *tr = iter->tr;
4027
4028        /* print nothing if the buffers are empty */
4029        if (trace_empty(iter))
4030                return;
4031
4032        if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4033                print_trace_header(m, iter);
4034
4035        if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4036                print_lat_help_header(m);
4037}
4038
4039void trace_default_header(struct seq_file *m)
4040{
4041        struct trace_iterator *iter = m->private;
4042        struct trace_array *tr = iter->tr;
4043        unsigned long trace_flags = tr->trace_flags;
4044
4045        if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4046                return;
4047
4048        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4049                /* print nothing if the buffers are empty */
4050                if (trace_empty(iter))
4051                        return;
4052                print_trace_header(m, iter);
4053                if (!(trace_flags & TRACE_ITER_VERBOSE))
4054                        print_lat_help_header(m);
4055        } else {
4056                if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4057                        if (trace_flags & TRACE_ITER_IRQ_INFO)
4058                                print_func_help_header_irq(iter->array_buffer,
4059                                                           m, trace_flags);
4060                        else
4061                                print_func_help_header(iter->array_buffer, m,
4062                                                       trace_flags);
4063                }
4064        }
4065}
4066
4067static void test_ftrace_alive(struct seq_file *m)
4068{
4069        if (!ftrace_is_dead())
4070                return;
4071        seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4072                    "#          MAY BE MISSING FUNCTION EVENTS\n");
4073}
4074
4075#ifdef CONFIG_TRACER_MAX_TRACE
4076static void show_snapshot_main_help(struct seq_file *m)
4077{
4078        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4079                    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4080                    "#                      Takes a snapshot of the main buffer.\n"
4081                    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4082                    "#                      (Doesn't have to be '2'; works with any number that\n"
4083                    "#                       is not a '0' or '1')\n");
4084}
4085
4086static void show_snapshot_percpu_help(struct seq_file *m)
4087{
4088        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4089#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4090        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4091                    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4092#else
4093        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4094                    "#                     Must use main snapshot file to allocate.\n");
4095#endif
4096        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4097                    "#                      (Doesn't have to be '2'; works with any number that\n"
4098                    "#                       is not a '0' or '1')\n");
4099}
4100
4101static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4102{
4103        if (iter->tr->allocated_snapshot)
4104                seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4105        else
4106                seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4107
4108        seq_puts(m, "# Snapshot commands:\n");
4109        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4110                show_snapshot_main_help(m);
4111        else
4112                show_snapshot_percpu_help(m);
4113}
4114#else
4115/* Should never be called */
4116static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4117#endif
4118
4119static int s_show(struct seq_file *m, void *v)
4120{
4121        struct trace_iterator *iter = v;
4122        int ret;
4123
4124        if (iter->ent == NULL) {
4125                if (iter->tr) {
4126                        seq_printf(m, "# tracer: %s\n", iter->trace->name);
4127                        seq_puts(m, "#\n");
4128                        test_ftrace_alive(m);
4129                }
4130                if (iter->snapshot && trace_empty(iter))
4131                        print_snapshot_help(m, iter);
4132                else if (iter->trace && iter->trace->print_header)
4133                        iter->trace->print_header(m);
4134                else
4135                        trace_default_header(m);
4136
4137        } else if (iter->leftover) {
4138                /*
4139                 * If we filled the seq_file buffer earlier, we
4140                 * want to just show it now.
4141                 */
4142                ret = trace_print_seq(m, &iter->seq);
4143
4144                /* ret should this time be zero, but you never know */
4145                iter->leftover = ret;
4146
4147        } else {
4148                print_trace_line(iter);
4149                ret = trace_print_seq(m, &iter->seq);
4150                /*
4151                 * If we overflow the seq_file buffer, then it will
4152                 * ask us for this data again at start up.
4153                 * Use that instead.
4154                 *  ret is 0 if seq_file write succeeded.
4155                 *        -1 otherwise.
4156                 */
4157                iter->leftover = ret;
4158        }
4159
4160        return 0;
4161}
4162
4163/*
4164 * Should be used after trace_array_get(); trace_types_lock
4165 * ensures that i_cdev has already been initialized.
4166 */
4167static inline int tracing_get_cpu(struct inode *inode)
4168{
4169        if (inode->i_cdev) /* See trace_create_cpu_file() */
4170                return (long)inode->i_cdev - 1;
4171        return RING_BUFFER_ALL_CPUS;
4172}
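
/*
 * Note (assumption, based on the "- 1" above and the comment it
 * references): trace_create_cpu_file() presumably stores the cpu number
 * in i_cdev as "cpu + 1", so that a zero i_cdev can mean "not a per-cpu
 * file" and fall back to RING_BUFFER_ALL_CPUS.
 */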
4173
4174static const struct seq_operations tracer_seq_ops = {
4175        .start          = s_start,
4176        .next           = s_next,
4177        .stop           = s_stop,
4178        .show           = s_show,
4179};
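
/*
 * seq_file drives these callbacks as s_start() -> s_show() -> s_next()
 * -> s_show() ... -> s_stop(), and re-invokes s_start() with the last
 * position whenever its buffer fills up.  That re-invocation is why
 * s_start() and s_show() carry the iter->leftover handling above.
 */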
4180
4181static struct trace_iterator *
4182__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4183{
4184        struct trace_array *tr = inode->i_private;
4185        struct trace_iterator *iter;
4186        int cpu;
4187
4188        if (tracing_disabled)
4189                return ERR_PTR(-ENODEV);
4190
4191        iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4192        if (!iter)
4193                return ERR_PTR(-ENOMEM);
4194
4195        iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4196                                    GFP_KERNEL);
4197        if (!iter->buffer_iter)
4198                goto release;
4199
4200        /*
4201         * We make a copy of the current tracer to avoid concurrent
4202         * changes on it while we are reading.
4203         */
4204        mutex_lock(&trace_types_lock);
4205        iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4206        if (!iter->trace)
4207                goto fail;
4208
4209        *iter->trace = *tr->current_trace;
4210
4211        if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4212                goto fail;
4213
4214        iter->tr = tr;
4215
4216#ifdef CONFIG_TRACER_MAX_TRACE
4217        /* Currently only the top directory has a snapshot */
4218        if (tr->current_trace->print_max || snapshot)
4219                iter->array_buffer = &tr->max_buffer;
4220        else
4221#endif
4222                iter->array_buffer = &tr->array_buffer;
4223        iter->snapshot = snapshot;
4224        iter->pos = -1;
4225        iter->cpu_file = tracing_get_cpu(inode);
4226        mutex_init(&iter->mutex);
4227
4228        /* Notify the tracer early, before we stop tracing. */
4229        if (iter->trace->open)
4230                iter->trace->open(iter);
4231
4232        /* Annotate start of buffers if we had overruns */
4233        if (ring_buffer_overruns(iter->array_buffer->buffer))
4234                iter->iter_flags |= TRACE_FILE_ANNOTATE;
4235
4236        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4237        if (trace_clocks[tr->clock_id].in_ns)
4238                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4239
4240        /* stop the trace while dumping if we are not opening "snapshot" */
4241        if (!iter->snapshot)
4242                tracing_stop_tr(tr);
4243
4244        if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4245                for_each_tracing_cpu(cpu) {
4246                        iter->buffer_iter[cpu] =
4247                                ring_buffer_read_prepare(iter->array_buffer->buffer,
4248                                                         cpu, GFP_KERNEL);
4249                }
4250                ring_buffer_read_prepare_sync();
4251                for_each_tracing_cpu(cpu) {
4252                        ring_buffer_read_start(iter->buffer_iter[cpu]);
4253                        tracing_iter_reset(iter, cpu);
4254                }
4255        } else {
4256                cpu = iter->cpu_file;
4257                iter->buffer_iter[cpu] =
4258                        ring_buffer_read_prepare(iter->array_buffer->buffer,
4259                                                 cpu, GFP_KERNEL);
4260                ring_buffer_read_prepare_sync();
4261                ring_buffer_read_start(iter->buffer_iter[cpu]);
4262                tracing_iter_reset(iter, cpu);
4263        }
4264
4265        mutex_unlock(&trace_types_lock);
4266
4267        return iter;
4268
4269 fail:
4270        mutex_unlock(&trace_types_lock);
4271        kfree(iter->trace);
4272        kfree(iter->buffer_iter);
4273release:
4274        seq_release_private(inode, file);
4275        return ERR_PTR(-ENOMEM);
4276}
4277
4278int tracing_open_generic(struct inode *inode, struct file *filp)
4279{
4280        int ret;
4281
4282        ret = tracing_check_open_get_tr(NULL);
4283        if (ret)
4284                return ret;
4285
4286        filp->private_data = inode->i_private;
4287        return 0;
4288}
4289
4290bool tracing_is_disabled(void)
4291{
4292        return (tracing_disabled) ? true : false;
4293}
4294
4295/*
4296 * Open and update trace_array ref count.
4297 * Must have the current trace_array passed to it.
4298 */
4299int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4300{
4301        struct trace_array *tr = inode->i_private;
4302        int ret;
4303
4304        ret = tracing_check_open_get_tr(tr);
4305        if (ret)
4306                return ret;
4307
4308        filp->private_data = inode->i_private;
4309
4310        return 0;
4311}
4312
4313static int tracing_release(struct inode *inode, struct file *file)
4314{
4315        struct trace_array *tr = inode->i_private;
4316        struct seq_file *m = file->private_data;
4317        struct trace_iterator *iter;
4318        int cpu;
4319
4320        if (!(file->f_mode & FMODE_READ)) {
4321                trace_array_put(tr);
4322                return 0;
4323        }
4324
4325        /* Writes do not use seq_file */
4326        iter = m->private;
4327        mutex_lock(&trace_types_lock);
4328
4329        for_each_tracing_cpu(cpu) {
4330                if (iter->buffer_iter[cpu])
4331                        ring_buffer_read_finish(iter->buffer_iter[cpu]);
4332        }
4333
4334        if (iter->trace && iter->trace->close)
4335                iter->trace->close(iter);
4336
4337        if (!iter->snapshot)
4338                /* reenable tracing if it was previously enabled */
4339                tracing_start_tr(tr);
4340
4341        __trace_array_put(tr);
4342
4343        mutex_unlock(&trace_types_lock);
4344
4345        mutex_destroy(&iter->mutex);
4346        free_cpumask_var(iter->started);
4347        kfree(iter->trace);
4348        kfree(iter->buffer_iter);
4349        seq_release_private(inode, file);
4350
4351        return 0;
4352}
4353
4354static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4355{
4356        struct trace_array *tr = inode->i_private;
4357
4358        trace_array_put(tr);
4359        return 0;
4360}
4361
4362static int tracing_single_release_tr(struct inode *inode, struct file *file)
4363{
4364        struct trace_array *tr = inode->i_private;
4365
4366        trace_array_put(tr);
4367
4368        return single_release(inode, file);
4369}
4370
4371static int tracing_open(struct inode *inode, struct file *file)
4372{
4373        struct trace_array *tr = inode->i_private;
4374        struct trace_iterator *iter;
4375        int ret;
4376
4377        ret = tracing_check_open_get_tr(tr);
4378        if (ret)
4379                return ret;
4380
4381        /* If this file was open for write, then erase contents */
4382        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4383                int cpu = tracing_get_cpu(inode);
4384                struct array_buffer *trace_buf = &tr->array_buffer;
4385
4386#ifdef CONFIG_TRACER_MAX_TRACE
4387                if (tr->current_trace->print_max)
4388                        trace_buf = &tr->max_buffer;
4389#endif
4390
4391                if (cpu == RING_BUFFER_ALL_CPUS)
4392                        tracing_reset_online_cpus(trace_buf);
4393                else
4394                        tracing_reset_cpu(trace_buf, cpu);
4395        }
4396
4397        if (file->f_mode & FMODE_READ) {
4398                iter = __tracing_open(inode, file, false);
4399                if (IS_ERR(iter))
4400                        ret = PTR_ERR(iter);
4401                else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4402                        iter->iter_flags |= TRACE_FILE_LAT_FMT;
4403        }
4404
4405        if (ret < 0)
4406                trace_array_put(tr);
4407
4408        return ret;
4409}
4410
4411/*
4412 * Some tracers are not suitable for instance buffers.
4413 * A tracer is always available for the global array (toplevel)
4414 * or if it explicitly states that it is.
4415 */
4416static bool
4417trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4418{
4419        return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4420}
4421
4422/* Find the next tracer that this trace array may use */
4423static struct tracer *
4424get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4425{
4426        while (t && !trace_ok_for_array(t, tr))
4427                t = t->next;
4428
4429        return t;
4430}
4431
4432static void *
4433t_next(struct seq_file *m, void *v, loff_t *pos)
4434{
4435        struct trace_array *tr = m->private;
4436        struct tracer *t = v;
4437
4438        (*pos)++;
4439
4440        if (t)
4441                t = get_tracer_for_array(tr, t->next);
4442
4443        return t;
4444}
4445
4446static void *t_start(struct seq_file *m, loff_t *pos)
4447{
4448        struct trace_array *tr = m->private;
4449        struct tracer *t;
4450        loff_t l = 0;
4451
4452        mutex_lock(&trace_types_lock);
4453
4454        t = get_tracer_for_array(tr, trace_types);
4455        for (; t && l < *pos; t = t_next(m, t, &l))
4456                ;
4457
4458        return t;
4459}
4460
4461static void t_stop(struct seq_file *m, void *p)
4462{
4463        mutex_unlock(&trace_types_lock);
4464}
4465
4466static int t_show(struct seq_file *m, void *v)
4467{
4468        struct tracer *t = v;
4469
4470        if (!t)
4471                return 0;
4472
4473        seq_puts(m, t->name);
4474        if (t->next)
4475                seq_putc(m, ' ');
4476        else
4477                seq_putc(m, '\n');
4478
4479        return 0;
4480}
4481
4482static const struct seq_operations show_traces_seq_ops = {
4483        .start          = t_start,
4484        .next           = t_next,
4485        .stop           = t_stop,
4486        .show           = t_show,
4487};
4488
4489static int show_traces_open(struct inode *inode, struct file *file)
4490{
4491        struct trace_array *tr = inode->i_private;
4492        struct seq_file *m;
4493        int ret;
4494
4495        ret = tracing_check_open_get_tr(tr);
4496        if (ret)
4497                return ret;
4498
4499        ret = seq_open(file, &show_traces_seq_ops);
4500        if (ret) {
4501                trace_array_put(tr);
4502                return ret;
4503        }
4504
4505        m = file->private_data;
4506        m->private = tr;
4507
4508        return 0;
4509}
4510
4511static int show_traces_release(struct inode *inode, struct file *file)
4512{
4513        struct trace_array *tr = inode->i_private;
4514
4515        trace_array_put(tr);
4516        return seq_release(inode, file);
4517}
4518
4519static ssize_t
4520tracing_write_stub(struct file *filp, const char __user *ubuf,
4521                   size_t count, loff_t *ppos)
4522{
4523        return count;
4524}
4525
4526loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4527{
4528        int ret;
4529
4530        if (file->f_mode & FMODE_READ)
4531                ret = seq_lseek(file, offset, whence);
4532        else
4533                file->f_pos = ret = 0;
4534
4535        return ret;
4536}
4537
4538static const struct file_operations tracing_fops = {
4539        .open           = tracing_open,
4540        .read           = seq_read,
4541        .write          = tracing_write_stub,
4542        .llseek         = tracing_lseek,
4543        .release        = tracing_release,
4544};
4545
4546static const struct file_operations show_traces_fops = {
4547        .open           = show_traces_open,
4548        .read           = seq_read,
4549        .llseek         = seq_lseek,
4550        .release        = show_traces_release,
4551};
4552
4553static ssize_t
4554tracing_cpumask_read(struct file *filp, char __user *ubuf,
4555                     size_t count, loff_t *ppos)
4556{
4557        struct trace_array *tr = file_inode(filp)->i_private;
4558        char *mask_str;
4559        int len;
4560
4561        len = snprintf(NULL, 0, "%*pb\n",
4562                       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4563        mask_str = kmalloc(len, GFP_KERNEL);
4564        if (!mask_str)
4565                return -ENOMEM;
4566
4567        len = snprintf(mask_str, len, "%*pb\n",
4568                       cpumask_pr_args(tr->tracing_cpumask));
4569        if (len >= count) {
4570                count = -EINVAL;
4571                goto out_err;
4572        }
4573        count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4574
4575out_err:
4576        kfree(mask_str);
4577
4578        return count;
4579}
4580
4581int tracing_set_cpumask(struct trace_array *tr,
4582                        cpumask_var_t tracing_cpumask_new)
4583{
4584        int cpu;
4585
4586        if (!tr)
4587                return -EINVAL;
4588
4589        local_irq_disable();
4590        arch_spin_lock(&tr->max_lock);
4591        for_each_tracing_cpu(cpu) {
4592                /*
4593                 * Increase/decrease the disabled counter if we are
4594                 * about to flip a bit in the cpumask:
4595                 */
4596                if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4597                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4598                        atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4599                        ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4600                }
4601                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4602                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4603                        atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4604                        ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4605                }
4606        }
4607        arch_spin_unlock(&tr->max_lock);
4608        local_irq_enable();
4609
4610        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4611
4612        return 0;
4613}
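
/*
 * Usage sketch (illustrative): the tracing_cpumask file takes a hex cpu
 * mask, parsed by cpumask_parse_user() in the write handler below, e.g.:
 *
 *	# echo 3 > tracing_cpumask	(trace only CPUs 0 and 1)
 *	# echo f > tracing_cpumask	(trace CPUs 0-3)
 *
 * CPUs cleared from the mask get their per-cpu "disabled" count bumped
 * and recording disabled on their ring buffer, as done above.
 */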
4614
4615static ssize_t
4616tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4617                      size_t count, loff_t *ppos)
4618{
4619        struct trace_array *tr = file_inode(filp)->i_private;
4620        cpumask_var_t tracing_cpumask_new;
4621        int err;
4622
4623        if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4624                return -ENOMEM;
4625
4626        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4627        if (err)
4628                goto err_free;
4629
4630        err = tracing_set_cpumask(tr, tracing_cpumask_new);
4631        if (err)
4632                goto err_free;
4633
4634        free_cpumask_var(tracing_cpumask_new);
4635
4636        return count;
4637
4638err_free:
4639        free_cpumask_var(tracing_cpumask_new);
4640
4641        return err;
4642}
4643
4644static const struct file_operations tracing_cpumask_fops = {
4645        .open           = tracing_open_generic_tr,
4646        .read           = tracing_cpumask_read,
4647        .write          = tracing_cpumask_write,
4648        .release        = tracing_release_generic_tr,
4649        .llseek         = generic_file_llseek,
4650};
4651
4652static int tracing_trace_options_show(struct seq_file *m, void *v)
4653{
4654        struct tracer_opt *trace_opts;
4655        struct trace_array *tr = m->private;
4656        u32 tracer_flags;
4657        int i;
4658
4659        mutex_lock(&trace_types_lock);
4660        tracer_flags = tr->current_trace->flags->val;
4661        trace_opts = tr->current_trace->flags->opts;
4662
4663        for (i = 0; trace_options[i]; i++) {
4664                if (tr->trace_flags & (1 << i))
4665                        seq_printf(m, "%s\n", trace_options[i]);
4666                else
4667                        seq_printf(m, "no%s\n", trace_options[i]);
4668        }
4669
4670        for (i = 0; trace_opts[i].name; i++) {
4671                if (tracer_flags & trace_opts[i].bit)
4672                        seq_printf(m, "%s\n", trace_opts[i].name);
4673                else
4674                        seq_printf(m, "no%s\n", trace_opts[i].name);
4675        }
4676        mutex_unlock(&trace_types_lock);
4677
4678        return 0;
4679}
4680
4681static int __set_tracer_option(struct trace_array *tr,
4682                               struct tracer_flags *tracer_flags,
4683                               struct tracer_opt *opts, int neg)
4684{
4685        struct tracer *trace = tracer_flags->trace;
4686        int ret;
4687
4688        ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4689        if (ret)
4690                return ret;
4691
4692        if (neg)
4693                tracer_flags->val &= ~opts->bit;
4694        else
4695                tracer_flags->val |= opts->bit;
4696        return 0;
4697}
4698
4699/* Try to assign a tracer specific option */
4700static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4701{
4702        struct tracer *trace = tr->current_trace;
4703        struct tracer_flags *tracer_flags = trace->flags;
4704        struct tracer_opt *opts = NULL;
4705        int i;
4706
4707        for (i = 0; tracer_flags->opts[i].name; i++) {
4708                opts = &tracer_flags->opts[i];
4709
4710                if (strcmp(cmp, opts->name) == 0)
4711                        return __set_tracer_option(tr, trace->flags, opts, neg);
4712        }
4713
4714        return -EINVAL;
4715}
4716
4717/* Some tracers require overwrite to stay enabled */
4718int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4719{
4720        if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4721                return -1;
4722
4723        return 0;
4724}
4725
4726int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4727{
4728        if ((mask == TRACE_ITER_RECORD_TGID) ||
4729            (mask == TRACE_ITER_RECORD_CMD))
4730                lockdep_assert_held(&event_mutex);
4731
4732        /* do nothing if flag is already set */
4733        if (!!(tr->trace_flags & mask) == !!enabled)
4734                return 0;
4735
4736        /* Give the tracer a chance to approve the change */
4737        if (tr->current_trace->flag_changed)
4738                if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4739                        return -EINVAL;
4740
4741        if (enabled)
4742                tr->trace_flags |= mask;
4743        else
4744                tr->trace_flags &= ~mask;
4745
4746        if (mask == TRACE_ITER_RECORD_CMD)
4747                trace_event_enable_cmd_record(enabled);
4748
4749        if (mask == TRACE_ITER_RECORD_TGID) {
4750                if (!tgid_map)
4751                        tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4752                                           sizeof(*tgid_map),
4753                                           GFP_KERNEL);
4754                if (!tgid_map) {
4755                        tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4756                        return -ENOMEM;
4757                }
4758
4759                trace_event_enable_tgid_record(enabled);
4760        }
4761
4762        if (mask == TRACE_ITER_EVENT_FORK)
4763                trace_event_follow_fork(tr, enabled);
4764
4765        if (mask == TRACE_ITER_FUNC_FORK)
4766                ftrace_pid_follow_fork(tr, enabled);
4767
4768        if (mask == TRACE_ITER_OVERWRITE) {
4769                ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4770#ifdef CONFIG_TRACER_MAX_TRACE
4771                ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4772#endif
4773        }
4774
4775        if (mask == TRACE_ITER_PRINTK) {
4776                trace_printk_start_stop_comm(enabled);
4777                trace_printk_control(enabled);
4778        }
4779
4780        return 0;
4781}
4782
4783int trace_set_options(struct trace_array *tr, char *option)
4784{
4785        char *cmp;
4786        int neg = 0;
4787        int ret;
4788        size_t orig_len = strlen(option);
4789        int len;
4790
4791        cmp = strstrip(option);
4792
4793        len = str_has_prefix(cmp, "no");
4794        if (len)
4795                neg = 1;
4796
4797        cmp += len;
4798
4799        mutex_lock(&event_mutex);
4800        mutex_lock(&trace_types_lock);
4801
4802        ret = match_string(trace_options, -1, cmp);
4803        /* If no option could be set, test the specific tracer options */
4804        if (ret < 0)
4805                ret = set_tracer_option(tr, cmp, neg);
4806        else
4807                ret = set_tracer_flag(tr, 1 << ret, !neg);
4808
4809        mutex_unlock(&trace_types_lock);
4810        mutex_unlock(&event_mutex);
4811
4812        /*
4813         * If the first trailing whitespace is replaced with '\0' by strstrip,
4814         * turn it back into a space.
4815         */
4816        if (orig_len > strlen(option))
4817                option[strlen(option)] = ' ';
4818
4819        return ret;
4820}
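
/*
 * Usage sketch (illustrative): this is the worker behind writes to the
 * "trace_options" file, e.g.:
 *
 *	# echo print-parent > trace_options
 *	# echo noprint-parent > trace_options
 *
 * A "no" prefix clears the flag; a name that is not in the core
 * trace_options[] list is then tried as a tracer-specific option.
 */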
4821
4822static void __init apply_trace_boot_options(void)
4823{
4824        char *buf = trace_boot_options_buf;
4825        char *option;
4826
4827        while (true) {
4828                option = strsep(&buf, ",");
4829
4830                if (!option)
4831                        break;
4832
4833                if (*option)
4834                        trace_set_options(&global_trace, option);
4835
4836                /* Put back the comma to allow this to be called again */
4837                if (buf)
4838                        *(buf - 1) = ',';
4839        }
4840}
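
/*
 * Note (assumption: trace_boot_options_buf is filled from the
 * "trace_options=" kernel command line parameter earlier in this file),
 * e.g. booting with:
 *
 *	trace_options=sym-addr,stacktrace
 *
 * applies each comma-separated option to the global trace array here.
 */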
4841
4842static ssize_t
4843tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4844                        size_t cnt, loff_t *ppos)
4845{
4846        struct seq_file *m = filp->private_data;
4847        struct trace_array *tr = m->private;
4848        char buf[64];
4849        int ret;
4850
4851        if (cnt >= sizeof(buf))
4852                return -EINVAL;
4853
4854        if (copy_from_user(buf, ubuf, cnt))
4855                return -EFAULT;
4856
4857        buf[cnt] = 0;
4858
4859        ret = trace_set_options(tr, buf);
4860        if (ret < 0)
4861                return ret;
4862
4863        *ppos += cnt;
4864
4865        return cnt;
4866}
4867
4868static int tracing_trace_options_open(struct inode *inode, struct file *file)
4869{
4870        struct trace_array *tr = inode->i_private;
4871        int ret;
4872
4873        ret = tracing_check_open_get_tr(tr);
4874        if (ret)
4875                return ret;
4876
4877        ret = single_open(file, tracing_trace_options_show, inode->i_private);
4878        if (ret < 0)
4879                trace_array_put(tr);
4880
4881        return ret;
4882}
4883
4884static const struct file_operations tracing_iter_fops = {
4885        .open           = tracing_trace_options_open,
4886        .read           = seq_read,
4887        .llseek         = seq_lseek,
4888        .release        = tracing_single_release_tr,
4889        .write          = tracing_trace_options_write,
4890};
4891
4892static const char readme_msg[] =
4893        "tracing mini-HOWTO:\n\n"
4894        "# echo 0 > tracing_on : quick way to disable tracing\n"
4895        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4896        " Important files:\n"
4897        "  trace\t\t\t- The static contents of the buffer\n"
4898        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4899        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4900        "  current_tracer\t- function and latency tracers\n"
4901        "  available_tracers\t- list of configured tracers for current_tracer\n"
4902        "  error_log\t- error log for failed commands (that support it)\n"
4903        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4904        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4905        "  trace_clock\t\t- change the clock used to order events\n"
4906        "       local:   Per cpu clock but may not be synced across CPUs\n"
4907        "      global:   Synced across CPUs but slows tracing down.\n"
4908        "     counter:   Not a clock, but just an increment\n"
4909        "      uptime:   Jiffy counter from time of boot\n"
4910        "        perf:   Same clock that perf events use\n"
4911#ifdef CONFIG_X86_64
4912        "     x86-tsc:   TSC cycle counter\n"
4913#endif
4914        "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4915        "       delta:   Delta difference against a buffer-wide timestamp\n"
4916        "    absolute:   Absolute (standalone) timestamp\n"
4917        "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4918        "\n  trace_marker_raw\t\t- Writes into this file are inserted into the kernel buffer as binary data\n"
4919        "  tracing_cpumask\t- Limit which CPUs to trace\n"
4920        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4921        "\t\t\t  Remove sub-buffer with rmdir\n"
4922        "  trace_options\t\t- Set format or modify how tracing happens\n"
4923        "\t\t\t  Disable an option by prefixing 'no' to the\n"
4924        "\t\t\t  option name\n"
4925        "  saved_cmdlines_size\t- echo a number in here to set the size of the comm-pid list\n"
4926#ifdef CONFIG_DYNAMIC_FTRACE
4927        "\n  available_filter_functions - list of functions that can be filtered on\n"
4928        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4929        "\t\t\t  functions\n"
4930        "\t     accepts: func_full_name or glob-matching-pattern\n"
4931        "\t     modules: Can select a group via module\n"
4932        "\t      Format: :mod:<module-name>\n"
4933        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4934        "\t    triggers: a command to perform when function is hit\n"
4935        "\t      Format: <function>:<trigger>[:count]\n"
4936        "\t     trigger: traceon, traceoff\n"
4937        "\t\t      enable_event:<system>:<event>\n"
4938        "\t\t      disable_event:<system>:<event>\n"
4939#ifdef CONFIG_STACKTRACE
4940        "\t\t      stacktrace\n"
4941#endif
4942#ifdef CONFIG_TRACER_SNAPSHOT
4943        "\t\t      snapshot\n"
4944#endif
4945        "\t\t      dump\n"
4946        "\t\t      cpudump\n"
4947        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4948        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4949        "\t     The first one will disable tracing every time do_fault is hit\n"
4950        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4951        "\t       The first time do_trap is hit and it disables tracing, the\n"
4952        "\t       counter will decrement to 2. If tracing is already disabled,\n"
4953        "\t       the counter will not decrement. It only decrements when the\n"
4954        "\t       trigger did work\n"
4955        "\t     To remove trigger without count:\n"
4956        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4957        "\t     To remove trigger with a count:\n"
4958        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4959        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4960        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4961        "\t    modules: Can select a group via module command :mod:\n"
4962        "\t    Does not accept triggers\n"
4963#endif /* CONFIG_DYNAMIC_FTRACE */
4964#ifdef CONFIG_FUNCTION_TRACER
4965        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4966        "\t\t    (function)\n"
4967#endif
4968#ifdef CONFIG_FUNCTION_GRAPH_TRACER
4969        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4970        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4971        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4972#endif
4973#ifdef CONFIG_TRACER_SNAPSHOT
4974        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4975        "\t\t\t  snapshot buffer. Read the contents for more\n"
4976        "\t\t\t  information\n"
4977#endif
4978#ifdef CONFIG_STACK_TRACER
4979        "  stack_trace\t\t- Shows the max stack trace when active\n"
4980        "  stack_max_size\t- Shows current max stack size that was traced\n"
4981        "\t\t\t  Write into this file to reset the max size (trigger a\n"
4982        "\t\t\t  new trace)\n"
4983#ifdef CONFIG_DYNAMIC_FTRACE
4984        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4985        "\t\t\t  traces\n"
4986#endif
4987#endif /* CONFIG_STACK_TRACER */
4988#ifdef CONFIG_DYNAMIC_EVENTS
4989        "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4990        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4991#endif
4992#ifdef CONFIG_KPROBE_EVENTS
4993        "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4994        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4995#endif
4996#ifdef CONFIG_UPROBE_EVENTS
4997        "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4998        "\t\t\t  Write into this file to define/undefine new trace events.\n"
4999#endif
5000#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5001        "\t  accepts: event-definitions (one definition per line)\n"
5002        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5003        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5004#ifdef CONFIG_HIST_TRIGGERS
5005        "\t           s:[synthetic/]<event> <field> [<field>]\n"
5006#endif
5007        "\t           -:[<group>/]<event>\n"
5008#ifdef CONFIG_KPROBE_EVENTS
5009        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5010  "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5011#endif
5012#ifdef CONFIG_UPROBE_EVENTS
5013  "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5014#endif
5015        "\t     args: <name>=fetcharg[:type]\n"
5016        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5017#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5018        "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5019#else
5020        "\t           $stack<index>, $stack, $retval, $comm,\n"
5021#endif
5022        "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5023        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5024        "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5025        "\t           <type>\\[<array-size>\\]\n"
5026#ifdef CONFIG_HIST_TRIGGERS
5027        "\t    field: <stype> <name>;\n"
5028        "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5029        "\t           [unsigned] char/int/long\n"
5030#endif
5031#endif
5032        "  events/\t\t- Directory containing all trace event subsystems:\n"
5033        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5034        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5035        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5036        "\t\t\t  events\n"
5037        "      filter\t\t- If set, only events passing filter are traced\n"
5038        "  events/<system>/<event>/\t- Directory containing control files for\n"
5039        "\t\t\t  <event>:\n"
5040        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5041        "      filter\t\t- If set, only events passing filter are traced\n"
5042        "      trigger\t\t- If set, a command to perform when event is hit\n"
5043        "\t    Format: <trigger>[:count][if <filter>]\n"
5044        "\t   trigger: traceon, traceoff\n"
5045        "\t            enable_event:<system>:<event>\n"
5046        "\t            disable_event:<system>:<event>\n"
5047#ifdef CONFIG_HIST_TRIGGERS
5048        "\t            enable_hist:<system>:<event>\n"
5049        "\t            disable_hist:<system>:<event>\n"
5050#endif
5051#ifdef CONFIG_STACKTRACE
5052        "\t\t    stacktrace\n"
5053#endif
5054#ifdef CONFIG_TRACER_SNAPSHOT
5055        "\t\t    snapshot\n"
5056#endif
5057#ifdef CONFIG_HIST_TRIGGERS
5058        "\t\t    hist (see below)\n"
5059#endif
5060        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5061        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5062        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5063        "\t                  events/block/block_unplug/trigger\n"
5064        "\t   The first disables tracing every time block_unplug is hit.\n"
5065        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5066        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5067        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5068        "\t   Like function triggers, the counter is only decremented if it\n"
5069        "\t    enabled or disabled tracing.\n"
5070        "\t   To remove a trigger without a count:\n"
5071        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5072        "\t   To remove a trigger with a count:\n"
5073        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5074        "\t   The filter, if any, may be omitted when removing a trigger.\n"
5075#ifdef CONFIG_HIST_TRIGGERS
5076        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5077        "\t    Format: hist:keys=<field1[,field2,...]>\n"
5078        "\t            [:values=<field1[,field2,...]>]\n"
5079        "\t            [:sort=<field1[,field2,...]>]\n"
5080        "\t            [:size=#entries]\n"
5081        "\t            [:pause][:continue][:clear]\n"
5082        "\t            [:name=histname1]\n"
5083        "\t            [:<handler>.<action>]\n"
5084        "\t            [if <filter>]\n\n"
5085        "\t    When a matching event is hit, an entry is added to a hash\n"
5086        "\t    table using the key(s) and value(s) named, and the value of a\n"
5087        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5088        "\t    correspond to fields in the event's format description.  Keys\n"
5089        "\t    can be any field, or the special string 'stacktrace'.\n"
5090        "\t    Compound keys consisting of up to two fields can be specified\n"
5091        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5092        "\t    fields.  Sort keys consisting of up to two fields can be\n"
5093        "\t    specified using the 'sort' keyword.  The sort direction can\n"
5094        "\t    be modified by appending '.descending' or '.ascending' to a\n"
5095        "\t    sort field.  The 'size' parameter can be used to specify more\n"
5096        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5097        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5098        "\t    its histogram data will be shared with other triggers of the\n"
5099        "\t    same name, and trigger hits will update this common data.\n\n"
5100        "\t    Reading the 'hist' file for the event will dump the hash\n"
5101        "\t    table in its entirety to stdout.  If there are multiple hist\n"
5102        "\t    triggers attached to an event, there will be a table for each\n"
5103        "\t    trigger in the output.  The table displayed for a named\n"
5104        "\t    trigger will be the same as any other instance having the\n"
5105        "\t    same name.  The default format used to display a given field\n"
5106        "\t    can be modified by appending any of the following modifiers\n"
5107        "\t    to the field name, as applicable:\n\n"
5108        "\t            .hex        display a number as a hex value\n"
5109        "\t            .sym        display an address as a symbol\n"
5110        "\t            .sym-offset display an address as a symbol and offset\n"
5111        "\t            .execname   display a common_pid as a program name\n"
5112        "\t            .syscall    display a syscall id as a syscall name\n"
5113        "\t            .log2       display log2 value rather than raw number\n"
5114        "\t            .usecs      display a common_timestamp in microseconds\n\n"
5115        "\t    The 'pause' parameter can be used to pause an existing hist\n"
5116        "\t    trigger or to start a hist trigger but not log any events\n"
5117        "\t    until told to do so.  'continue' can be used to start or\n"
5118        "\t    restart a paused hist trigger.\n\n"
5119        "\t    The 'clear' parameter will clear the contents of a running\n"
5120        "\t    hist trigger and leave its current paused/active state\n"
5121        "\t    unchanged.\n\n"
5122        "\t    The enable_hist and disable_hist triggers can be used to\n"
5123        "\t    have one event conditionally start and stop another event's\n"
5124        "\t    already-attached hist trigger.  The syntax is analogous to\n"
5125        "\t    the enable_event and disable_event triggers.\n\n"
5126        "\t    Hist trigger handlers and actions are executed whenever a\n"
5127        "\t    histogram entry is added or updated.  They take the form:\n\n"
5128        "\t        <handler>.<action>\n\n"
5129        "\t    The available handlers are:\n\n"
5130        "\t        onmatch(matching.event)  - invoke on addition or update\n"
5131        "\t        onmax(var)               - invoke if var exceeds current max\n"
5132        "\t        onchange(var)            - invoke action if var changes\n\n"
5133        "\t    The available actions are:\n\n"
5134        "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5135        "\t        save(field,...)                      - save current event fields\n"
5136#ifdef CONFIG_TRACER_SNAPSHOT
5137        "\t        snapshot()                           - snapshot the trace buffer\n"
5138#endif
5139#endif
5140;
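
/*
 * A concrete hist-trigger example to accompany the text above
 * (illustrative only; it assumes the kmem:kmalloc event and its
 * bytes_req field are available):
 *
 *	echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *		> events/kmem/kmalloc/trigger
 *
 * This aggregates kmalloc hits per task and sums the requested bytes;
 * reading events/kmem/kmalloc/hist dumps the resulting table.
 */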
5141
5142static ssize_t
5143tracing_readme_read(struct file *filp, char __user *ubuf,
5144                       size_t cnt, loff_t *ppos)
5145{
5146        return simple_read_from_buffer(ubuf, cnt, ppos,
5147                                        readme_msg, strlen(readme_msg));
5148}
5149
5150static const struct file_operations tracing_readme_fops = {
5151        .open           = tracing_open_generic,
5152        .read           = tracing_readme_read,
5153        .llseek         = generic_file_llseek,
5154};
5155
5156static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5157{
5158        int *ptr = v;
5159
5160        if (*pos || m->count)
5161                ptr++;
5162
5163        (*pos)++;
5164
5165        for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5166                if (trace_find_tgid(*ptr))
5167                        return ptr;
5168        }
5169
5170        return NULL;
5171}
5172
5173static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5174{
5175        void *v;
5176        loff_t l = 0;
5177
5178        if (!tgid_map)
5179                return NULL;
5180
5181        v = &tgid_map[0];
5182        while (l <= *pos) {
5183                v = saved_tgids_next(m, v, &l);
5184                if (!v)
5185                        return NULL;
5186        }
5187
5188        return v;
5189}
5190
5191static void saved_tgids_stop(struct seq_file *m, void *v)
5192{
5193}
5194
5195static int saved_tgids_show(struct seq_file *m, void *v)
5196{
5197        int pid = (int *)v - tgid_map;
5198
5199        seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5200        return 0;
5201}
5202
5203static const struct seq_operations tracing_saved_tgids_seq_ops = {
5204        .start          = saved_tgids_start,
5205        .stop           = saved_tgids_stop,
5206        .next           = saved_tgids_next,
5207        .show           = saved_tgids_show,
5208};
5209
5210static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5211{
5212        int ret;
5213
5214        ret = tracing_check_open_get_tr(NULL);
5215        if (ret)
5216                return ret;
5217
5218        return seq_open(filp, &tracing_saved_tgids_seq_ops);
5219}
5220
5221
5222static const struct file_operations tracing_saved_tgids_fops = {
5223        .open           = tracing_saved_tgids_open,
5224        .read           = seq_read,
5225        .llseek         = seq_lseek,
5226        .release        = seq_release,
5227};
5228
5229static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5230{
5231        unsigned int *ptr = v;
5232
5233        if (*pos || m->count)
5234                ptr++;
5235
5236        (*pos)++;
5237
5238        for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5239             ptr++) {
5240                if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5241                        continue;
5242
5243                return ptr;
5244        }
5245
5246        return NULL;
5247}
5248
5249static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5250{
5251        void *v;
5252        loff_t l = 0;
5253
5254        preempt_disable();
5255        arch_spin_lock(&trace_cmdline_lock);
5256
5257        v = &savedcmd->map_cmdline_to_pid[0];
5258        while (l <= *pos) {
5259                v = saved_cmdlines_next(m, v, &l);
5260                if (!v)
5261                        return NULL;
5262        }
5263
5264        return v;
5265}
5266
5267static void saved_cmdlines_stop(struct seq_file *m, void *v)
5268{
5269        arch_spin_unlock(&trace_cmdline_lock);
5270        preempt_enable();
5271}
5272
5273static int saved_cmdlines_show(struct seq_file *m, void *v)
5274{
5275        char buf[TASK_COMM_LEN];
5276        unsigned int *pid = v;
5277
5278        __trace_find_cmdline(*pid, buf);
5279        seq_printf(m, "%d %s\n", *pid, buf);
5280        return 0;
5281}
5282
5283static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5284        .start          = saved_cmdlines_start,
5285        .next           = saved_cmdlines_next,
5286        .stop           = saved_cmdlines_stop,
5287        .show           = saved_cmdlines_show,
5288};
5289
5290static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5291{
5292        int ret;
5293
5294        ret = tracing_check_open_get_tr(NULL);
5295        if (ret)
5296                return ret;
5297
5298        return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5299}
5300
5301static const struct file_operations tracing_saved_cmdlines_fops = {
5302        .open           = tracing_saved_cmdlines_open,
5303        .read           = seq_read,
5304        .llseek         = seq_lseek,
5305        .release        = seq_release,
5306};
5307
5308static ssize_t
5309tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5310                                 size_t cnt, loff_t *ppos)
5311{
5312        char buf[64];
5313        int r;
5314
5315        arch_spin_lock(&trace_cmdline_lock);
5316        r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5317        arch_spin_unlock(&trace_cmdline_lock);
5318
5319        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5320}
5321
5322static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5323{
5324        kfree(s->saved_cmdlines);
5325        kfree(s->map_cmdline_to_pid);
5326        kfree(s);
5327}
5328
5329static int tracing_resize_saved_cmdlines(unsigned int val)
5330{
5331        struct saved_cmdlines_buffer *s, *savedcmd_temp;
5332
5333        s = kmalloc(sizeof(*s), GFP_KERNEL);
5334        if (!s)
5335                return -ENOMEM;
5336
5337        if (allocate_cmdlines_buffer(val, s) < 0) {
5338                kfree(s);
5339                return -ENOMEM;
5340        }
5341
5342        arch_spin_lock(&trace_cmdline_lock);
5343        savedcmd_temp = savedcmd;
5344        savedcmd = s;
5345        arch_spin_unlock(&trace_cmdline_lock);
5346        free_saved_cmdlines_buffer(savedcmd_temp);
5347
5348        return 0;
5349}
5350
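/*
 * Example (illustrative): "echo 1024 > saved_cmdlines_size" resizes the
 * comm cache to 1024 entries via the write handler below; the value must
 * be non-zero and no larger than PID_MAX_DEFAULT.
 */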
5351static ssize_t
5352tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5353                                  size_t cnt, loff_t *ppos)
5354{
5355        unsigned long val;
5356        int ret;
5357
5358        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5359        if (ret)
5360                return ret;
5361
5362        /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5363        if (!val || val > PID_MAX_DEFAULT)
5364                return -EINVAL;
5365
5366        ret = tracing_resize_saved_cmdlines((unsigned int)val);
5367        if (ret < 0)
5368                return ret;
5369
5370        *ppos += cnt;
5371
5372        return cnt;
5373}
5374
5375static const struct file_operations tracing_saved_cmdlines_size_fops = {
5376        .open           = tracing_open_generic,
5377        .read           = tracing_saved_cmdlines_size_read,
5378        .write          = tracing_saved_cmdlines_size_write,
5379};
5380
5381#ifdef CONFIG_TRACE_EVAL_MAP_FILE
5382static union trace_eval_map_item *
5383update_eval_map(union trace_eval_map_item *ptr)
5384{
5385        if (!ptr->map.eval_string) {
5386                if (ptr->tail.next) {
5387                        ptr = ptr->tail.next;
5388                        /* Set ptr to the next real item (skip head) */
5389                        ptr++;
5390                } else
5391                        return NULL;
5392        }
5393        return ptr;
5394}
5395
5396static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5397{
5398        union trace_eval_map_item *ptr = v;
5399
5400        /*
5401         * Paranoid! If ptr points to end, we don't want to increment past it.
5402         * This really should never happen.
5403         */
5404        (*pos)++;
5405        ptr = update_eval_map(ptr);
5406        if (WARN_ON_ONCE(!ptr))
5407                return NULL;
5408
5409        ptr++;
5410        ptr = update_eval_map(ptr);
5411
5412        return ptr;
5413}
5414
5415static void *eval_map_start(struct seq_file *m, loff_t *pos)
5416{
5417        union trace_eval_map_item *v;
5418        loff_t l = 0;
5419
5420        mutex_lock(&trace_eval_mutex);
5421
5422        v = trace_eval_maps;
5423        if (v)
5424                v++;
5425
5426        while (v && l < *pos) {
5427                v = eval_map_next(m, v, &l);
5428        }
5429
5430        return v;
5431}
5432
5433static void eval_map_stop(struct seq_file *m, void *v)
5434{
5435        mutex_unlock(&trace_eval_mutex);
5436}
5437
5438static int eval_map_show(struct seq_file *m, void *v)
5439{
5440        union trace_eval_map_item *ptr = v;
5441
5442        seq_printf(m, "%s %ld (%s)\n",
5443                   ptr->map.eval_string, ptr->map.eval_value,
5444                   ptr->map.system);
5445
5446        return 0;
5447}
5448
5449static const struct seq_operations tracing_eval_map_seq_ops = {
5450        .start          = eval_map_start,
5451        .next           = eval_map_next,
5452        .stop           = eval_map_stop,
5453        .show           = eval_map_show,
5454};
5455
5456static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5457{
5458        int ret;
5459
5460        ret = tracing_check_open_get_tr(NULL);
5461        if (ret)
5462                return ret;
5463
5464        return seq_open(filp, &tracing_eval_map_seq_ops);
5465}
5466
5467static const struct file_operations tracing_eval_map_fops = {
5468        .open           = tracing_eval_map_open,
5469        .read           = seq_read,
5470        .llseek         = seq_lseek,
5471        .release        = seq_release,
5472};
5473
5474static inline union trace_eval_map_item *
5475trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5476{
5477        /* Return tail of array given the head */
5478        return ptr + ptr->head.length + 1;
5479}
5480
5481static void
5482trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5483                           int len)
5484{
5485        struct trace_eval_map **stop;
5486        struct trace_eval_map **map;
5487        union trace_eval_map_item *map_array;
5488        union trace_eval_map_item *ptr;
5489
5490        stop = start + len;
5491
5492        /*
5493         * The trace_eval_maps contains the map plus a head and tail item,
5494         * where the head holds the module and length of array, and the
5495         * tail holds a pointer to the next list.
5496         */
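        /*
         * Resulting layout (derived from the code below):
         *   map_array[0]         - head: { mod, length = len }
         *   map_array[1..len]    - copies of the maps in start[]
         *   map_array[len + 1]   - zeroed tail (tail.next == NULL ends the list)
         */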
5497        map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5498        if (!map_array) {
5499                pr_warn("Unable to allocate trace eval mapping\n");
5500                return;
5501        }
5502
5503        mutex_lock(&trace_eval_mutex);
5504
5505        if (!trace_eval_maps)
5506                trace_eval_maps = map_array;
5507        else {
5508                ptr = trace_eval_maps;
5509                for (;;) {
5510                        ptr = trace_eval_jmp_to_tail(ptr);
5511                        if (!ptr->tail.next)
5512                                break;
5513                        ptr = ptr->tail.next;
5514
5515                }
5516                ptr->tail.next = map_array;
5517        }
5518        map_array->head.mod = mod;
5519        map_array->head.length = len;
5520        map_array++;
5521
5522        for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5523                map_array->map = **map;
5524                map_array++;
5525        }
5526        memset(map_array, 0, sizeof(*map_array));
5527
5528        mutex_unlock(&trace_eval_mutex);
5529}
5530
5531static void trace_create_eval_file(struct dentry *d_tracer)
5532{
5533        trace_create_file("eval_map", 0444, d_tracer,
5534                          NULL, &tracing_eval_map_fops);
5535}
5536
5537#else /* CONFIG_TRACE_EVAL_MAP_FILE */
5538static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5539static inline void trace_insert_eval_map_file(struct module *mod,
5540                              struct trace_eval_map **start, int len) { }
5541#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5542
5543static void trace_insert_eval_map(struct module *mod,
5544                                  struct trace_eval_map **start, int len)
5545{
5546        struct trace_eval_map **map;
5547
5548        if (len <= 0)
5549                return;
5550
5551        map = start;
5552
5553        trace_event_eval_update(map, len);
5554
5555        trace_insert_eval_map_file(mod, start, len);
5556}
5557
5558static ssize_t
5559tracing_set_trace_read(struct file *filp, char __user *ubuf,
5560                       size_t cnt, loff_t *ppos)
5561{
5562        struct trace_array *tr = filp->private_data;
5563        char buf[MAX_TRACER_SIZE+2];
5564        int r;
5565
5566        mutex_lock(&trace_types_lock);
5567        r = sprintf(buf, "%s\n", tr->current_trace->name);
5568        mutex_unlock(&trace_types_lock);
5569
5570        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5571}
5572
5573int tracer_init(struct tracer *t, struct trace_array *tr)
5574{
5575        tracing_reset_online_cpus(&tr->array_buffer);
5576        return t->init(tr);
5577}
5578
5579static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5580{
5581        int cpu;
5582
5583        for_each_tracing_cpu(cpu)
5584                per_cpu_ptr(buf->data, cpu)->entries = val;
5585}
5586
5587#ifdef CONFIG_TRACER_MAX_TRACE
5588/* resize @trace_buf's buffer to the size of @size_buf's entries */
5589static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5590                                        struct array_buffer *size_buf, int cpu_id)
5591{
5592        int cpu, ret = 0;
5593
5594        if (cpu_id == RING_BUFFER_ALL_CPUS) {
5595                for_each_tracing_cpu(cpu) {
5596                        ret = ring_buffer_resize(trace_buf->buffer,
5597                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5598                        if (ret < 0)
5599                                break;
5600                        per_cpu_ptr(trace_buf->data, cpu)->entries =
5601                                per_cpu_ptr(size_buf->data, cpu)->entries;
5602                }
5603        } else {
5604                ret = ring_buffer_resize(trace_buf->buffer,
5605                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5606                if (ret == 0)
5607                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5608                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
5609        }
5610
5611        return ret;
5612}
5613#endif /* CONFIG_TRACER_MAX_TRACE */
5614
5615static int __tracing_resize_ring_buffer(struct trace_array *tr,
5616                                        unsigned long size, int cpu)
5617{
5618        int ret;
5619
5620        /*
5621         * If kernel or user changes the size of the ring buffer
5622         * we use the size that was given, and we can forget about
5623         * expanding it later.
5624         */
5625        ring_buffer_expanded = true;
5626
5627        /* May be called before buffers are initialized */
5628        if (!tr->array_buffer.buffer)
5629                return 0;
5630
5631        ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5632        if (ret < 0)
5633                return ret;
5634
5635#ifdef CONFIG_TRACER_MAX_TRACE
5636        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5637            !tr->current_trace->use_max_tr)
5638                goto out;
5639
5640        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5641        if (ret < 0) {
5642                int r = resize_buffer_duplicate_size(&tr->array_buffer,
5643                                                     &tr->array_buffer, cpu);
5644                if (r < 0) {
5645                        /*
5646                         * AARGH! We are left with different
5647                         * size max buffer!!!!
5648                         * The max buffer is our "snapshot" buffer.
5649                         * When a tracer needs a snapshot (one of the
5650                         * latency tracers), it swaps the max buffer
5651                         * with the saved snapshot. We succeeded in updating
5652                         * the size of the main buffer, but failed to
5653                         * update the size of the max buffer. But when we tried
5654                         * to reset the main buffer to the original size, we
5655                         * failed there too. This is very unlikely to
5656                         * happen, but if it does, warn and kill all
5657                         * tracing.
5658                         */
5659                        WARN_ON(1);
5660                        tracing_disabled = 1;
5661                }
5662                return ret;
5663        }
5664
5665        if (cpu == RING_BUFFER_ALL_CPUS)
5666                set_buffer_entries(&tr->max_buffer, size);
5667        else
5668                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5669
5670 out:
5671#endif /* CONFIG_TRACER_MAX_TRACE */
5672
5673        if (cpu == RING_BUFFER_ALL_CPUS)
5674                set_buffer_entries(&tr->array_buffer, size);
5675        else
5676                per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5677
5678        return ret;
5679}
5680
5681ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5682                                  unsigned long size, int cpu_id)
5683{
5684        int ret = size;
5685
5686        mutex_lock(&trace_types_lock);
5687
5688        if (cpu_id != RING_BUFFER_ALL_CPUS) {
5689                /* make sure, this cpu is enabled in the mask */
5690                if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5691                        ret = -EINVAL;
5692                        goto out;
5693                }
5694        }
5695
5696        ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5697        if (ret < 0)
5698                ret = -ENOMEM;
5699
5700out:
5701        mutex_unlock(&trace_types_lock);
5702
5703        return ret;
5704}
5705
5706
5707/**
5708 * tracing_update_buffers - used by tracing facility to expand ring buffers
5709 *
5710 * To save memory when tracing is never used on a system that has it
5711 * configured in, the ring buffers are set to a minimum size. Once
5712 * a user starts to use the tracing facility, they need to grow
5713 * to their default size.
5714 *
5715 * This function is to be called when a tracer is about to be used.
5716 */
5717int tracing_update_buffers(void)
5718{
5719        int ret = 0;
5720
5721        mutex_lock(&trace_types_lock);
5722        if (!ring_buffer_expanded)
5723                ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5724                                                RING_BUFFER_ALL_CPUS);
5725        mutex_unlock(&trace_types_lock);
5726
5727        return ret;
5728}
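
/*
 * A minimal caller sketch (illustrative): code that is about to start
 * using the ring buffer expands it first and propagates any error:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */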
5729
5730struct trace_option_dentry;
5731
5732static void
5733create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5734
5735/*
5736 * Used to clear out the tracer before deletion of an instance.
5737 * Must have trace_types_lock held.
5738 */
5739static void tracing_set_nop(struct trace_array *tr)
5740{
5741        if (tr->current_trace == &nop_trace)
5742                return;
5743
5744        tr->current_trace->enabled--;
5745
5746        if (tr->current_trace->reset)
5747                tr->current_trace->reset(tr);
5748
5749        tr->current_trace = &nop_trace;
5750}
5751
5752static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5753{
5754        /* Only enable if the directory has been created already. */
5755        if (!tr->dir)
5756                return;
5757
5758        create_trace_option_files(tr, t);
5759}
5760
5761int tracing_set_tracer(struct trace_array *tr, const char *buf)
5762{
5763        struct tracer *t;
5764#ifdef CONFIG_TRACER_MAX_TRACE
5765        bool had_max_tr;
5766#endif
5767        int ret = 0;
5768
5769        mutex_lock(&trace_types_lock);
5770
5771        if (!ring_buffer_expanded) {
5772                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5773                                                RING_BUFFER_ALL_CPUS);
5774                if (ret < 0)
5775                        goto out;
5776                ret = 0;
5777        }
5778
5779        for (t = trace_types; t; t = t->next) {
5780                if (strcmp(t->name, buf) == 0)
5781                        break;
5782        }
5783        if (!t) {
5784                ret = -EINVAL;
5785                goto out;
5786        }
5787        if (t == tr->current_trace)
5788                goto out;
5789
5790#ifdef CONFIG_TRACER_SNAPSHOT
5791        if (t->use_max_tr) {
5792                arch_spin_lock(&tr->max_lock);
5793                if (tr->cond_snapshot)
5794                        ret = -EBUSY;
5795                arch_spin_unlock(&tr->max_lock);
5796                if (ret)
5797                        goto out;
5798        }
5799#endif
5800        /* Some tracers won't work on the kernel command line */
5801        if (system_state < SYSTEM_RUNNING && t->noboot) {
5802                pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5803                        t->name);
5804                goto out;
5805        }
5806
5807        /* Some tracers are only allowed for the top level buffer */
5808        if (!trace_ok_for_array(t, tr)) {
5809                ret = -EINVAL;
5810                goto out;
5811        }
5812
5813        /* If trace pipe files are being read, we can't change the tracer */
5814        if (tr->current_trace->ref) {
5815                ret = -EBUSY;
5816                goto out;
5817        }
5818
5819        trace_branch_disable();
5820
5821        tr->current_trace->enabled--;
5822
5823        if (tr->current_trace->reset)
5824                tr->current_trace->reset(tr);
5825
5826        /* Current trace needs to be nop_trace before synchronize_rcu */
5827        tr->current_trace = &nop_trace;
5828
5829#ifdef CONFIG_TRACER_MAX_TRACE
5830        had_max_tr = tr->allocated_snapshot;
5831
5832        if (had_max_tr && !t->use_max_tr) {
5833                /*
5834                 * We need to make sure that the update_max_tr sees that
5835                 * current_trace changed to nop_trace to keep it from
5836                 * swapping the buffers after we resize it.
5837                 * The update_max_tr is called from interrupts disabled
5838                 * The update_max_tr is called with interrupts disabled,
5839                 * so a synchronize_rcu() is sufficient.
5840                synchronize_rcu();
5841                free_snapshot(tr);
5842        }
5843#endif
5844
5845#ifdef CONFIG_TRACER_MAX_TRACE
5846        if (t->use_max_tr && !had_max_tr) {
5847                ret = tracing_alloc_snapshot_instance(tr);
5848                if (ret < 0)
5849                        goto out;
5850        }
5851#endif
5852
5853        if (t->init) {
5854                ret = tracer_init(t, tr);
5855                if (ret)
5856                        goto out;
5857        }
5858
5859        tr->current_trace = t;
5860        tr->current_trace->enabled++;
5861        trace_branch_enable(tr);
5862 out:
5863        mutex_unlock(&trace_types_lock);
5864
5865        return ret;
5866}
5867
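/*
 * Example (illustrative): "echo function > current_tracer" arrives here;
 * the trailing newline from echo is stripped below before the name is
 * looked up in the trace_types list by tracing_set_tracer().
 */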
5868static ssize_t
5869tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5870                        size_t cnt, loff_t *ppos)
5871{
5872        struct trace_array *tr = filp->private_data;
5873        char buf[MAX_TRACER_SIZE+1];
5874        int i;
5875        size_t ret;
5876        int err;
5877
5878        ret = cnt;
5879
5880        if (cnt > MAX_TRACER_SIZE)
5881                cnt = MAX_TRACER_SIZE;
5882
5883        if (copy_from_user(buf, ubuf, cnt))
5884                return -EFAULT;
5885
5886        buf[cnt] = 0;
5887
5888        /* strip trailing whitespace. */
5889        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5890                buf[i] = 0;
5891
5892        err = tracing_set_tracer(tr, buf);
5893        if (err)
5894                return err;
5895
5896        *ppos += ret;
5897
5898        return ret;
5899}
5900
5901static ssize_t
5902tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5903                   size_t cnt, loff_t *ppos)
5904{
5905        char buf[64];
5906        int r;
5907
5908        r = snprintf(buf, sizeof(buf), "%ld\n",
5909                     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5910        if (r > sizeof(buf))
5911                r = sizeof(buf);
5912        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5913}
5914
5915static ssize_t
5916tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5917                    size_t cnt, loff_t *ppos)
5918{
5919        unsigned long val;
5920        int ret;
5921
5922        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5923        if (ret)
5924                return ret;
5925
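        /* The value is written in microseconds; store it as nanoseconds */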
5926        *ptr = val * 1000;
5927
5928        return cnt;
5929}
5930
5931static ssize_t
5932tracing_thresh_read(struct file *filp, char __user *ubuf,
5933                    size_t cnt, loff_t *ppos)
5934{
5935        return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5936}
5937
5938static ssize_t
5939tracing_thresh_write(struct file *filp, const char __user *ubuf,
5940                     size_t cnt, loff_t *ppos)
5941{
5942        struct trace_array *tr = filp->private_data;
5943        int ret;
5944
5945        mutex_lock(&trace_types_lock);
5946        ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5947        if (ret < 0)
5948                goto out;
5949
5950        if (tr->current_trace->update_thresh) {
5951                ret = tr->current_trace->update_thresh(tr);
5952                if (ret < 0)
5953                        goto out;
5954        }
5955
5956        ret = cnt;
5957out:
5958        mutex_unlock(&trace_types_lock);
5959
5960        return ret;
5961}
5962
5963#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5964
5965static ssize_t
5966tracing_max_lat_read(struct file *filp, char __user *ubuf,
5967                     size_t cnt, loff_t *ppos)
5968{
5969        return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5970}
5971
5972static ssize_t
5973tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5974                      size_t cnt, loff_t *ppos)
5975{
5976        return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5977}
5978
5979#endif
5980
5981static int tracing_open_pipe(struct inode *inode, struct file *filp)
5982{
5983        struct trace_array *tr = inode->i_private;
5984        struct trace_iterator *iter;
5985        int ret;
5986
5987        ret = tracing_check_open_get_tr(tr);
5988        if (ret)
5989                return ret;
5990
5991        mutex_lock(&trace_types_lock);
5992
5993        /* create a buffer to store the information to pass to userspace */
5994        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5995        if (!iter) {
5996                ret = -ENOMEM;
5997                __trace_array_put(tr);
5998                goto out;
5999        }
6000
6001        trace_seq_init(&iter->seq);
6002        iter->trace = tr->current_trace;
6003
6004        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6005                ret = -ENOMEM;
6006                goto fail;
6007        }
6008
6009        /* trace pipe does not show start of buffer */
6010        cpumask_setall(iter->started);
6011
6012        if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6013                iter->iter_flags |= TRACE_FILE_LAT_FMT;
6014
6015        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6016        if (trace_clocks[tr->clock_id].in_ns)
6017                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6018
6019        iter->tr = tr;
6020        iter->array_buffer = &tr->array_buffer;
6021        iter->cpu_file = tracing_get_cpu(inode);
6022        mutex_init(&iter->mutex);
6023        filp->private_data = iter;
6024
6025        if (iter->trace->pipe_open)
6026                iter->trace->pipe_open(iter);
6027
6028        nonseekable_open(inode, filp);
6029
6030        tr->current_trace->ref++;
6031out:
6032        mutex_unlock(&trace_types_lock);
6033        return ret;
6034
6035fail:
6036        kfree(iter);
6037        __trace_array_put(tr);
6038        mutex_unlock(&trace_types_lock);
6039        return ret;
6040}
6041
6042static int tracing_release_pipe(struct inode *inode, struct file *file)
6043{
6044        struct trace_iterator *iter = file->private_data;
6045        struct trace_array *tr = inode->i_private;
6046
6047        mutex_lock(&trace_types_lock);
6048
6049        tr->current_trace->ref--;
6050
6051        if (iter->trace->pipe_close)
6052                iter->trace->pipe_close(iter);
6053
6054        mutex_unlock(&trace_types_lock);
6055
6056        free_cpumask_var(iter->started);
6057        mutex_destroy(&iter->mutex);
6058        kfree(iter);
6059
6060        trace_array_put(tr);
6061
6062        return 0;
6063}
6064
6065static __poll_t
6066trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6067{
6068        struct trace_array *tr = iter->tr;
6069
6070        /* Iterators are static; they should be filled or empty */
6071        if (trace_buffer_iter(iter, iter->cpu_file))
6072                return EPOLLIN | EPOLLRDNORM;
6073
6074        if (tr->trace_flags & TRACE_ITER_BLOCK)
6075                /*
6076                 * Always select as readable when in blocking mode
6077                 */
6078                return EPOLLIN | EPOLLRDNORM;
6079        else
6080                return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6081                                             filp, poll_table);
6082}
6083
6084static __poll_t
6085tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6086{
6087        struct trace_iterator *iter = filp->private_data;
6088
6089        return trace_poll(iter, filp, poll_table);
6090}
6091
6092/* Must be called with iter->mutex held. */
6093static int tracing_wait_pipe(struct file *filp)
6094{
6095        struct trace_iterator *iter = filp->private_data;
6096        int ret;
6097
6098        while (trace_empty(iter)) {
6099
6100                if (filp->f_flags & O_NONBLOCK) {
6101                        return -EAGAIN;
6102                }
6103
6104                /*
6105                 * We block until we read something and tracing is disabled.
6106                 * We still block if tracing is disabled, but we have never
6107                 * read anything. This allows a user to cat this file, and
6108                 * then enable tracing. But after we have read something,
6109                 * we give an EOF when tracing is again disabled.
6110                 *
6111                 * iter->pos will be 0 if we haven't read anything.
6112                 */
6113                if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6114                        break;
6115
6116                mutex_unlock(&iter->mutex);
6117
6118                ret = wait_on_pipe(iter, 0);
6119
6120                mutex_lock(&iter->mutex);
6121
6122                if (ret)
6123                        return ret;
6124        }
6125
6126        return 1;
6127}
6128
6129/*
6130 * Consumer reader.
6131 */
6132static ssize_t
6133tracing_read_pipe(struct file *filp, char __user *ubuf,
6134                  size_t cnt, loff_t *ppos)
6135{
6136        struct trace_iterator *iter = filp->private_data;
6137        ssize_t sret;
6138
6139        /*
6140         * Avoid more than one consumer on a single file descriptor.
6141         * This is just a matter of trace coherency; the ring buffer itself
6142         * is protected.
6143         */
6144        mutex_lock(&iter->mutex);
6145
6146        /* return any leftover data */
6147        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6148        if (sret != -EBUSY)
6149                goto out;
6150
6151        trace_seq_init(&iter->seq);
6152
6153        if (iter->trace->read) {
6154                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6155                if (sret)
6156                        goto out;
6157        }
6158
6159waitagain:
6160        sret = tracing_wait_pipe(filp);
6161        if (sret <= 0)
6162                goto out;
6163
6164        /* stop when tracing is finished */
6165        if (trace_empty(iter)) {
6166                sret = 0;
6167                goto out;
6168        }
6169
6170        if (cnt >= PAGE_SIZE)
6171                cnt = PAGE_SIZE - 1;
6172
6173        /* reset all but tr, trace, and overruns */
6174        memset(&iter->seq, 0,
6175               sizeof(struct trace_iterator) -
6176               offsetof(struct trace_iterator, seq));
6177        cpumask_clear(iter->started);
6178        trace_seq_init(&iter->seq);
6179        iter->pos = -1;
6180
6181        trace_event_read_lock();
6182        trace_access_lock(iter->cpu_file);
6183        while (trace_find_next_entry_inc(iter) != NULL) {
6184                enum print_line_t ret;
6185                int save_len = iter->seq.seq.len;
6186
6187                ret = print_trace_line(iter);
6188                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6189                        /* don't print partial lines */
6190                        iter->seq.seq.len = save_len;
6191                        break;
6192                }
6193                if (ret != TRACE_TYPE_NO_CONSUME)
6194                        trace_consume(iter);
6195
6196                if (trace_seq_used(&iter->seq) >= cnt)
6197                        break;
6198
6199                /*
6200                 * Setting the full flag means we reached the trace_seq buffer
6201                 * size and we should have left via the partial output condition above.
6202                 * One of the trace_seq_* functions is not used properly.
6203                 */
6204                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6205                          iter->ent->type);
6206        }
6207        trace_access_unlock(iter->cpu_file);
6208        trace_event_read_unlock();
6209
6210        /* Now copy what we have to the user */
6211        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6212        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6213                trace_seq_init(&iter->seq);
6214
6215        /*
6216         * If there was nothing to send to user, in spite of consuming trace
6217         * entries, go back to wait for more entries.
6218         */
6219        if (sret == -EBUSY)
6220                goto waitagain;
6221
6222out:
6223        mutex_unlock(&iter->mutex);
6224
6225        return sret;
6226}
6227
6228static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6229                                     unsigned int idx)
6230{
6231        __free_page(spd->pages[idx]);
6232}
6233
6234static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6235        .confirm                = generic_pipe_buf_confirm,
6236        .release                = generic_pipe_buf_release,
6237        .steal                  = generic_pipe_buf_steal,
6238        .get                    = generic_pipe_buf_get,
6239};
6240
6241static size_t
6242tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6243{
6244        size_t count;
6245        int save_len;
6246        int ret;
6247
6248        /* Seq buffer is page-sized, exactly what we need. */
6249        for (;;) {
6250                save_len = iter->seq.seq.len;
6251                ret = print_trace_line(iter);
6252
6253                if (trace_seq_has_overflowed(&iter->seq)) {
6254                        iter->seq.seq.len = save_len;
6255                        break;
6256                }
6257
6258                /*
6259                 * This should not be hit, because it should only
6260                 * be set if the iter->seq overflowed. But check it
6261                 * anyway to be safe.
6262                 */
6263                if (ret == TRACE_TYPE_PARTIAL_LINE) {
6264                        iter->seq.seq.len = save_len;
6265                        break;
6266                }
6267
6268                count = trace_seq_used(&iter->seq) - save_len;
6269                if (rem < count) {
6270                        rem = 0;
6271                        iter->seq.seq.len = save_len;
6272                        break;
6273                }
6274
6275                if (ret != TRACE_TYPE_NO_CONSUME)
6276                        trace_consume(iter);
6277                rem -= count;
6278                if (!trace_find_next_entry_inc(iter))   {
6279                        rem = 0;
6280                        iter->ent = NULL;
6281                        break;
6282                }
6283        }
6284
6285        return rem;
6286}
6287
6288static ssize_t tracing_splice_read_pipe(struct file *filp,
6289                                        loff_t *ppos,
6290                                        struct pipe_inode_info *pipe,
6291                                        size_t len,
6292                                        unsigned int flags)
6293{
6294        struct page *pages_def[PIPE_DEF_BUFFERS];
6295        struct partial_page partial_def[PIPE_DEF_BUFFERS];
6296        struct trace_iterator *iter = filp->private_data;
6297        struct splice_pipe_desc spd = {
6298                .pages          = pages_def,
6299                .partial        = partial_def,
6300                .nr_pages       = 0, /* This gets updated below. */
6301                .nr_pages_max   = PIPE_DEF_BUFFERS,
6302                .ops            = &tracing_pipe_buf_ops,
6303                .spd_release    = tracing_spd_release_pipe,
6304        };
6305        ssize_t ret;
6306        size_t rem;
6307        unsigned int i;
6308
6309        if (splice_grow_spd(pipe, &spd))
6310                return -ENOMEM;
6311
6312        mutex_lock(&iter->mutex);
6313
6314        if (iter->trace->splice_read) {
6315                ret = iter->trace->splice_read(iter, filp,
6316                                               ppos, pipe, len, flags);
6317                if (ret)
6318                        goto out_err;
6319        }
6320
6321        ret = tracing_wait_pipe(filp);
6322        if (ret <= 0)
6323                goto out_err;
6324
6325        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6326                ret = -EFAULT;
6327                goto out_err;
6328        }
6329
6330        trace_event_read_lock();
6331        trace_access_lock(iter->cpu_file);
6332
6333        /* Fill as many pages as possible. */
6334        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6335                spd.pages[i] = alloc_page(GFP_KERNEL);
6336                if (!spd.pages[i])
6337                        break;
6338
6339                rem = tracing_fill_pipe_page(rem, iter);
6340
6341                /* Copy the data into the page, so we can start over. */
6342                ret = trace_seq_to_buffer(&iter->seq,
6343                                          page_address(spd.pages[i]),
6344                                          trace_seq_used(&iter->seq));
6345                if (ret < 0) {
6346                        __free_page(spd.pages[i]);
6347                        break;
6348                }
6349                spd.partial[i].offset = 0;
6350                spd.partial[i].len = trace_seq_used(&iter->seq);
6351
6352                trace_seq_init(&iter->seq);
6353        }
6354
6355        trace_access_unlock(iter->cpu_file);
6356        trace_event_read_unlock();
6357        mutex_unlock(&iter->mutex);
6358
6359        spd.nr_pages = i;
6360
6361        if (i)
6362                ret = splice_to_pipe(pipe, &spd);
6363        else
6364                ret = 0;
6365out:
6366        splice_shrink_spd(&spd);
6367        return ret;
6368
6369out_err:
6370        mutex_unlock(&iter->mutex);
6371        goto out;
6372}
6373
6374static ssize_t
6375tracing_entries_read(struct file *filp, char __user *ubuf,
6376                     size_t cnt, loff_t *ppos)
6377{
6378        struct inode *inode = file_inode(filp);
6379        struct trace_array *tr = inode->i_private;
6380        int cpu = tracing_get_cpu(inode);
6381        char buf[64];
6382        int r = 0;
6383        ssize_t ret;
6384
6385        mutex_lock(&trace_types_lock);
6386
6387        if (cpu == RING_BUFFER_ALL_CPUS) {
6388                int cpu, buf_size_same;
6389                unsigned long size;
6390
6391                size = 0;
6392                buf_size_same = 1;
6393                /* check if all cpu sizes are same */
6394                for_each_tracing_cpu(cpu) {
6395                        /* fill in the size from first enabled cpu */
6396                        if (size == 0)
6397                                size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6398                        if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6399                                buf_size_same = 0;
6400                                break;
6401                        }
6402                }
6403
6404                if (buf_size_same) {
6405                        if (!ring_buffer_expanded)
6406                                r = sprintf(buf, "%lu (expanded: %lu)\n",
6407                                            size >> 10,
6408                                            trace_buf_size >> 10);
6409                        else
6410                                r = sprintf(buf, "%lu\n", size >> 10);
6411                } else
6412                        r = sprintf(buf, "X\n");
6413        } else
6414                r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6415
6416        mutex_unlock(&trace_types_lock);
6417
6418        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6419        return ret;
6420}
6421
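/*
 * Example (illustrative): "echo 2048 > buffer_size_kb" requests 2048 KiB
 * for the selected per-CPU buffer (or all of them); the handler below
 * shifts the value left by 10 to convert KB to bytes before resizing.
 */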
6422static ssize_t
6423tracing_entries_write(struct file *filp, const char __user *ubuf,
6424                      size_t cnt, loff_t *ppos)
6425{
6426        struct inode *inode = file_inode(filp);
6427        struct trace_array *tr = inode->i_private;
6428        unsigned long val;
6429        int ret;
6430
6431        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6432        if (ret)
6433                return ret;
6434
6435        /* must have at least 1 entry */
6436        if (!val)
6437                return -EINVAL;
6438
6439        /* value is in KB */
6440        val <<= 10;
6441        ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6442        if (ret < 0)
6443                return ret;
6444
6445        *ppos += cnt;
6446
6447        return cnt;
6448}
6449
6450static ssize_t
6451tracing_total_entries_read(struct file *filp, char __user *ubuf,
6452                                size_t cnt, loff_t *ppos)
6453{
6454        struct trace_array *tr = filp->private_data;
6455        char buf[64];
6456        int r, cpu;
6457        unsigned long size = 0, expanded_size = 0;
6458
6459        mutex_lock(&trace_types_lock);
6460        for_each_tracing_cpu(cpu) {
6461                size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6462                if (!ring_buffer_expanded)
6463                        expanded_size += trace_buf_size >> 10;
6464        }
6465        if (ring_buffer_expanded)
6466                r = sprintf(buf, "%lu\n", size);
6467        else
6468                r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6469        mutex_unlock(&trace_types_lock);
6470
6471        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6472}
6473
6474static ssize_t
6475tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6476                          size_t cnt, loff_t *ppos)
6477{
6478        /*
6479         * There is no need to read what the user has written, this function
6480         * is just to make sure that there is no error when "echo" is used
6481         */
6482
6483        *ppos += cnt;
6484
6485        return cnt;
6486}
6487
6488static int
6489tracing_free_buffer_release(struct inode *inode, struct file *filp)
6490{
6491        struct trace_array *tr = inode->i_private;
6492
6493        /* disable tracing ? */
6494        if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6495                tracer_tracing_off(tr);
6496        /* resize the ring buffer to 0 */
6497        tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6498
6499        trace_array_put(tr);
6500
6501        return 0;
6502}
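
/*
 * Illustrative note (not part of the kernel source): these handlers back
 * the "free_buffer" tracefs file.  The write side only consumes the data;
 * the real work happens on release, so something like
 *
 *   echo > /sys/kernel/tracing/free_buffer
 *
 * shrinks the ring buffer to zero once the file is closed (stopping
 * tracing first if TRACE_ITER_STOP_ON_FREE is set).
 */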
6503
6504static ssize_t
6505tracing_mark_write(struct file *filp, const char __user *ubuf,
6506                                        size_t cnt, loff_t *fpos)
6507{
6508        struct trace_array *tr = filp->private_data;
6509        struct ring_buffer_event *event;
6510        enum event_trigger_type tt = ETT_NONE;
6511        struct trace_buffer *buffer;
6512        struct print_entry *entry;
6513        unsigned long irq_flags;
6514        ssize_t written;
6515        int size;
6516        int len;
6517
6518/* Used in tracing_mark_raw_write() as well */
6519#define FAULTED_STR "<faulted>"
6520#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6521
6522        if (tracing_disabled)
6523                return -EINVAL;
6524
6525        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6526                return -EINVAL;
6527
6528        if (cnt > TRACE_BUF_SIZE)
6529                cnt = TRACE_BUF_SIZE;
6530
6531        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6532
6533        local_save_flags(irq_flags);
6534        size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6535
6536        /* If less than "<faulted>", then make sure we can still add that */
6537        if (cnt < FAULTED_SIZE)
6538                size += FAULTED_SIZE - cnt;
6539
6540        buffer = tr->array_buffer.buffer;
6541        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6542                                            irq_flags, preempt_count());
6543        if (unlikely(!event))
6544                /* Ring buffer disabled, return as if not open for write */
6545                return -EBADF;
6546
6547        entry = ring_buffer_event_data(event);
6548        entry->ip = _THIS_IP_;
6549
6550        len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6551        if (len) {
6552                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6553                cnt = FAULTED_SIZE;
6554                written = -EFAULT;
6555        } else
6556                written = cnt;
6557        len = cnt;
6558
6559        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6560                /* do not add \n before testing triggers, but add \0 */
6561                entry->buf[cnt] = '\0';
6562                tt = event_triggers_call(tr->trace_marker_file, entry, event);
6563        }
6564
6565        if (entry->buf[cnt - 1] != '\n') {
6566                entry->buf[cnt] = '\n';
6567                entry->buf[cnt + 1] = '\0';
6568        } else
6569                entry->buf[cnt] = '\0';
6570
6571        __buffer_unlock_commit(buffer, event);
6572
6573        if (tt)
6574                event_triggers_post_call(tr->trace_marker_file, tt);
6575
6576        if (written > 0)
6577                *fpos += written;
6578
6579        return written;
6580}
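
/*
 * Illustrative usage sketch (not part of the kernel source): this is the
 * write handler for the "trace_marker" tracefs file.  User space can drop
 * annotations into the trace with a plain write, e.g.
 *
 *   echo "hit the slow path" > /sys/kernel/tracing/trace_marker
 *
 * Writes longer than TRACE_BUF_SIZE are truncated, and if the copy from
 * user space faults the event text becomes "<faulted>".
 */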
6581
6582/* Limit it for now to 3K (including tag) */
6583#define RAW_DATA_MAX_SIZE (1024*3)
6584
6585static ssize_t
6586tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6587                                        size_t cnt, loff_t *fpos)
6588{
6589        struct trace_array *tr = filp->private_data;
6590        struct ring_buffer_event *event;
6591        struct trace_buffer *buffer;
6592        struct raw_data_entry *entry;
6593        unsigned long irq_flags;
6594        ssize_t written;
6595        int size;
6596        int len;
6597
6598#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6599
6600        if (tracing_disabled)
6601                return -EINVAL;
6602
6603        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6604                return -EINVAL;
6605
6606        /* The marker must at least have a tag id */
6607        if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6608                return -EINVAL;
6609
6610        if (cnt > TRACE_BUF_SIZE)
6611                cnt = TRACE_BUF_SIZE;
6612
6613        BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6614
6615        local_save_flags(irq_flags);
6616        size = sizeof(*entry) + cnt;
6617        if (cnt < FAULT_SIZE_ID)
6618                size += FAULT_SIZE_ID - cnt;
6619
6620        buffer = tr->array_buffer.buffer;
6621        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6622                                            irq_flags, preempt_count());
6623        if (!event)
6624                /* Ring buffer disabled, return as if not open for write */
6625                return -EBADF;
6626
6627        entry = ring_buffer_event_data(event);
6628
6629        len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6630        if (len) {
6631                entry->id = -1;
6632                memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6633                written = -EFAULT;
6634        } else
6635                written = cnt;
6636
6637        __buffer_unlock_commit(buffer, event);
6638
6639        if (written > 0)
6640                *fpos += written;
6641
6642        return written;
6643}
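
/*
 * Illustrative sketch (not part of the kernel source): "trace_marker_raw"
 * expects a binary record that starts with an int tag id (hence the
 * sizeof(unsigned int) minimum above).  A hypothetical user-space writer:
 *
 *   struct { int id; char payload[32]; } rec = { .id = 42 };
 *
 *   memcpy(rec.payload, "raw data", sizeof("raw data"));
 *   write(fd, &rec, sizeof(rec));   // fd open on trace_marker_raw
 *
 * On a faulted copy the id is set to -1 and the payload is replaced with
 * "<faulted>", mirroring tracing_mark_write() above.
 */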
6644
6645static int tracing_clock_show(struct seq_file *m, void *v)
6646{
6647        struct trace_array *tr = m->private;
6648        int i;
6649
6650        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6651                seq_printf(m,
6652                        "%s%s%s%s", i ? " " : "",
6653                        i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6654                        i == tr->clock_id ? "]" : "");
6655        seq_putc(m, '\n');
6656
6657        return 0;
6658}
6659
6660int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6661{
6662        int i;
6663
6664        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6665                if (strcmp(trace_clocks[i].name, clockstr) == 0)
6666                        break;
6667        }
6668        if (i == ARRAY_SIZE(trace_clocks))
6669                return -EINVAL;
6670
6671        mutex_lock(&trace_types_lock);
6672
6673        tr->clock_id = i;
6674
6675        ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6676
6677        /*
6678         * New clock may not be consistent with the previous clock.
6679         * Reset the buffer so that it doesn't have incomparable timestamps.
6680         */
6681        tracing_reset_online_cpus(&tr->array_buffer);
6682
6683#ifdef CONFIG_TRACER_MAX_TRACE
6684        if (tr->max_buffer.buffer)
6685                ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6686        tracing_reset_online_cpus(&tr->max_buffer);
6687#endif
6688
6689        mutex_unlock(&trace_types_lock);
6690
6691        return 0;
6692}
6693
6694static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6695                                   size_t cnt, loff_t *fpos)
6696{
6697        struct seq_file *m = filp->private_data;
6698        struct trace_array *tr = m->private;
6699        char buf[64];
6700        const char *clockstr;
6701        int ret;
6702
6703        if (cnt >= sizeof(buf))
6704                return -EINVAL;
6705
6706        if (copy_from_user(buf, ubuf, cnt))
6707                return -EFAULT;
6708
6709        buf[cnt] = 0;
6710
6711        clockstr = strstrip(buf);
6712
6713        ret = tracing_set_clock(tr, clockstr);
6714        if (ret)
6715                return ret;
6716
6717        *fpos += cnt;
6718
6719        return cnt;
6720}
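
/*
 * Illustrative usage sketch (not part of the kernel source): these
 * handlers back the "trace_clock" tracefs file.  Reading lists the
 * available clocks with the current one in brackets; writing selects a
 * new clock and resets the buffers so timestamps stay comparable:
 *
 *   cat /sys/kernel/tracing/trace_clock    # e.g. "[local] global counter ..."
 *   echo global > /sys/kernel/tracing/trace_clock
 *
 * The clock names come from the trace_clocks[] table defined elsewhere.
 */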
6721
6722static int tracing_clock_open(struct inode *inode, struct file *file)
6723{
6724        struct trace_array *tr = inode->i_private;
6725        int ret;
6726
6727        ret = tracing_check_open_get_tr(tr);
6728        if (ret)
6729                return ret;
6730
6731        ret = single_open(file, tracing_clock_show, inode->i_private);
6732        if (ret < 0)
6733                trace_array_put(tr);
6734
6735        return ret;
6736}
6737
6738static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6739{
6740        struct trace_array *tr = m->private;
6741
6742        mutex_lock(&trace_types_lock);
6743
6744        if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6745                seq_puts(m, "delta [absolute]\n");
6746        else
6747                seq_puts(m, "[delta] absolute\n");
6748
6749        mutex_unlock(&trace_types_lock);
6750
6751        return 0;
6752}
6753
6754static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6755{
6756        struct trace_array *tr = inode->i_private;
6757        int ret;
6758
6759        ret = tracing_check_open_get_tr(tr);
6760        if (ret)
6761                return ret;
6762
6763        ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6764        if (ret < 0)
6765                trace_array_put(tr);
6766
6767        return ret;
6768}
6769
6770int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6771{
6772        int ret = 0;
6773
6774        mutex_lock(&trace_types_lock);
6775
6776        if (abs && tr->time_stamp_abs_ref++)
6777                goto out;
6778
6779        if (!abs) {
6780                if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6781                        ret = -EINVAL;
6782                        goto out;
6783                }
6784
6785                if (--tr->time_stamp_abs_ref)
6786                        goto out;
6787        }
6788
6789        ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6790
6791#ifdef CONFIG_TRACER_MAX_TRACE
6792        if (tr->max_buffer.buffer)
6793                ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6794#endif
6795 out:
6796        mutex_unlock(&trace_types_lock);
6797
6798        return ret;
6799}
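
/*
 * Illustrative note (not part of the kernel source): users of absolute
 * timestamps (for example the histogram trigger code) pair these calls,
 * and the refcount above lets several of them coexist:
 *
 *   tracing_set_time_stamp_abs(tr, true);   // take a reference
 *   ...
 *   tracing_set_time_stamp_abs(tr, false);  // drop it; the last put
 *                                           // restores delta timestamps
 */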
6800
6801struct ftrace_buffer_info {
6802        struct trace_iterator   iter;
6803        void                    *spare;
6804        unsigned int            spare_cpu;
6805        unsigned int            read;
6806};
6807
6808#ifdef CONFIG_TRACER_SNAPSHOT
6809static int tracing_snapshot_open(struct inode *inode, struct file *file)
6810{
6811        struct trace_array *tr = inode->i_private;
6812        struct trace_iterator *iter;
6813        struct seq_file *m;
6814        int ret;
6815
6816        ret = tracing_check_open_get_tr(tr);
6817        if (ret)
6818                return ret;
6819
6820        if (file->f_mode & FMODE_READ) {
6821                iter = __tracing_open(inode, file, true);
6822                if (IS_ERR(iter))
6823                        ret = PTR_ERR(iter);
6824        } else {
6825                /* Writes still need the seq_file to hold the private data */
6826                ret = -ENOMEM;
6827                m = kzalloc(sizeof(*m), GFP_KERNEL);
6828                if (!m)
6829                        goto out;
6830                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6831                if (!iter) {
6832                        kfree(m);
6833                        goto out;
6834                }
6835                ret = 0;
6836
6837                iter->tr = tr;
6838                iter->array_buffer = &tr->max_buffer;
6839                iter->cpu_file = tracing_get_cpu(inode);
6840                m->private = iter;
6841                file->private_data = m;
6842        }
6843out:
6844        if (ret < 0)
6845                trace_array_put(tr);
6846
6847        return ret;
6848}
6849
6850static ssize_t
6851tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6852                       loff_t *ppos)
6853{
6854        struct seq_file *m = filp->private_data;
6855        struct trace_iterator *iter = m->private;
6856        struct trace_array *tr = iter->tr;
6857        unsigned long val;
6858        int ret;
6859
6860        ret = tracing_update_buffers();
6861        if (ret < 0)
6862                return ret;
6863
6864        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6865        if (ret)
6866                return ret;
6867
6868        mutex_lock(&trace_types_lock);
6869
6870        if (tr->current_trace->use_max_tr) {
6871                ret = -EBUSY;
6872                goto out;
6873        }
6874
6875        arch_spin_lock(&tr->max_lock);
6876        if (tr->cond_snapshot)
6877                ret = -EBUSY;
6878        arch_spin_unlock(&tr->max_lock);
6879        if (ret)
6880                goto out;
6881
6882        switch (val) {
6883        case 0:
6884                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6885                        ret = -EINVAL;
6886                        break;
6887                }
6888                if (tr->allocated_snapshot)
6889                        free_snapshot(tr);
6890                break;
6891        case 1:
6892/* Only allow per-cpu swap if the ring buffer supports it */
6893#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6894                if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6895                        ret = -EINVAL;
6896                        break;
6897                }
6898#endif
6899                if (tr->allocated_snapshot)
6900                        ret = resize_buffer_duplicate_size(&tr->max_buffer,
6901                                        &tr->array_buffer, iter->cpu_file);
6902                else
6903                        ret = tracing_alloc_snapshot_instance(tr);
6904                if (ret < 0)
6905                        break;
6906                local_irq_disable();
6907                /* Now, we're going to swap */
6908                if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6909                        update_max_tr(tr, current, smp_processor_id(), NULL);
6910                else
6911                        update_max_tr_single(tr, current, iter->cpu_file);
6912                local_irq_enable();
6913                break;
6914        default:
6915                if (tr->allocated_snapshot) {
6916                        if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6917                                tracing_reset_online_cpus(&tr->max_buffer);
6918                        else
6919                                tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6920                }
6921                break;
6922        }
6923
6924        if (ret >= 0) {
6925                *ppos += cnt;
6926                ret = cnt;
6927        }
6928out:
6929        mutex_unlock(&trace_types_lock);
6930        return ret;
6931}
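
/*
 * Illustrative usage sketch (not part of the kernel source): this write
 * handler gives the "snapshot" tracefs file its semantics: 0 frees the
 * snapshot buffer, 1 allocates it (if needed) and swaps it with the live
 * buffer, and any other value simply clears the snapshot.  For example:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # take a snapshot
 *   cat /sys/kernel/tracing/snapshot        # read the frozen copy
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */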
6932
6933static int tracing_snapshot_release(struct inode *inode, struct file *file)
6934{
6935        struct seq_file *m = file->private_data;
6936        int ret;
6937
6938        ret = tracing_release(inode, file);
6939
6940        if (file->f_mode & FMODE_READ)
6941                return ret;
6942
6943        /* If write only, the seq_file is just a stub */
6944        if (m)
6945                kfree(m->private);
6946        kfree(m);
6947
6948        return 0;
6949}
6950
6951static int tracing_buffers_open(struct inode *inode, struct file *filp);
6952static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6953                                    size_t count, loff_t *ppos);
6954static int tracing_buffers_release(struct inode *inode, struct file *file);
6955static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6956                   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6957
6958static int snapshot_raw_open(struct inode *inode, struct file *filp)
6959{
6960        struct ftrace_buffer_info *info;
6961        int ret;
6962
6963        /* The following checks for tracefs lockdown */
6964        ret = tracing_buffers_open(inode, filp);
6965        if (ret < 0)
6966                return ret;
6967
6968        info = filp->private_data;
6969
6970        if (info->iter.trace->use_max_tr) {
6971                tracing_buffers_release(inode, filp);
6972                return -EBUSY;
6973        }
6974
6975        info->iter.snapshot = true;
6976        info->iter.array_buffer = &info->iter.tr->max_buffer;
6977
6978        return ret;
6979}
6980
6981#endif /* CONFIG_TRACER_SNAPSHOT */
6982
6983
6984static const struct file_operations tracing_thresh_fops = {
6985        .open           = tracing_open_generic,
6986        .read           = tracing_thresh_read,
6987        .write          = tracing_thresh_write,
6988        .llseek         = generic_file_llseek,
6989};
6990
6991#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6992static const struct file_operations tracing_max_lat_fops = {
6993        .open           = tracing_open_generic,
6994        .read           = tracing_max_lat_read,
6995        .write          = tracing_max_lat_write,
6996        .llseek         = generic_file_llseek,
6997};
6998#endif
6999
7000static const struct file_operations set_tracer_fops = {
7001        .open           = tracing_open_generic,
7002        .read           = tracing_set_trace_read,
7003        .write          = tracing_set_trace_write,
7004        .llseek         = generic_file_llseek,
7005};
7006
7007static const struct file_operations tracing_pipe_fops = {
7008        .open           = tracing_open_pipe,
7009        .poll           = tracing_poll_pipe,
7010        .read           = tracing_read_pipe,
7011        .splice_read    = tracing_splice_read_pipe,
7012        .release        = tracing_release_pipe,
7013        .llseek         = no_llseek,
7014};
7015
7016static const struct file_operations tracing_entries_fops = {
7017        .open           = tracing_open_generic_tr,
7018        .read           = tracing_entries_read,
7019        .write          = tracing_entries_write,
7020        .llseek         = generic_file_llseek,
7021        .release        = tracing_release_generic_tr,
7022};
7023
7024static const struct file_operations tracing_total_entries_fops = {
7025        .open           = tracing_open_generic_tr,
7026        .read           = tracing_total_entries_read,
7027        .llseek         = generic_file_llseek,
7028        .release        = tracing_release_generic_tr,
7029};
7030
7031static const struct file_operations tracing_free_buffer_fops = {
7032        .open           = tracing_open_generic_tr,
7033        .write          = tracing_free_buffer_write,
7034        .release        = tracing_free_buffer_release,
7035};
7036
7037static const struct file_operations tracing_mark_fops = {
7038        .open           = tracing_open_generic_tr,
7039        .write          = tracing_mark_write,
7040        .llseek         = generic_file_llseek,
7041        .release        = tracing_release_generic_tr,
7042};
7043
7044static const struct file_operations tracing_mark_raw_fops = {
7045        .open           = tracing_open_generic_tr,
7046        .write          = tracing_mark_raw_write,
7047        .llseek         = generic_file_llseek,
7048        .release        = tracing_release_generic_tr,
7049};
7050
7051static const struct file_operations trace_clock_fops = {
7052        .open           = tracing_clock_open,
7053        .read           = seq_read,
7054        .llseek         = seq_lseek,
7055        .release        = tracing_single_release_tr,
7056        .write          = tracing_clock_write,
7057};
7058
7059static const struct file_operations trace_time_stamp_mode_fops = {
7060        .open           = tracing_time_stamp_mode_open,
7061        .read           = seq_read,
7062        .llseek         = seq_lseek,
7063        .release        = tracing_single_release_tr,
7064};
7065
7066#ifdef CONFIG_TRACER_SNAPSHOT
7067static const struct file_operations snapshot_fops = {
7068        .open           = tracing_snapshot_open,
7069        .read           = seq_read,
7070        .write          = tracing_snapshot_write,
7071        .llseek         = tracing_lseek,
7072        .release        = tracing_snapshot_release,
7073};
7074
7075static const struct file_operations snapshot_raw_fops = {
7076        .open           = snapshot_raw_open,
7077        .read           = tracing_buffers_read,
7078        .release        = tracing_buffers_release,
7079        .splice_read    = tracing_buffers_splice_read,
7080        .llseek         = no_llseek,
7081};
7082
7083#endif /* CONFIG_TRACER_SNAPSHOT */
7084
7085#define TRACING_LOG_ERRS_MAX    8
7086#define TRACING_LOG_LOC_MAX     128
7087
7088#define CMD_PREFIX "  Command: "
7089
7090struct err_info {
7091        const char      **errs; /* ptr to loc-specific array of err strings */
7092        u8              type;   /* index into errs -> specific err string */
7093        u8              pos;    /* caret pos in cmd; fits u8 as MAX_FILTER_STR_VAL = 256 */
7094        u64             ts;
7095};
7096
7097struct tracing_log_err {
7098        struct list_head        list;
7099        struct err_info         info;
7100        char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7101        char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7102};
7103
7104static DEFINE_MUTEX(tracing_err_log_lock);
7105
7106static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7107{
7108        struct tracing_log_err *err;
7109
7110        if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7111                err = kzalloc(sizeof(*err), GFP_KERNEL);
7112                if (!err)
7113                        return ERR_PTR(-ENOMEM);
7114                tr->n_err_log_entries++;
7115
7116                return err;
7117        }
7118
7119        err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7120        list_del(&err->list);
7121
7122        return err;
7123}
7124
7125/**
7126 * err_pos - find the position of a string within a command for error careting
7127 * @cmd: The tracing command that caused the error
7128 * @str: The string to position the caret at within @cmd
7129 *
7130 * Finds the position of the first occurrence of @str within @cmd.  The
7131 * return value can be passed to tracing_log_err() for caret placement
7132 * within @cmd.
7133 *
7134 * Returns the index within @cmd of the first occurrence of @str or 0
7135 * if @str was not found.
7136 */
7137unsigned int err_pos(char *cmd, const char *str)
7138{
7139        char *found;
7140
7141        if (WARN_ON(!strlen(cmd)))
7142                return 0;
7143
7144        found = strstr(cmd, str);
7145        if (found)
7146                return found - cmd;
7147
7148        return 0;
7149}
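
/*
 * Illustrative example (not part of the kernel source): for a command
 * such as "hist:keys=pid:bad=1", err_pos(cmd, "bad=1") returns 14, which
 * can be passed to tracing_log_err() below so the caret lines up under
 * the offending token.
 */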
7150
7151/**
7152 * tracing_log_err - write an error to the tracing error log
7153 * @tr: The associated trace array for the error (NULL for top level array)
7154 * @loc: A string describing where the error occurred
7155 * @cmd: The tracing command that caused the error
7156 * @errs: The array of loc-specific static error strings
7157 * @type: The index into errs[], which produces the specific static err string
7158 * @pos: The position the caret should be placed in the cmd
7159 *
7160 * Writes an error into tracing/error_log of the form:
7161 *
7162 * <loc>: error: <text>
7163 *   Command: <cmd>
7164 *              ^
7165 *
7166 * tracing/error_log is a small log file containing the last
7167 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7168 * unless there has been a tracing error, and the error log can be
7169 * cleared and have its memory freed by writing the empty string in
7170 * truncation mode to it i.e. echo > tracing/error_log.
7171 *
7172 * NOTE: the @errs array along with the @type param are used to
7173 * produce a static error string - this string is not copied and saved
7174 * when the error is logged - only a pointer to it is saved.  See
7175 * existing callers for examples of how static strings are typically
7176 * defined for use with tracing_log_err().
7177 */
7178void tracing_log_err(struct trace_array *tr,
7179                     const char *loc, const char *cmd,
7180                     const char **errs, u8 type, u8 pos)
7181{
7182        struct tracing_log_err *err;
7183
7184        if (!tr)
7185                tr = &global_trace;
7186
7187        mutex_lock(&tracing_err_log_lock);
7188        err = get_tracing_log_err(tr);
7189        if (PTR_ERR(err) == -ENOMEM) {
7190                mutex_unlock(&tracing_err_log_lock);
7191                return;
7192        }
7193
7194        snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7195        snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7196
7197        err->info.errs = errs;
7198        err->info.type = type;
7199        err->info.pos = pos;
7200        err->info.ts = local_clock();
7201
7202        list_add_tail(&err->list, &tr->err_log);
7203        mutex_unlock(&tracing_err_log_lock);
7204}
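
/*
 * Illustrative caller sketch (not part of the kernel source), following
 * the pattern described in the kernel-doc above: the error strings live
 * in a static array and only a pointer into it is stored.  The names
 * foo_errs/FOO_ERR_BAD_KEY below are made up; see the in-tree callers
 * (e.g. the hist trigger code) for real examples.
 *
 *   static const char *foo_errs[] = { "Invalid key", "Duplicate field" };
 *   enum { FOO_ERR_BAD_KEY, FOO_ERR_DUP_FIELD };
 *
 *   tracing_log_err(tr, "foo: parse", cmd, foo_errs,
 *                   FOO_ERR_BAD_KEY, err_pos(cmd, bad_tok));
 */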
7205
7206static void clear_tracing_err_log(struct trace_array *tr)
7207{
7208        struct tracing_log_err *err, *next;
7209
7210        mutex_lock(&tracing_err_log_lock);
7211        list_for_each_entry_safe(err, next, &tr->err_log, list) {
7212                list_del(&err->list);
7213                kfree(err);
7214        }
7215
7216        tr->n_err_log_entries = 0;
7217        mutex_unlock(&tracing_err_log_lock);
7218}
7219
7220static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7221{
7222        struct trace_array *tr = m->private;
7223
7224        mutex_lock(&tracing_err_log_lock);
7225
7226        return seq_list_start(&tr->err_log, *pos);
7227}
7228
7229static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7230{
7231        struct trace_array *tr = m->private;
7232
7233        return seq_list_next(v, &tr->err_log, pos);
7234}
7235
7236static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7237{
7238        mutex_unlock(&tracing_err_log_lock);
7239}
7240
7241static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7242{
7243        u8 i;
7244
7245        for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7246                seq_putc(m, ' ');
7247        for (i = 0; i < pos; i++)
7248                seq_putc(m, ' ');
7249        seq_puts(m, "^\n");
7250}
7251
7252static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7253{
7254        struct tracing_log_err *err = v;
7255
7256        if (err) {
7257                const char *err_text = err->info.errs[err->info.type];
7258                u64 sec = err->info.ts;
7259                u32 nsec;
7260
7261                nsec = do_div(sec, NSEC_PER_SEC);
7262                seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7263                           err->loc, err_text);
7264                seq_printf(m, "%s", err->cmd);
7265                tracing_err_log_show_pos(m, err->info.pos);
7266        }
7267
7268        return 0;
7269}
7270
7271static const struct seq_operations tracing_err_log_seq_ops = {
7272        .start  = tracing_err_log_seq_start,
7273        .next   = tracing_err_log_seq_next,
7274        .stop   = tracing_err_log_seq_stop,
7275        .show   = tracing_err_log_seq_show
7276};
7277
7278static int tracing_err_log_open(struct inode *inode, struct file *file)
7279{
7280        struct trace_array *tr = inode->i_private;
7281        int ret = 0;
7282
7283        ret = tracing_check_open_get_tr(tr);
7284        if (ret)
7285                return ret;
7286
7287        /* If this file was opened for write, then erase contents */
7288        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7289                clear_tracing_err_log(tr);
7290
7291        if (file->f_mode & FMODE_READ) {
7292                ret = seq_open(file, &tracing_err_log_seq_ops);
7293                if (!ret) {
7294                        struct seq_file *m = file->private_data;
7295                        m->private = tr;
7296                } else {
7297                        trace_array_put(tr);
7298                }
7299        }
7300        return ret;
7301}
7302
7303static ssize_t tracing_err_log_write(struct file *file,
7304                                     const char __user *buffer,
7305                                     size_t count, loff_t *ppos)
7306{
7307        return count;
7308}
7309
7310static int tracing_err_log_release(struct inode *inode, struct file *file)
7311{
7312        struct trace_array *tr = inode->i_private;
7313
7314        trace_array_put(tr);
7315
7316        if (file->f_mode & FMODE_READ)
7317                seq_release(inode, file);
7318
7319        return 0;
7320}
7321
7322static const struct file_operations tracing_err_log_fops = {
7323        .open           = tracing_err_log_open,
7324        .write          = tracing_err_log_write,
7325        .read           = seq_read,
7326        .llseek         = seq_lseek,
7327        .release        = tracing_err_log_release,
7328};
7329
7330static int tracing_buffers_open(struct inode *inode, struct file *filp)
7331{
7332        struct trace_array *tr = inode->i_private;
7333        struct ftrace_buffer_info *info;
7334        int ret;
7335
7336        ret = tracing_check_open_get_tr(tr);
7337        if (ret)
7338                return ret;
7339
7340        info = kzalloc(sizeof(*info), GFP_KERNEL);
7341        if (!info) {
7342                trace_array_put(tr);
7343                return -ENOMEM;
7344        }
7345
7346        mutex_lock(&trace_types_lock);
7347
7348        info->iter.tr           = tr;
7349        info->iter.cpu_file     = tracing_get_cpu(inode);
7350        info->iter.trace        = tr->current_trace;
7351        info->iter.array_buffer = &tr->array_buffer;
7352        info->spare             = NULL;
7353        /* Force reading ring buffer for first read */
7354        info->read              = (unsigned int)-1;
7355
7356        filp->private_data = info;
7357
7358        tr->current_trace->ref++;
7359
7360        mutex_unlock(&trace_types_lock);
7361
7362        ret = nonseekable_open(inode, filp);
7363        if (ret < 0)
7364                trace_array_put(tr);
7365
7366        return ret;
7367}
7368
7369static __poll_t
7370tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7371{
7372        struct ftrace_buffer_info *info = filp->private_data;
7373        struct trace_iterator *iter = &info->iter;
7374
7375        return trace_poll(iter, filp, poll_table);
7376}
7377
7378static ssize_t
7379tracing_buffers_read(struct file *filp, char __user *ubuf,
7380                     size_t count, loff_t *ppos)
7381{
7382        struct ftrace_buffer_info *info = filp->private_data;
7383        struct trace_iterator *iter = &info->iter;
7384        ssize_t ret = 0;
7385        ssize_t size;
7386
7387        if (!count)
7388                return 0;
7389
7390#ifdef CONFIG_TRACER_MAX_TRACE
7391        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7392                return -EBUSY;
7393#endif
7394
7395        if (!info->spare) {
7396                info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7397                                                          iter->cpu_file);
7398                if (IS_ERR(info->spare)) {
7399                        ret = PTR_ERR(info->spare);
7400                        info->spare = NULL;
7401                } else {
7402                        info->spare_cpu = iter->cpu_file;
7403                }
7404        }
7405        if (!info->spare)
7406                return ret;
7407
7408        /* Do we have previous read data to read? */
7409        if (info->read < PAGE_SIZE)
7410                goto read;
7411
7412 again:
7413        trace_access_lock(iter->cpu_file);
7414        ret = ring_buffer_read_page(iter->array_buffer->buffer,
7415                                    &info->spare,
7416                                    count,
7417                                    iter->cpu_file, 0);
7418        trace_access_unlock(iter->cpu_file);
7419
7420        if (ret < 0) {
7421                if (trace_empty(iter)) {
7422                        if ((filp->f_flags & O_NONBLOCK))
7423                                return -EAGAIN;
7424
7425                        ret = wait_on_pipe(iter, 0);
7426                        if (ret)
7427                                return ret;
7428
7429                        goto again;
7430                }
7431                return 0;
7432        }
7433
7434        info->read = 0;
7435 read:
7436        size = PAGE_SIZE - info->read;
7437        if (size > count)
7438                size = count;
7439
7440        ret = copy_to_user(ubuf, info->spare + info->read, size);
7441        if (ret == size)
7442                return -EFAULT;
7443
7444        size -= ret;
7445
7446        *ppos += size;
7447        info->read += size;
7448
7449        return size;
7450}
7451
7452static int tracing_buffers_release(struct inode *inode, struct file *file)
7453{
7454        struct ftrace_buffer_info *info = file->private_data;
7455        struct trace_iterator *iter = &info->iter;
7456
7457        mutex_lock(&trace_types_lock);
7458
7459        iter->tr->current_trace->ref--;
7460
7461        __trace_array_put(iter->tr);
7462
7463        if (info->spare)
7464                ring_buffer_free_read_page(iter->array_buffer->buffer,
7465                                           info->spare_cpu, info->spare);
7466        kfree(info);
7467
7468        mutex_unlock(&trace_types_lock);
7469
7470        return 0;
7471}
7472
7473struct buffer_ref {
7474        struct trace_buffer     *buffer;
7475        void                    *page;
7476        int                     cpu;
7477        refcount_t              refcount;
7478};
7479
7480static void buffer_ref_release(struct buffer_ref *ref)
7481{
7482        if (!refcount_dec_and_test(&ref->refcount))
7483                return;
7484        ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7485        kfree(ref);
7486}
7487
7488static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7489                                    struct pipe_buffer *buf)
7490{
7491        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7492
7493        buffer_ref_release(ref);
7494        buf->private = 0;
7495}
7496
7497static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7498                                struct pipe_buffer *buf)
7499{
7500        struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7501
7502        if (refcount_read(&ref->refcount) > INT_MAX/2)
7503                return false;
7504
7505        refcount_inc(&ref->refcount);
7506        return true;
7507}
7508
7509/* Pipe buffer operations for a buffer. */
7510static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7511        .confirm                = generic_pipe_buf_confirm,
7512        .release                = buffer_pipe_buf_release,
7513        .steal                  = generic_pipe_buf_nosteal,
7514        .get                    = buffer_pipe_buf_get,
7515};
7516
7517/*
7518 * Callback from splice_to_pipe(); releases the pages left in the spd
7519 * in case we errored out while filling the pipe.
7520 */
7521static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7522{
7523        struct buffer_ref *ref =
7524                (struct buffer_ref *)spd->partial[i].private;
7525
7526        buffer_ref_release(ref);
7527        spd->partial[i].private = 0;
7528}
7529
7530static ssize_t
7531tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7532                            struct pipe_inode_info *pipe, size_t len,
7533                            unsigned int flags)
7534{
7535        struct ftrace_buffer_info *info = file->private_data;
7536        struct trace_iterator *iter = &info->iter;
7537        struct partial_page partial_def[PIPE_DEF_BUFFERS];
7538        struct page *pages_def[PIPE_DEF_BUFFERS];
7539        struct splice_pipe_desc spd = {
7540                .pages          = pages_def,
7541                .partial        = partial_def,
7542                .nr_pages_max   = PIPE_DEF_BUFFERS,
7543                .ops            = &buffer_pipe_buf_ops,
7544                .spd_release    = buffer_spd_release,
7545        };
7546        struct buffer_ref *ref;
7547        int entries, i;
7548        ssize_t ret = 0;
7549
7550#ifdef CONFIG_TRACER_MAX_TRACE
7551        if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7552                return -EBUSY;
7553#endif
7554
7555        if (*ppos & (PAGE_SIZE - 1))
7556                return -EINVAL;
7557
7558        if (len & (PAGE_SIZE - 1)) {
7559                if (len < PAGE_SIZE)
7560                        return -EINVAL;
7561                len &= PAGE_MASK;
7562        }
7563
7564        if (splice_grow_spd(pipe, &spd))
7565                return -ENOMEM;
7566
7567 again:
7568        trace_access_lock(iter->cpu_file);
7569        entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7570
7571        for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7572                struct page *page;
7573                int r;
7574
7575                ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7576                if (!ref) {
7577                        ret = -ENOMEM;
7578                        break;
7579                }
7580
7581                refcount_set(&ref->refcount, 1);
7582                ref->buffer = iter->array_buffer->buffer;
7583                ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7584                if (IS_ERR(ref->page)) {
7585                        ret = PTR_ERR(ref->page);
7586                        ref->page = NULL;
7587                        kfree(ref);
7588                        break;
7589                }
7590                ref->cpu = iter->cpu_file;
7591
7592                r = ring_buffer_read_page(ref->buffer, &ref->page,
7593                                          len, iter->cpu_file, 1);
7594                if (r < 0) {
7595                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
7596                                                   ref->page);
7597                        kfree(ref);
7598                        break;
7599                }
7600
7601                page = virt_to_page(ref->page);
7602
7603                spd.pages[i] = page;
7604                spd.partial[i].len = PAGE_SIZE;
7605                spd.partial[i].offset = 0;
7606                spd.partial[i].private = (unsigned long)ref;
7607                spd.nr_pages++;
7608                *ppos += PAGE_SIZE;
7609
7610                entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7611        }
7612
7613        trace_access_unlock(iter->cpu_file);
7614        spd.nr_pages = i;
7615
7616        /* did we read anything? */
7617        if (!spd.nr_pages) {
7618                if (ret)
7619                        goto out;
7620
7621                ret = -EAGAIN;
7622                if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7623                        goto out;
7624
7625                ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7626                if (ret)
7627                        goto out;
7628
7629                goto again;
7630        }
7631
7632        ret = splice_to_pipe(pipe, &spd);
7633out:
7634        splice_shrink_spd(&spd);
7635
7636        return ret;
7637}
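
/*
 * Illustrative usage sketch (not part of the kernel source): user space
 * normally consumes per_cpu/cpuN/trace_pipe_raw in page-sized chunks,
 * e.g. by splicing it into a pipe (assuming 4 KiB pages):
 *
 *   // fd is open on per_cpu/cpu0/trace_pipe_raw, pipe_fd is a pipe
 *   ssize_t n = splice(fd, NULL, pipe_fd, NULL, 4096, SPLICE_F_NONBLOCK);
 *
 * The offset must be page aligned and the length at least one page; odd
 * lengths are rounded down to a page multiple, as checked at the top of
 * this function.
 */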
7638
7639static const struct file_operations tracing_buffers_fops = {
7640        .open           = tracing_buffers_open,
7641        .read           = tracing_buffers_read,
7642        .poll           = tracing_buffers_poll,
7643        .release        = tracing_buffers_release,
7644        .splice_read    = tracing_buffers_splice_read,
7645        .llseek         = no_llseek,
7646};
7647
7648static ssize_t
7649tracing_stats_read(struct file *filp, char __user *ubuf,
7650                   size_t count, loff_t *ppos)
7651{
7652        struct inode *inode = file_inode(filp);
7653        struct trace_array *tr = inode->i_private;
7654        struct array_buffer *trace_buf = &tr->array_buffer;
7655        int cpu = tracing_get_cpu(inode);
7656        struct trace_seq *s;
7657        unsigned long cnt;
7658        unsigned long long t;
7659        unsigned long usec_rem;
7660
7661        s = kmalloc(sizeof(*s), GFP_KERNEL);
7662        if (!s)
7663                return -ENOMEM;
7664
7665        trace_seq_init(s);
7666
7667        cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7668        trace_seq_printf(s, "entries: %ld\n", cnt);
7669
7670        cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7671        trace_seq_printf(s, "overrun: %ld\n", cnt);
7672
7673        cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7674        trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7675
7676        cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7677        trace_seq_printf(s, "bytes: %ld\n", cnt);
7678
7679        if (trace_clocks[tr->clock_id].in_ns) {
7680                /* local or global for trace_clock */
7681                t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7682                usec_rem = do_div(t, USEC_PER_SEC);
7683                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7684                                                                t, usec_rem);
7685
7686                t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7687                usec_rem = do_div(t, USEC_PER_SEC);
7688                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7689        } else {
7690                /* counter or tsc mode for trace_clock */
7691                trace_seq_printf(s, "oldest event ts: %llu\n",
7692                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7693
7694                trace_seq_printf(s, "now ts: %llu\n",
7695                                ring_buffer_time_stamp(trace_buf->buffer, cpu));
7696        }
7697
7698        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7699        trace_seq_printf(s, "dropped events: %ld\n", cnt);
7700
7701        cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7702        trace_seq_printf(s, "read events: %ld\n", cnt);
7703
7704        count = simple_read_from_buffer(ubuf, count, ppos,
7705                                        s->buffer, trace_seq_used(s));
7706
7707        kfree(s);
7708
7709        return count;
7710}
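
/*
 * Illustrative output sketch (not part of the kernel source): reading
 * per_cpu/cpuN/stats produces one "name: value" line per counter printed
 * above, roughly (numbers made up):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 53248
 *   oldest event ts:  1234.567890
 *   now ts:  1234.999999
 *   dropped events: 0
 *   read events: 512
 *
 * The two timestamp lines switch to raw counter values when the selected
 * trace clock does not count in nanoseconds.
 */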
7711
7712static const struct file_operations tracing_stats_fops = {
7713        .open           = tracing_open_generic_tr,
7714        .read           = tracing_stats_read,
7715        .llseek         = generic_file_llseek,
7716        .release        = tracing_release_generic_tr,
7717};
7718
7719#ifdef CONFIG_DYNAMIC_FTRACE
7720
7721static ssize_t
7722tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7723                  size_t cnt, loff_t *ppos)
7724{
7725        ssize_t ret;
7726        char *buf;
7727        int r;
7728
7729        /* 256 should be plenty to hold the amount needed */
7730        buf = kmalloc(256, GFP_KERNEL);
7731        if (!buf)
7732                return -ENOMEM;
7733
7734        r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7735                      ftrace_update_tot_cnt,
7736                      ftrace_number_of_pages,
7737                      ftrace_number_of_groups);
7738
7739        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7740        kfree(buf);
7741        return ret;
7742}
7743
7744static const struct file_operations tracing_dyn_info_fops = {
7745        .open           = tracing_open_generic,
7746        .read           = tracing_read_dyn_info,
7747        .llseek         = generic_file_llseek,
7748};
7749#endif /* CONFIG_DYNAMIC_FTRACE */
7750
7751#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7752static void
7753ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7754                struct trace_array *tr, struct ftrace_probe_ops *ops,
7755                void *data)
7756{
7757        tracing_snapshot_instance(tr);
7758}
7759
7760static void
7761ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7762                      struct trace_array *tr, struct ftrace_probe_ops *ops,
7763                      void *data)
7764{
7765        struct ftrace_func_mapper *mapper = data;
7766        long *count = NULL;
7767
7768        if (mapper)
7769                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7770
7771        if (count) {
7772
7773                if (*count <= 0)
7774                        return;
7775
7776                (*count)--;
7777        }
7778
7779        tracing_snapshot_instance(tr);
7780}
7781
7782static int
7783ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7784                      struct ftrace_probe_ops *ops, void *data)
7785{
7786        struct ftrace_func_mapper *mapper = data;
7787        long *count = NULL;
7788
7789        seq_printf(m, "%ps:", (void *)ip);
7790
7791        seq_puts(m, "snapshot");
7792
7793        if (mapper)
7794                count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7795
7796        if (count)
7797                seq_printf(m, ":count=%ld\n", *count);
7798        else
7799                seq_puts(m, ":unlimited\n");
7800
7801        return 0;
7802}
7803
7804static int
7805ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7806                     unsigned long ip, void *init_data, void **data)
7807{
7808        struct ftrace_func_mapper *mapper = *data;
7809
7810        if (!mapper) {
7811                mapper = allocate_ftrace_func_mapper();
7812                if (!mapper)
7813                        return -ENOMEM;
7814                *data = mapper;
7815        }
7816
7817        return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7818}
7819
7820static void
7821ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7822                     unsigned long ip, void *data)
7823{
7824        struct ftrace_func_mapper *mapper = data;
7825
7826        if (!ip) {
7827                if (!mapper)
7828                        return;
7829                free_ftrace_func_mapper(mapper, NULL);
7830                return;
7831        }
7832
7833        ftrace_func_mapper_remove_ip(mapper, ip);
7834}
7835
7836static struct ftrace_probe_ops snapshot_probe_ops = {
7837        .func                   = ftrace_snapshot,
7838        .print                  = ftrace_snapshot_print,
7839};
7840
7841static struct ftrace_probe_ops snapshot_count_probe_ops = {
7842        .func                   = ftrace_count_snapshot,
7843        .print                  = ftrace_snapshot_print,
7844        .init                   = ftrace_snapshot_init,
7845        .free                   = ftrace_snapshot_free,
7846};
7847
7848static int
7849ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7850                               char *glob, char *cmd, char *param, int enable)
7851{
7852        struct ftrace_probe_ops *ops;
7853        void *count = (void *)-1;
7854        char *number;
7855        int ret;
7856
7857        if (!tr)
7858                return -ENODEV;
7859
7860        /* hash funcs only work with set_ftrace_filter */
7861        if (!enable)
7862                return -EINVAL;
7863
7864        ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7865
7866        if (glob[0] == '!')
7867                return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7868
7869        if (!param)
7870                goto out_reg;
7871
7872        number = strsep(&param, ":");
7873
7874        if (!strlen(number))
7875                goto out_reg;
7876
7877        /*
7878         * We use the callback data field (which is a pointer)
7879         * as our counter.
7880         */
7881        ret = kstrtoul(number, 0, (unsigned long *)&count);
7882        if (ret)
7883                return ret;
7884
7885 out_reg:
7886        ret = tracing_alloc_snapshot_instance(tr);
7887        if (ret < 0)
7888                goto out;
7889
7890        ret = register_ftrace_function_probe(glob, tr, ops, count);
7891
7892 out:
7893        return ret < 0 ? ret : 0;
7894}
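
/*
 * Illustrative usage sketch (not part of the kernel source): this callback
 * implements the "snapshot" command parsed from set_ftrace_filter, e.g.
 *
 *   echo 'schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *   echo 'schedule:snapshot:3' > /sys/kernel/tracing/set_ftrace_filter
 *   echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 * The optional ":count" limits how many snapshots are taken (handled by
 * snapshot_count_probe_ops above), and the '!' prefix removes the probe.
 */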
7895
7896static struct ftrace_func_command ftrace_snapshot_cmd = {
7897        .name                   = "snapshot",
7898        .func                   = ftrace_trace_snapshot_callback,
7899};
7900
7901static __init int register_snapshot_cmd(void)
7902{
7903        return register_ftrace_command(&ftrace_snapshot_cmd);
7904}
7905#else
7906static inline __init int register_snapshot_cmd(void) { return 0; }
7907#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7908
7909static struct dentry *tracing_get_dentry(struct trace_array *tr)
7910{
7911        if (WARN_ON(!tr->dir))
7912                return ERR_PTR(-ENODEV);
7913
7914        /* Top directory uses NULL as the parent */
7915        if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7916                return NULL;
7917
7918        /* All sub buffers have a descriptor */
7919        return tr->dir;
7920}
7921
7922static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7923{
7924        struct dentry *d_tracer;
7925
7926        if (tr->percpu_dir)
7927                return tr->percpu_dir;
7928
7929        d_tracer = tracing_get_dentry(tr);
7930        if (IS_ERR(d_tracer))
7931                return NULL;
7932
7933        tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7934
7935        MEM_FAIL(!tr->percpu_dir,
7936                  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7937
7938        return tr->percpu_dir;
7939}
7940
7941static struct dentry *
7942trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7943                      void *data, long cpu, const struct file_operations *fops)
7944{
7945        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7946
7947        if (ret) /* See tracing_get_cpu() */
7948                d_inode(ret)->i_cdev = (void *)(cpu + 1);
7949        return ret;
7950}
7951
7952static void
7953tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7954{
7955        struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7956        struct dentry *d_cpu;
7957        char cpu_dir[30]; /* 30 characters should be more than enough */
7958
7959        if (!d_percpu)
7960                return;
7961
7962        snprintf(cpu_dir, 30, "cpu%ld", cpu);
7963        d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7964        if (!d_cpu) {
7965                pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7966                return;
7967        }
7968
7969        /* per cpu trace_pipe */
7970        trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7971                                tr, cpu, &tracing_pipe_fops);
7972
7973        /* per cpu trace */
7974        trace_create_cpu_file("trace", 0644, d_cpu,
7975                                tr, cpu, &tracing_fops);
7976
7977        trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7978                                tr, cpu, &tracing_buffers_fops);
7979
7980        trace_create_cpu_file("stats", 0444, d_cpu,
7981                                tr, cpu, &tracing_stats_fops);
7982
7983        trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7984                                tr, cpu, &tracing_entries_fops);
7985
7986#ifdef CONFIG_TRACER_SNAPSHOT
7987        trace_create_cpu_file("snapshot", 0644, d_cpu,
7988                                tr, cpu, &snapshot_fops);
7989
7990        trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7991                                tr, cpu, &snapshot_raw_fops);
7992#endif
7993}
7994
7995#ifdef CONFIG_FTRACE_SELFTEST
7996/* Let selftest have access to static functions in this file */
7997#include "trace_selftest.c"
7998#endif
7999
8000static ssize_t
8001trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8002                        loff_t *ppos)
8003{
8004        struct trace_option_dentry *topt = filp->private_data;
8005        char *buf;
8006
8007        if (topt->flags->val & topt->opt->bit)
8008                buf = "1\n";
8009        else
8010                buf = "0\n";
8011
8012        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8013}
8014
8015static ssize_t
8016trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8017                         loff_t *ppos)
8018{
8019        struct trace_option_dentry *topt = filp->private_data;
8020        unsigned long val;
8021        int ret;
8022
8023        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8024        if (ret)
8025                return ret;
8026
8027        if (val != 0 && val != 1)
8028                return -EINVAL;
8029
8030        if (!!(topt->flags->val & topt->opt->bit) != val) {
8031                mutex_lock(&trace_types_lock);
8032                ret = __set_tracer_option(topt->tr, topt->flags,
8033                                          topt->opt, !val);
8034                mutex_unlock(&trace_types_lock);
8035                if (ret)
8036                        return ret;
8037        }
8038
8039        *ppos += cnt;
8040
8041        return cnt;
8042}
8043
8044
8045static const struct file_operations trace_options_fops = {
8046        .open = tracing_open_generic,
8047        .read = trace_options_read,
8048        .write = trace_options_write,
8049        .llseek = generic_file_llseek,
8050};
8051
8052/*
8053 * In order to pass in both the trace_array descriptor and the index
8054 * to the flag that the trace option file represents, the trace_array
8055 * has a character array of trace_flags_index[], which holds the index
8056 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8057 * The address of this character array is passed to the flag option file
8058 * read/write callbacks.
8059 *
8060 * In order to extract both the index and the trace_array descriptor,
8061 * get_tr_index() uses the following algorithm.
8062 *
8063 *   idx = *ptr;
8064 *
8065 * This works because the pointer passed in is the address of one element
8066 * of trace_flags_index[], and that element's value is its own index
8067 * (index[1] == 1, and so on).
8068 *
8069 * To get the trace_array descriptor, subtract that index from the pointer:
8070 *
8071 *   ptr - idx == &index[0]
8072 *
8073 * Then a simple container_of() from that pointer gets us to the
8074 * trace_array descriptor.
8075 */
8076static void get_tr_index(void *data, struct trace_array **ptr,
8077                         unsigned int *pindex)
8078{
8079        *pindex = *(unsigned char *)data;
8080
8081        *ptr = container_of(data - *pindex, struct trace_array,
8082                            trace_flags_index);
8083}
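
/*
 * Illustrative walk-through (not part of the kernel source): if data
 * points at tr->trace_flags_index[3], then *data == 3, so data - 3 is
 * &tr->trace_flags_index[0] and the container_of() above recovers tr
 * itself.  *pindex is then the bit number of the trace flag that this
 * option file controls.
 */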
8084
8085static ssize_t
8086trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8087                        loff_t *ppos)
8088{
8089        void *tr_index = filp->private_data;
8090        struct trace_array *tr;
8091        unsigned int index;
8092        char *buf;
8093
8094        get_tr_index(tr_index, &tr, &index);
8095
8096        if (tr->trace_flags & (1 << index))
8097                buf = "1\n";
8098        else
8099                buf = "0\n";
8100
8101        return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8102}
8103
8104static ssize_t
8105trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8106                         loff_t *ppos)
8107{
8108        void *tr_index = filp->private_data;
8109        struct trace_array *tr;
8110        unsigned int index;
8111        unsigned long val;
8112        int ret;
8113
8114        get_tr_index(tr_index, &tr, &index);
8115
8116        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8117        if (ret)
8118                return ret;
8119
8120        if (val != 0 && val != 1)
8121                return -EINVAL;
8122
8123        mutex_lock(&event_mutex);
8124        mutex_lock(&trace_types_lock);
8125        ret = set_tracer_flag(tr, 1 << index, val);
8126        mutex_unlock(&trace_types_lock);
8127        mutex_unlock(&event_mutex);
8128
8129        if (ret < 0)
8130                return ret;
8131
8132        *ppos += cnt;
8133
8134        return cnt;
8135}
8136
8137static const struct file_operations trace_options_core_fops = {
8138        .open = tracing_open_generic,
8139        .read = trace_options_core_read,
8140        .write = trace_options_core_write,
8141        .llseek = generic_file_llseek,
8142};
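    /*
     * For reference: these fops back the per-flag files created under the
     * tracefs "options" directory (see create_trace_options_dir() and
     * create_trace_option_file() below), so a write such as
     *
     *   echo 1 > /sys/kernel/tracing/options/<flag>
     *
     * (path assumes the usual tracefs mount point) lands in
     * trace_options_core_write() for the core flags, or in
     * trace_options_write() for tracer-specific flags.
     */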
8143
8144struct dentry *trace_create_file(const char *name,
8145                                 umode_t mode,
8146                                 struct dentry *parent,
8147                                 void *data,
8148                                 const struct file_operations *fops)
8149{
8150        struct dentry *ret;
8151
8152        ret = tracefs_create_file(name, mode, parent, data, fops);
8153        if (!ret)
8154                pr_warn("Could not create tracefs '%s' entry\n", name);
8155
8156        return ret;
8157}
8158
8159
8160static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8161{
8162        struct dentry *d_tracer;
8163
8164        if (tr->options)
8165                return tr->options;
8166
8167        d_tracer = tracing_get_dentry(tr);
8168        if (IS_ERR(d_tracer))
8169                return NULL;
8170
8171        tr->options = tracefs_create_dir("options", d_tracer);
8172        if (!tr->options) {
8173                pr_warn("Could not create tracefs directory 'options'\n");
8174                return NULL;
8175        }
8176
8177        return tr->options;
8178}
8179
8180static void
8181create_trace_option_file(struct trace_array *tr,
8182                         struct trace_option_dentry *topt,
8183                         struct tracer_flags *flags,
8184                         struct tracer_opt *opt)
8185{
8186        struct dentry *t_options;
8187
8188        t_options = trace_options_init_dentry(tr);
8189        if (!t_options)
8190                return;
8191
8192        topt->flags = flags;
8193        topt->opt = opt;
8194        topt->tr = tr;
8195
8196        topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8197                                    &trace_options_fops);
8198
8199}
8200
8201static void
8202create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8203{
8204        struct trace_option_dentry *topts;
8205        struct trace_options *tr_topts;
8206        struct tracer_flags *flags;
8207        struct tracer_opt *opts;
8208        int cnt;
8209        int i;
8210
8211        if (!tracer)
8212                return;
8213
8214        flags = tracer->flags;
8215
8216        if (!flags || !flags->opts)
8217                return;
8218
8219        /*
8220         * If this is an instance, only create flags for tracers
8221         * the instance may have.
8222         */
8223        if (!trace_ok_for_array(tracer, tr))
8224                return;
8225
8226        for (i = 0; i < tr->nr_topts; i++) {
8227                /* Make sure there are no duplicate flags. */
8228                if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8229                        return;
8230        }
8231
8232        opts = flags->opts;
8233
8234        for (cnt = 0; opts[cnt].name; cnt++)
8235                ;
8236
8237        topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8238        if (!topts)
8239                return;
8240
8241        tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8242                            GFP_KERNEL);
8243        if (!tr_topts) {
8244                kfree(topts);
8245                return;
8246        }
8247
8248        tr->topts = tr_topts;
8249        tr->topts[tr->nr_topts].tracer = tracer;
8250        tr->topts[tr->nr_topts].topts = topts;
8251        tr->nr_topts++;
8252
8253        for (cnt = 0; opts[cnt].name; cnt++) {
8254                create_trace_option_file(tr, &topts[cnt], flags,
8255                                         &opts[cnt]);
8256                MEM_FAIL(topts[cnt].entry == NULL,
8257                          "Failed to create trace option: %s",
8258                          opts[cnt].name);
8259        }
8260}
8261
8262static struct dentry *
8263create_trace_option_core_file(struct trace_array *tr,
8264                              const char *option, long index)
8265{
8266        struct dentry *t_options;
8267
8268        t_options = trace_options_init_dentry(tr);
8269        if (!t_options)
8270                return NULL;
8271
8272        return trace_create_file(option, 0644, t_options,
8273                                 (void *)&tr->trace_flags_index[index],
8274                                 &trace_options_core_fops);
8275}
8276
8277static void create_trace_options_dir(struct trace_array *tr)
8278{
8279        struct dentry *t_options;
8280        bool top_level = tr == &global_trace;
8281        int i;
8282
8283        t_options = trace_options_init_dentry(tr);
8284        if (!t_options)
8285                return;
8286
8287        for (i = 0; trace_options[i]; i++) {
8288                if (top_level ||
8289                    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8290                        create_trace_option_core_file(tr, trace_options[i], i);
8291        }
8292}
8293
8294static ssize_t
8295rb_simple_read(struct file *filp, char __user *ubuf,
8296               size_t cnt, loff_t *ppos)
8297{
8298        struct trace_array *tr = filp->private_data;
8299        char buf[64];
8300        int r;
8301
8302        r = tracer_tracing_is_on(tr);
8303        r = sprintf(buf, "%d\n", r);
8304
8305        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8306}
8307
8308static ssize_t
8309rb_simple_write(struct file *filp, const char __user *ubuf,
8310                size_t cnt, loff_t *ppos)
8311{
8312        struct trace_array *tr = filp->private_data;
8313        struct trace_buffer *buffer = tr->array_buffer.buffer;
8314        unsigned long val;
8315        int ret;
8316
8317        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8318        if (ret)
8319                return ret;
8320
8321        if (buffer) {
8322                mutex_lock(&trace_types_lock);
8323                if (!!val == tracer_tracing_is_on(tr)) {
8324                        val = 0; /* do nothing */
8325                } else if (val) {
8326                        tracer_tracing_on(tr);
8327                        if (tr->current_trace->start)
8328                                tr->current_trace->start(tr);
8329                } else {
8330                        tracer_tracing_off(tr);
8331                        if (tr->current_trace->stop)
8332                                tr->current_trace->stop(tr);
8333                }
8334                mutex_unlock(&trace_types_lock);
8335        }
8336
8337        (*ppos)++;
8338
8339        return cnt;
8340}
8341
8342static const struct file_operations rb_simple_fops = {
8343        .open           = tracing_open_generic_tr,
8344        .read           = rb_simple_read,
8345        .write          = rb_simple_write,
8346        .release        = tracing_release_generic_tr,
8347        .llseek         = default_llseek,
8348};
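    /*
     * Usage sketch (assuming the usual tracefs mount point): these fops
     * implement the per-instance "tracing_on" file, so
     *
     *   echo 0 > /sys/kernel/tracing/tracing_on
     *   echo 1 > /sys/kernel/tracing/tracing_on
     *
     * pause and resume writes into the ring buffer via rb_simple_write()
     * without tearing down the current tracer.
     */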
8349
8350static ssize_t
8351buffer_percent_read(struct file *filp, char __user *ubuf,
8352                    size_t cnt, loff_t *ppos)
8353{
8354        struct trace_array *tr = filp->private_data;
8355        char buf[64];
8356        int r;
8357
8358        r = tr->buffer_percent;
8359        r = sprintf(buf, "%d\n", r);
8360
8361        return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8362}
8363
8364static ssize_t
8365buffer_percent_write(struct file *filp, const char __user *ubuf,
8366                     size_t cnt, loff_t *ppos)
8367{
8368        struct trace_array *tr = filp->private_data;
8369        unsigned long val;
8370        int ret;
8371
8372        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8373        if (ret)
8374                return ret;
8375
8376        if (val > 100)
8377                return -EINVAL;
8378
8379        if (!val)
8380                val = 1;
8381
8382        tr->buffer_percent = val;
8383
8384        (*ppos)++;
8385
8386        return cnt;
8387}
8388
8389static const struct file_operations buffer_percent_fops = {
8390        .open           = tracing_open_generic_tr,
8391        .read           = buffer_percent_read,
8392        .write          = buffer_percent_write,
8393        .release        = tracing_release_generic_tr,
8394        .llseek         = default_llseek,
8395};
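    /*
     * Behavioural note: buffer_percent is the watermark for how full the
     * ring buffer must be before a blocked reader (e.g. on trace_pipe_raw)
     * is woken; a write of 0 is bumped to 1 above. For example, with the
     * usual tracefs mount point:
     *
     *   echo 50 > /sys/kernel/tracing/buffer_percent
     */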
8396
8397static struct dentry *trace_instance_dir;
8398
8399static void
8400init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8401
8402static int
8403allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8404{
8405        enum ring_buffer_flags rb_flags;
8406
8407        rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8408
8409        buf->tr = tr;
8410
8411        buf->buffer = ring_buffer_alloc(size, rb_flags);
8412        if (!buf->buffer)
8413                return -ENOMEM;
8414
8415        buf->data = alloc_percpu(struct trace_array_cpu);
8416        if (!buf->data) {
8417                ring_buffer_free(buf->buffer);
8418                buf->buffer = NULL;
8419                return -ENOMEM;
8420        }
8421
8422        /* Allocate the first page for all buffers */
8423        set_buffer_entries(&tr->array_buffer,
8424                           ring_buffer_size(tr->array_buffer.buffer, 0));
8425
8426        return 0;
8427}
8428
8429static int allocate_trace_buffers(struct trace_array *tr, int size)
8430{
8431        int ret;
8432
8433        ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8434        if (ret)
8435                return ret;
8436
8437#ifdef CONFIG_TRACER_MAX_TRACE
8438        ret = allocate_trace_buffer(tr, &tr->max_buffer,
8439                                    allocate_snapshot ? size : 1);
8440        if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8441                ring_buffer_free(tr->array_buffer.buffer);
8442                tr->array_buffer.buffer = NULL;
8443                free_percpu(tr->array_buffer.data);
8444                tr->array_buffer.data = NULL;
8445                return -ENOMEM;
8446        }
8447        tr->allocated_snapshot = allocate_snapshot;
8448
8449        /*
8450         * Only the top level trace array gets its snapshot allocated
8451         * from the kernel command line.
8452         */
8453        allocate_snapshot = false;
8454#endif
8455        return 0;
8456}
8457
8458static void free_trace_buffer(struct array_buffer *buf)
8459{
8460        if (buf->buffer) {
8461                ring_buffer_free(buf->buffer);
8462                buf->buffer = NULL;
8463                free_percpu(buf->data);
8464                buf->data = NULL;
8465        }
8466}
8467
8468static void free_trace_buffers(struct trace_array *tr)
8469{
8470        if (!tr)
8471                return;
8472
8473        free_trace_buffer(&tr->array_buffer);
8474
8475#ifdef CONFIG_TRACER_MAX_TRACE
8476        free_trace_buffer(&tr->max_buffer);
8477#endif
8478}
8479
8480static void init_trace_flags_index(struct trace_array *tr)
8481{
8482        int i;
8483
8484        /* Used by the trace options files */
8485        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8486                tr->trace_flags_index[i] = i;
8487}
8488
8489static void __update_tracer_options(struct trace_array *tr)
8490{
8491        struct tracer *t;
8492
8493        for (t = trace_types; t; t = t->next)
8494                add_tracer_options(tr, t);
8495}
8496
8497static void update_tracer_options(struct trace_array *tr)
8498{
8499        mutex_lock(&trace_types_lock);
8500        __update_tracer_options(tr);
8501        mutex_unlock(&trace_types_lock);
8502}
8503
8504/* Must have trace_types_lock held */
8505struct trace_array *trace_array_find(const char *instance)
8506{
8507        struct trace_array *tr, *found = NULL;
8508
8509        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8510                if (tr->name && strcmp(tr->name, instance) == 0) {
8511                        found = tr;
8512                        break;
8513                }
8514        }
8515
8516        return found;
8517}
8518
8519struct trace_array *trace_array_find_get(const char *instance)
8520{
8521        struct trace_array *tr;
8522
8523        mutex_lock(&trace_types_lock);
8524        tr = trace_array_find(instance);
8525        if (tr)
8526                tr->ref++;
8527        mutex_unlock(&trace_types_lock);
8528
8529        return tr;
8530}
8531
8532static struct trace_array *trace_array_create(const char *name)
8533{
8534        struct trace_array *tr;
8535        int ret;
8536
8537        ret = -ENOMEM;
8538        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8539        if (!tr)
8540                return ERR_PTR(ret);
8541
8542        tr->name = kstrdup(name, GFP_KERNEL);
8543        if (!tr->name)
8544                goto out_free_tr;
8545
8546        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8547                goto out_free_tr;
8548
8549        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8550
8551        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8552
8553        raw_spin_lock_init(&tr->start_lock);
8554
8555        tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8556
8557        tr->current_trace = &nop_trace;
8558
8559        INIT_LIST_HEAD(&tr->systems);
8560        INIT_LIST_HEAD(&tr->events);
8561        INIT_LIST_HEAD(&tr->hist_vars);
8562        INIT_LIST_HEAD(&tr->err_log);
8563
8564        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8565                goto out_free_tr;
8566
8567        tr->dir = tracefs_create_dir(name, trace_instance_dir);
8568        if (!tr->dir)
8569                goto out_free_tr;
8570
8571        ret = event_trace_add_tracer(tr->dir, tr);
8572        if (ret) {
8573                tracefs_remove(tr->dir);
8574                goto out_free_tr;
8575        }
8576
8577        ftrace_init_trace_array(tr);
8578
8579        init_tracer_tracefs(tr, tr->dir);
8580        init_trace_flags_index(tr);
8581        __update_tracer_options(tr);
8582
8583        list_add(&tr->list, &ftrace_trace_arrays);
8584
8585        tr->ref++;
8586
8587
8588        return tr;
8589
8590 out_free_tr:
8591        free_trace_buffers(tr);
8592        free_cpumask_var(tr->tracing_cpumask);
8593        kfree(tr->name);
8594        kfree(tr);
8595
8596        return ERR_PTR(ret);
8597}
8598
8599static int instance_mkdir(const char *name)
8600{
8601        struct trace_array *tr;
8602        int ret;
8603
8604        mutex_lock(&event_mutex);
8605        mutex_lock(&trace_types_lock);
8606
8607        ret = -EEXIST;
8608        if (trace_array_find(name))
8609                goto out_unlock;
8610
8611        tr = trace_array_create(name);
8612
8613        ret = PTR_ERR_OR_ZERO(tr);
8614
8615out_unlock:
8616        mutex_unlock(&trace_types_lock);
8617        mutex_unlock(&event_mutex);
8618        return ret;
8619}
8620
8621/**
8622 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8623 * @name: The name of the trace array to be looked up/created.
8624 *
8625 * Returns a pointer to the trace array with the given name, or NULL
8626 * if it cannot be created.
8627 *
8628 * NOTE: This function increments the reference counter associated with
8629 * the trace array returned. This makes sure it cannot be freed while
8630 * in use. Use trace_array_put() once the trace array is no longer
8631 * needed. If the trace_array is to be freed, trace_array_destroy()
8632 * needs to be called after the trace_array_put(), or simply let user
8633 * space delete it from the tracefs instances directory. But until
8634 * trace_array_put() is called, user space cannot delete it.
8635 *
8636 */
8637struct trace_array *trace_array_get_by_name(const char *name)
8638{
8639        struct trace_array *tr;
8640
8641        mutex_lock(&event_mutex);
8642        mutex_lock(&trace_types_lock);
8643
8644        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8645                if (tr->name && strcmp(tr->name, name) == 0)
8646                        goto out_unlock;
8647        }
8648
8649        tr = trace_array_create(name);
8650
8651        if (IS_ERR(tr))
8652                tr = NULL;
8653out_unlock:
8654        if (tr)
8655                tr->ref++;
8656
8657        mutex_unlock(&trace_types_lock);
8658        mutex_unlock(&event_mutex);
8659        return tr;
8660}
8661EXPORT_SYMBOL_GPL(trace_array_get_by_name);
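    /*
     * Minimal in-kernel usage sketch (illustrative only, error handling
     * trimmed, "my_instance" is a placeholder name):
     *
     *   struct trace_array *tr;
     *
     *   tr = trace_array_get_by_name("my_instance");
     *   if (!tr)
     *           return -ENOMEM;
     *   ...
     *   trace_array_put(tr);
     *
     * and, only if the instance should also be removed, follow the
     * trace_array_put() with trace_array_destroy(tr) as described in the
     * comment above.
     */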
8662
8663static int __remove_instance(struct trace_array *tr)
8664{
8665        int i;
8666
8667        /* Reference counter for a newly created trace array = 1. */
8668        if (tr->ref > 1 || (tr->current_trace && tr->current_trace->ref))
8669                return -EBUSY;
8670
8671        list_del(&tr->list);
8672
8673        /* Disable all the flags that were enabled coming in */
8674        for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8675                if ((1 << i) & ZEROED_TRACE_FLAGS)
8676                        set_tracer_flag(tr, 1 << i, 0);
8677        }
8678
8679        tracing_set_nop(tr);
8680        clear_ftrace_function_probes(tr);
8681        event_trace_del_tracer(tr);
8682        ftrace_clear_pids(tr);
8683        ftrace_destroy_function_files(tr);
8684        tracefs_remove(tr->dir);
8685        free_trace_buffers(tr);
8686
8687        for (i = 0; i < tr->nr_topts; i++) {
8688                kfree(tr->topts[i].topts);
8689        }
8690        kfree(tr->topts);
8691
8692        free_cpumask_var(tr->tracing_cpumask);
8693        kfree(tr->name);
8694        kfree(tr);
8695        tr = NULL;
8696
8697        return 0;
8698}
8699
8700int trace_array_destroy(struct trace_array *this_tr)
8701{
8702        struct trace_array *tr;
8703        int ret;
8704
8705        if (!this_tr)
8706                return -EINVAL;
8707
8708        mutex_lock(&event_mutex);
8709        mutex_lock(&trace_types_lock);
8710
8711        ret = -ENODEV;
8712
8713        /* Making sure trace array exists before destroying it. */
8714        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8715                if (tr == this_tr) {
8716                        ret = __remove_instance(tr);
8717                        break;
8718                }
8719        }
8720
8721        mutex_unlock(&trace_types_lock);
8722        mutex_unlock(&event_mutex);
8723
8724        return ret;
8725}
8726EXPORT_SYMBOL_GPL(trace_array_destroy);
8727
8728static int instance_rmdir(const char *name)
8729{
8730        struct trace_array *tr;
8731        int ret;
8732
8733        mutex_lock(&event_mutex);
8734        mutex_lock(&trace_types_lock);
8735
8736        ret = -ENODEV;
8737        tr = trace_array_find(name);
8738        if (tr)
8739                ret = __remove_instance(tr);
8740
8741        mutex_unlock(&trace_types_lock);
8742        mutex_unlock(&event_mutex);
8743
8744        return ret;
8745}
8746
8747static __init void create_trace_instances(struct dentry *d_tracer)
8748{
8749        trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8750                                                         instance_mkdir,
8751                                                         instance_rmdir);
8752        if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8753                return;
8754}
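    /*
     * From user space the same create/remove paths are reached through the
     * "instances" directory registered above, e.g. (assuming the usual
     * tracefs mount point, "foo" being an arbitrary name):
     *
     *   mkdir /sys/kernel/tracing/instances/foo     -> instance_mkdir()
     *   rmdir /sys/kernel/tracing/instances/foo     -> instance_rmdir()
     */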
8755
8756static void
8757init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8758{
8759        struct trace_event_file *file;
8760        int cpu;
8761
8762        trace_create_file("available_tracers", 0444, d_tracer,
8763                        tr, &show_traces_fops);
8764
8765        trace_create_file("current_tracer", 0644, d_tracer,
8766                        tr, &set_tracer_fops);
8767
8768        trace_create_file("tracing_cpumask", 0644, d_tracer,
8769                          tr, &tracing_cpumask_fops);
8770
8771        trace_create_file("trace_options", 0644, d_tracer,
8772                          tr, &tracing_iter_fops);
8773
8774        trace_create_file("trace", 0644, d_tracer,
8775                          tr, &tracing_fops);
8776
8777        trace_create_file("trace_pipe", 0444, d_tracer,
8778                          tr, &tracing_pipe_fops);
8779
8780        trace_create_file("buffer_size_kb", 0644, d_tracer,
8781                          tr, &tracing_entries_fops);
8782
8783        trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8784                          tr, &tracing_total_entries_fops);
8785
8786        trace_create_file("free_buffer", 0200, d_tracer,
8787                          tr, &tracing_free_buffer_fops);
8788
8789        trace_create_file("trace_marker", 0220, d_tracer,
8790                          tr, &tracing_mark_fops);
8791
8792        file = __find_event_file(tr, "ftrace", "print");
8793        if (file && file->dir)
8794                trace_create_file("trigger", 0644, file->dir, file,
8795                                  &event_trigger_fops);
8796        tr->trace_marker_file = file;
8797
8798        trace_create_file("trace_marker_raw", 0220, d_tracer,
8799                          tr, &tracing_mark_raw_fops);
8800
8801        trace_create_file("trace_clock", 0644, d_tracer, tr,
8802                          &trace_clock_fops);
8803
8804        trace_create_file("tracing_on", 0644, d_tracer,
8805                          tr, &rb_simple_fops);
8806
8807        trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8808                          &trace_time_stamp_mode_fops);
8809
8810        tr->buffer_percent = 50;
8811
8812        trace_create_file("buffer_percent", 0444, d_tracer,
8813                        tr, &buffer_percent_fops);
8814
8815        create_trace_options_dir(tr);
8816
8817#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8818        trace_create_maxlat_file(tr, d_tracer);
8819#endif
8820
8821        if (ftrace_create_function_files(tr, d_tracer))
8822                MEM_FAIL(1, "Could not allocate function filter files");
8823
8824#ifdef CONFIG_TRACER_SNAPSHOT
8825        trace_create_file("snapshot", 0644, d_tracer,
8826                          tr, &snapshot_fops);
8827#endif
8828
8829        trace_create_file("error_log", 0644, d_tracer,
8830                          tr, &tracing_err_log_fops);
8831
8832        for_each_tracing_cpu(cpu)
8833                tracing_init_tracefs_percpu(tr, cpu);
8834
8835        ftrace_init_tracefs(tr, d_tracer);
8836}
8837
8838static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8839{
8840        struct vfsmount *mnt;
8841        struct file_system_type *type;
8842
8843        /*
8844         * To maintain backward compatibility for tools that mount
8845         * debugfs to get to the tracing facility, tracefs is automatically
8846         * mounted to the debugfs/tracing directory.
8847         */
8848        type = get_fs_type("tracefs");
8849        if (!type)
8850                return NULL;
8851        mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8852        put_filesystem(type);
8853        if (IS_ERR(mnt))
8854                return NULL;
8855        mntget(mnt);
8856
8857        return mnt;
8858}
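    /*
     * In practice this means both of the following paths reach the same
     * files, assuming tracefs and debugfs are mounted in their usual
     * locations:
     *
     *   /sys/kernel/tracing            (tracefs mounted directly)
     *   /sys/kernel/debug/tracing      (automounted via the hook above)
     */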
8859
8860/**
8861 * tracing_init_dentry - initialize top level trace array
8862 *
8863 * This is called when creating files or directories in the tracing
8864 * directory. It is called via fs_initcall() by any of the boot up code
8865 * and expects to return the dentry of the top level tracing directory.
8866 */
8867struct dentry *tracing_init_dentry(void)
8868{
8869        struct trace_array *tr = &global_trace;
8870
8871        if (security_locked_down(LOCKDOWN_TRACEFS)) {
8872                pr_warn("Tracing disabled due to lockdown\n");
8873                return ERR_PTR(-EPERM);
8874        }
8875
8876        /* The top level trace array uses NULL as parent */
8877        if (tr->dir)
8878                return NULL;
8879
8880        if (WARN_ON(!tracefs_initialized()) ||
8881                (IS_ENABLED(CONFIG_DEBUG_FS) &&
8882                 WARN_ON(!debugfs_initialized())))
8883                return ERR_PTR(-ENODEV);
8884
8885        /*
8886         * As there may still be users that expect the tracing
8887         * files to exist in debugfs/tracing, we must automount
8888         * the tracefs file system there, so older tools still
8889         * work with the newer kernel.
8890         */
8891        tr->dir = debugfs_create_automount("tracing", NULL,
8892                                           trace_automount, NULL);
8893
8894        return NULL;
8895}
8896
8897extern struct trace_eval_map *__start_ftrace_eval_maps[];
8898extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8899
8900static void __init trace_eval_init(void)
8901{
8902        int len;
8903
8904        len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8905        trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8906}
8907
8908#ifdef CONFIG_MODULES
8909static void trace_module_add_evals(struct module *mod)
8910{
8911        if (!mod->num_trace_evals)
8912                return;
8913
8914        /*
8915         * Modules with bad taint do not have events created, so do
8916         * not bother with their enums (eval maps) either.
8917         */
8918        if (trace_module_has_bad_taint(mod))
8919                return;
8920
8921        trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8922}
8923
8924#ifdef CONFIG_TRACE_EVAL_MAP_FILE
8925static void trace_module_remove_evals(struct module *mod)
8926{
8927        union trace_eval_map_item *map;
8928        union trace_eval_map_item **last = &trace_eval_maps;
8929
8930        if (!mod->num_trace_evals)
8931                return;
8932
8933        mutex_lock(&trace_eval_mutex);
8934
8935        map = trace_eval_maps;
8936
8937        while (map) {
8938                if (map->head.mod == mod)
8939                        break;
8940                map = trace_eval_jmp_to_tail(map);
8941                last = &map->tail.next;
8942                map = map->tail.next;
8943        }
8944        if (!map)
8945                goto out;
8946
8947        *last = trace_eval_jmp_to_tail(map)->tail.next;
8948        kfree(map);
8949 out:
8950        mutex_unlock(&trace_eval_mutex);
8951}
8952#else
8953static inline void trace_module_remove_evals(struct module *mod) { }
8954#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8955
8956static int trace_module_notify(struct notifier_block *self,
8957                               unsigned long val, void *data)
8958{
8959        struct module *mod = data;
8960
8961        switch (val) {
8962        case MODULE_STATE_COMING:
8963                trace_module_add_evals(mod);
8964                break;
8965        case MODULE_STATE_GOING:
8966                trace_module_remove_evals(mod);
8967                break;
8968        }
8969
8970        return 0;
8971}
8972
8973static struct notifier_block trace_module_nb = {
8974        .notifier_call = trace_module_notify,
8975        .priority = 0,
8976};
8977#endif /* CONFIG_MODULES */
8978
8979static __init int tracer_init_tracefs(void)
8980{
8981        struct dentry *d_tracer;
8982
8983        trace_access_lock_init();
8984
8985        d_tracer = tracing_init_dentry();
8986        if (IS_ERR(d_tracer))
8987                return 0;
8988
8989        event_trace_init();
8990
8991        init_tracer_tracefs(&global_trace, d_tracer);
8992        ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8993
8994        trace_create_file("tracing_thresh", 0644, d_tracer,
8995                        &global_trace, &tracing_thresh_fops);
8996
8997        trace_create_file("README", 0444, d_tracer,
8998                        NULL, &tracing_readme_fops);
8999
9000        trace_create_file("saved_cmdlines", 0444, d_tracer,
9001                        NULL, &tracing_saved_cmdlines_fops);
9002
9003        trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9004                          NULL, &tracing_saved_cmdlines_size_fops);
9005
9006        trace_create_file("saved_tgids", 0444, d_tracer,
9007                        NULL, &tracing_saved_tgids_fops);
9008
9009        trace_eval_init();
9010
9011        trace_create_eval_file(d_tracer);
9012
9013#ifdef CONFIG_MODULES
9014        register_module_notifier(&trace_module_nb);
9015#endif
9016
9017#ifdef CONFIG_DYNAMIC_FTRACE
9018        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9019                        NULL, &tracing_dyn_info_fops);
9020#endif
9021
9022        create_trace_instances(d_tracer);
9023
9024        update_tracer_options(&global_trace);
9025
9026        return 0;
9027}
9028
9029static int trace_panic_handler(struct notifier_block *this,
9030                               unsigned long event, void *unused)
9031{
9032        if (ftrace_dump_on_oops)
9033                ftrace_dump(ftrace_dump_on_oops);
9034        return NOTIFY_OK;
9035}
9036
9037static struct notifier_block trace_panic_notifier = {
9038        .notifier_call  = trace_panic_handler,
9039        .next           = NULL,
9040        .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9041};
9042
9043static int trace_die_handler(struct notifier_block *self,
9044                             unsigned long val,
9045                             void *data)
9046{
9047        switch (val) {
9048        case DIE_OOPS:
9049                if (ftrace_dump_on_oops)
9050                        ftrace_dump(ftrace_dump_on_oops);
9051                break;
9052        default:
9053                break;
9054        }
9055        return NOTIFY_OK;
9056}
9057
9058static struct notifier_block trace_die_notifier = {
9059        .notifier_call = trace_die_handler,
9060        .priority = 200
9061};
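    /*
     * Reminder: the two notifiers above only dump the buffer when
     * ftrace_dump_on_oops is non-zero, which can be set with the
     * "ftrace_dump_on_oops" kernel command line parameter or, typically,
     * via /proc/sys/kernel/ftrace_dump_on_oops at run time.
     */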
9062
9063/*
9064 * printk is set to a max of 1024, but we really don't need it that big.
9065 * Nothing should be printing 1000 characters anyway.
9066 */
9067#define TRACE_MAX_PRINT         1000
9068
9069/*
9070 * Define here KERN_TRACE so that we have one place to modify
9071 * it if we decide to change what log level the ftrace dump
9072 * should be at.
9073 */
9074#define KERN_TRACE              KERN_EMERG
9075
9076void
9077trace_printk_seq(struct trace_seq *s)
9078{
9079        /* Probably should print a warning here. */
9080        if (s->seq.len >= TRACE_MAX_PRINT)
9081                s->seq.len = TRACE_MAX_PRINT;
9082
9083        /*
9084         * More paranoid code. Although the buffer size is set to
9085         * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9086         * an extra layer of protection.
9087         */
9088        if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9089                s->seq.len = s->seq.size - 1;
9090
9091        /* Should be NUL terminated already, but we are paranoid. */
9092        s->buffer[s->seq.len] = 0;
9093
9094        printk(KERN_TRACE "%s", s->buffer);
9095
9096        trace_seq_init(s);
9097}
9098
9099void trace_init_global_iter(struct trace_iterator *iter)
9100{
9101        iter->tr = &global_trace;
9102        iter->trace = iter->tr->current_trace;
9103        iter->cpu_file = RING_BUFFER_ALL_CPUS;
9104        iter->array_buffer = &global_trace.array_buffer;
9105
9106        if (iter->trace && iter->trace->open)
9107                iter->trace->open(iter);
9108
9109        /* Annotate start of buffers if we had overruns */
9110        if (ring_buffer_overruns(iter->array_buffer->buffer))
9111                iter->iter_flags |= TRACE_FILE_ANNOTATE;
9112
9113        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9114        if (trace_clocks[iter->tr->clock_id].in_ns)
9115                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9116}
9117
9118void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9119{
9120        /* use static because iter can be a bit big for the stack */
9121        static struct trace_iterator iter;
9122        static atomic_t dump_running;
9123        struct trace_array *tr = &global_trace;
9124        unsigned int old_userobj;
9125        unsigned long flags;
9126        int cnt = 0, cpu;
9127
9128        /* Only allow one dump user at a time. */
9129        if (atomic_inc_return(&dump_running) != 1) {
9130                atomic_dec(&dump_running);
9131                return;
9132        }
9133
9134        /*
9135         * Always turn off tracing when we dump.
9136         * We don't need to show trace output of what happens
9137         * between multiple crashes.
9138         *
9139         * If the user does a sysrq-z, then they can re-enable
9140         * tracing with echo 1 > tracing_on.
9141         */
9142        tracing_off();
9143
9144        local_irq_save(flags);
9145        printk_nmi_direct_enter();
9146
9147        /* Simulate the iterator */
9148        trace_init_global_iter(&iter);
9149
9150        for_each_tracing_cpu(cpu) {
9151                atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9152        }
9153
9154        old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9155
9156        /* don't look at user memory in panic mode */
9157        tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9158
9159        switch (oops_dump_mode) {
9160        case DUMP_ALL:
9161                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9162                break;
9163        case DUMP_ORIG:
9164                iter.cpu_file = raw_smp_processor_id();
9165                break;
9166        case DUMP_NONE:
9167                goto out_enable;
9168        default:
9169                printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9170                iter.cpu_file = RING_BUFFER_ALL_CPUS;
9171        }
9172
9173        printk(KERN_TRACE "Dumping ftrace buffer:\n");
9174
9175        /* Did function tracer already get disabled? */
9176        if (ftrace_is_dead()) {
9177                printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9178                printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9179        }
9180
9181        /*
9182         * We need to stop all tracing on all CPUS to read
9183         * the next buffer. This is a bit expensive, but is
9184         * not done often. We fill all that we can read,
9185         * and then release the locks again.
9186         */
9187
9188        while (!trace_empty(&iter)) {
9189
9190                if (!cnt)
9191                        printk(KERN_TRACE "---------------------------------\n");
9192
9193                cnt++;
9194
9195                trace_iterator_reset(&iter);
9196                iter.iter_flags |= TRACE_FILE_LAT_FMT;
9197
9198                if (trace_find_next_entry_inc(&iter) != NULL) {
9199                        int ret;
9200
9201                        ret = print_trace_line(&iter);
9202                        if (ret != TRACE_TYPE_NO_CONSUME)
9203                                trace_consume(&iter);
9204                }
9205                touch_nmi_watchdog();
9206
9207                trace_printk_seq(&iter.seq);
9208        }
9209
9210        if (!cnt)
9211                printk(KERN_TRACE "   (ftrace buffer empty)\n");
9212        else
9213                printk(KERN_TRACE "---------------------------------\n");
9214
9215 out_enable:
9216        tr->trace_flags |= old_userobj;
9217
9218        for_each_tracing_cpu(cpu) {
9219                atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9220        }
9221        atomic_dec(&dump_running);
9222        printk_nmi_direct_exit();
9223        local_irq_restore(flags);
9224}
9225EXPORT_SYMBOL_GPL(ftrace_dump);
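    /*
     * Besides the panic/die notifiers above, ftrace_dump() can be
     * triggered manually through the magic SysRq 'z' key, e.g.:
     *
     *   echo z > /proc/sysrq-trigger
     *
     * (requires CONFIG_MAGIC_SYSRQ and an enabled sysrq mask).
     */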
9226
9227int trace_run_command(const char *buf, int (*createfn)(int, char **))
9228{
9229        char **argv;
9230        int argc, ret;
9231
9232        argc = 0;
9233        ret = 0;
9234        argv = argv_split(GFP_KERNEL, buf, &argc);
9235        if (!argv)
9236                return -ENOMEM;
9237
9238        if (argc)
9239                ret = createfn(argc, argv);
9240
9241        argv_free(argv);
9242
9243        return ret;
9244}
9245
9246#define WRITE_BUFSIZE  4096
9247
9248ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9249                                size_t count, loff_t *ppos,
9250                                int (*createfn)(int, char **))
9251{
9252        char *kbuf, *buf, *tmp;
9253        int ret = 0;
9254        size_t done = 0;
9255        size_t size;
9256
9257        kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9258        if (!kbuf)
9259                return -ENOMEM;
9260
9261        while (done < count) {
9262                size = count - done;
9263
9264                if (size >= WRITE_BUFSIZE)
9265                        size = WRITE_BUFSIZE - 1;
9266
9267                if (copy_from_user(kbuf, buffer + done, size)) {
9268                        ret = -EFAULT;
9269                        goto out;
9270                }
9271                kbuf[size] = '\0';
9272                buf = kbuf;
9273                do {
9274                        tmp = strchr(buf, '\n');
9275                        if (tmp) {
9276                                *tmp = '\0';
9277                                size = tmp - buf + 1;
9278                        } else {
9279                                size = strlen(buf);
9280                                if (done + size < count) {
9281                                        if (buf != kbuf)
9282                                                break;
9283                                        /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9284                                        pr_warn("Line length is too long: Should be less than %d\n",
9285                                                WRITE_BUFSIZE - 2);
9286                                        ret = -EINVAL;
9287                                        goto out;
9288                                }
9289                        }
9290                        done += size;
9291
9292                        /* Remove comments */
9293                        tmp = strchr(buf, '#');
9294
9295                        if (tmp)
9296                                *tmp = '\0';
9297
9298                        ret = trace_run_command(buf, createfn);
9299                        if (ret)
9300                                goto out;
9301                        buf += size;
9302
9303                } while (done < count);
9304        }
9305        ret = done;
9306
9307out:
9308        kfree(kbuf);
9309
9310        return ret;
9311}
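    /*
     * Example of the parsing above (illustrative; "myprobe" is a
     * placeholder): dynamic event files such as kprobe_events feed their
     * writes through this helper, so a write of
     *
     *   "p:myprobe do_sys_open\n"
     *
     * is split at the newline, has any '#' comment stripped, and is then
     * handed to createfn() as argv = { "p:myprobe", "do_sys_open" }.
     */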
9312
9313__init static int tracer_alloc_buffers(void)
9314{
9315        int ring_buf_size;
9316        int ret = -ENOMEM;
9317
9318
9319        if (security_locked_down(LOCKDOWN_TRACEFS)) {
9320                pr_warn("Tracing disabled due to lockdown\n");
9321                return -EPERM;
9322        }
9323
9324        /*
9325         * Make sure we don't accidentally add more trace options
9326         * than we have bits for.
9327         */
9328        BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9329
9330        if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9331                goto out;
9332
9333        if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9334                goto out_free_buffer_mask;
9335
9336        /* Only allocate trace_printk buffers if a trace_printk exists */
9337        if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9338                /* Must be called before global_trace.buffer is allocated */
9339                trace_printk_init_buffers();
9340
9341        /* To save memory, keep the ring buffer size to its minimum */
9342        if (ring_buffer_expanded)
9343                ring_buf_size = trace_buf_size;
9344        else
9345                ring_buf_size = 1;
9346
9347        cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9348        cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9349
9350        raw_spin_lock_init(&global_trace.start_lock);
9351
9352        /*
9353         * The prepare callbacks allocate some memory for the ring buffer. We
9354         * don't free the buffer if the CPU goes down. If we were to free
9355         * the buffer, then the user would lose any trace that was in the
9356         * buffer. The memory will be removed once the "instance" is removed.
9357         */
9358        ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9359                                      "trace/RB:prepare", trace_rb_cpu_prepare,
9360                                      NULL);
9361        if (ret < 0)
9362                goto out_free_cpumask;
9363        /* Used for event triggers */
9364        ret = -ENOMEM;
9365        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9366        if (!temp_buffer)
9367                goto out_rm_hp_state;
9368
9369        if (trace_create_savedcmd() < 0)
9370                goto out_free_temp_buffer;
9371
9372        /* TODO: make the number of buffers hot pluggable with CPUS */
9373        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9374                MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9375                goto out_free_savedcmd;
9376        }
9377
9378        if (global_trace.buffer_disabled)
9379                tracing_off();
9380
9381        if (trace_boot_clock) {
9382                ret = tracing_set_clock(&global_trace, trace_boot_clock);
9383                if (ret < 0)
9384                        pr_warn("Trace clock %s not defined, going back to default\n",
9385                                trace_boot_clock);
9386        }
9387
9388        /*
9389         * register_tracer() might reference current_trace, so it
9390         * needs to be set before we register anything. This is
9391         * just a bootstrap of current_trace anyway.
9392         */
9393        global_trace.current_trace = &nop_trace;
9394
9395        global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9396
9397        ftrace_init_global_array_ops(&global_trace);
9398
9399        init_trace_flags_index(&global_trace);
9400
9401        register_tracer(&nop_trace);
9402
9403        /* Function tracing may start here (via kernel command line) */
9404        init_function_trace();
9405
9406        /* All seems OK, enable tracing */
9407        tracing_disabled = 0;
9408
9409        atomic_notifier_chain_register(&panic_notifier_list,
9410                                       &trace_panic_notifier);
9411
9412        register_die_notifier(&trace_die_notifier);
9413
9414        global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9415
9416        INIT_LIST_HEAD(&global_trace.systems);
9417        INIT_LIST_HEAD(&global_trace.events);
9418        INIT_LIST_HEAD(&global_trace.hist_vars);
9419        INIT_LIST_HEAD(&global_trace.err_log);
9420        list_add(&global_trace.list, &ftrace_trace_arrays);
9421
9422        apply_trace_boot_options();
9423
9424        register_snapshot_cmd();
9425
9426        return 0;
9427
9428out_free_savedcmd:
9429        free_saved_cmdlines_buffer(savedcmd);
9430out_free_temp_buffer:
9431        ring_buffer_free(temp_buffer);
9432out_rm_hp_state:
9433        cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9434out_free_cpumask:
9435        free_cpumask_var(global_trace.tracing_cpumask);
9436out_free_buffer_mask:
9437        free_cpumask_var(tracing_buffer_mask);
9438out:
9439        return ret;
9440}
9441
9442void __init early_trace_init(void)
9443{
9444        if (tracepoint_printk) {
9445                tracepoint_print_iter =
9446                        kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9447                if (MEM_FAIL(!tracepoint_print_iter,
9448                             "Failed to allocate trace iterator\n"))
9449                        tracepoint_printk = 0;
9450                else
9451                        static_key_enable(&tracepoint_printk_key.key);
9452        }
9453        tracer_alloc_buffers();
9454}
9455
9456void __init trace_init(void)
9457{
9458        trace_event_init();
9459}
9460
9461__init static int clear_boot_tracer(void)
9462{
9463        /*
9464         * The default bootup tracer name points into an init section.
9465         * This function is called in lateinit. If we did not
9466         * find the boot tracer, then clear it out, to prevent
9467         * later registration from accessing the buffer that is
9468         * about to be freed.
9469         */
9470        if (!default_bootup_tracer)
9471                return 0;
9472
9473        printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9474               default_bootup_tracer);
9475        default_bootup_tracer = NULL;
9476
9477        return 0;
9478}
9479
9480fs_initcall(tracer_init_tracefs);
9481late_initcall_sync(clear_boot_tracer);
9482
9483#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9484__init static int tracing_set_default_clock(void)
9485{
9486        /* sched_clock_stable() is determined in late_initcall */
9487        if (!trace_boot_clock && !sched_clock_stable()) {
9488                if (security_locked_down(LOCKDOWN_TRACEFS)) {
9489                        pr_warn("Can not set tracing clock due to lockdown\n");
9490                        return -EPERM;
9491                }
9492
9493                printk(KERN_WARNING
9494                       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9495                       "If you want to keep using the local clock, then add:\n"
9496                       "  \"trace_clock=local\"\n"
9497                       "on the kernel command line\n");
9498                tracing_set_clock(&global_trace, "global");
9499        }
9500
9501        return 0;
9502}
9503late_initcall_sync(tracing_set_default_clock);
9504#endif
9505