linux/kernel/trace/trace_events.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * event tracer
   4 *
   5 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
   6 *
   7 *  - Added format output of fields of the trace point.
   8 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
   9 *
  10 */
  11
  12#define pr_fmt(fmt) fmt
  13
  14#include <linux/workqueue.h>
  15#include <linux/security.h>
  16#include <linux/spinlock.h>
  17#include <linux/kthread.h>
  18#include <linux/tracefs.h>
  19#include <linux/uaccess.h>
  20#include <linux/module.h>
  21#include <linux/ctype.h>
  22#include <linux/sort.h>
  23#include <linux/slab.h>
  24#include <linux/delay.h>
  25
  26#include <trace/events/sched.h>
  27#include <trace/syscall.h>
  28
  29#include <asm/setup.h>
  30
  31#include "trace_output.h"
  32
  33#undef TRACE_SYSTEM
  34#define TRACE_SYSTEM "TRACE_SYSTEM"
  35
  36DEFINE_MUTEX(event_mutex);
  37
  38LIST_HEAD(ftrace_events);
  39static LIST_HEAD(ftrace_generic_fields);
  40static LIST_HEAD(ftrace_common_fields);
  41static bool eventdir_initialized;
  42
  43#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
  44
  45static struct kmem_cache *field_cachep;
  46static struct kmem_cache *file_cachep;
  47
  48static inline int system_refcount(struct event_subsystem *system)
  49{
  50        return system->ref_count;
  51}
  52
  53static int system_refcount_inc(struct event_subsystem *system)
  54{
  55        return system->ref_count++;
  56}
  57
  58static int system_refcount_dec(struct event_subsystem *system)
  59{
  60        return --system->ref_count;
  61}
  62
  63/* Double loops, do not use break, only goto's work */
  64#define do_for_each_event_file(tr, file)                        \
  65        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
  66                list_for_each_entry(file, &tr->events, list)
  67
  68#define do_for_each_event_file_safe(tr, file)                   \
  69        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
  70                struct trace_event_file *___n;                          \
  71                list_for_each_entry_safe(file, ___n, &tr->events, list)
  72
  73#define while_for_each_event_file()             \
  74        }
  75
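/*
 * Illustrative sketch only (not part of the original file): the canonical
 * way the macro pair above is used, mirroring trace_event_enable_cmd_record()
 * further down.  event_mutex must be held, and any early exit must use a
 * goto rather than break, because the macros expand to two nested loops.
 */
static __maybe_unused void example_walk_event_files(void)
{
        struct trace_event_file *file;
        struct trace_array *tr;

        lockdep_assert_held(&event_mutex);

        do_for_each_event_file(tr, file) {

                if (!(file->flags & EVENT_FILE_FL_ENABLED))
                        continue;

                /* act on each enabled event file here */
        } while_for_each_event_file();
}
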
  76static struct ftrace_event_field *
  77__find_event_field(struct list_head *head, char *name)
  78{
  79        struct ftrace_event_field *field;
  80
  81        list_for_each_entry(field, head, link) {
  82                if (!strcmp(field->name, name))
  83                        return field;
  84        }
  85
  86        return NULL;
  87}
  88
  89struct ftrace_event_field *
  90trace_find_event_field(struct trace_event_call *call, char *name)
  91{
  92        struct ftrace_event_field *field;
  93        struct list_head *head;
  94
  95        head = trace_get_fields(call);
  96        field = __find_event_field(head, name);
  97        if (field)
  98                return field;
  99
 100        field = __find_event_field(&ftrace_generic_fields, name);
 101        if (field)
 102                return field;
 103
 104        return __find_event_field(&ftrace_common_fields, name);
 105}
 106
 107static int __trace_define_field(struct list_head *head, const char *type,
 108                                const char *name, int offset, int size,
 109                                int is_signed, int filter_type)
 110{
 111        struct ftrace_event_field *field;
 112
 113        field = kmem_cache_alloc(field_cachep, GFP_TRACE);
 114        if (!field)
 115                return -ENOMEM;
 116
 117        field->name = name;
 118        field->type = type;
 119
 120        if (filter_type == FILTER_OTHER)
 121                field->filter_type = filter_assign_type(type);
 122        else
 123                field->filter_type = filter_type;
 124
 125        field->offset = offset;
 126        field->size = size;
 127        field->is_signed = is_signed;
 128
 129        list_add(&field->link, head);
 130
 131        return 0;
 132}
 133
 134int trace_define_field(struct trace_event_call *call, const char *type,
 135                       const char *name, int offset, int size, int is_signed,
 136                       int filter_type)
 137{
 138        struct list_head *head;
 139
 140        if (WARN_ON(!call->class))
 141                return 0;
 142
 143        head = trace_get_fields(call);
 144        return __trace_define_field(head, type, name, offset, size,
 145                                    is_signed, filter_type);
 146}
 147EXPORT_SYMBOL_GPL(trace_define_field);
 148
 149#define __generic_field(type, item, filter_type)                        \
 150        ret = __trace_define_field(&ftrace_generic_fields, #type,       \
 151                                   #item, 0, 0, is_signed_type(type),   \
 152                                   filter_type);                        \
 153        if (ret)                                                        \
 154                return ret;
 155
 156#define __common_field(type, item)                                      \
 157        ret = __trace_define_field(&ftrace_common_fields, #type,        \
 158                                   "common_" #item,                     \
 159                                   offsetof(typeof(ent), item),         \
 160                                   sizeof(ent.item),                    \
 161                                   is_signed_type(type), FILTER_OTHER); \
 162        if (ret)                                                        \
 163                return ret;
 164
 165static int trace_define_generic_fields(void)
 166{
 167        int ret;
 168
 169        __generic_field(int, CPU, FILTER_CPU);
 170        __generic_field(int, cpu, FILTER_CPU);
 171        __generic_field(char *, COMM, FILTER_COMM);
 172        __generic_field(char *, comm, FILTER_COMM);
 173
 174        return ret;
 175}
 176
 177static int trace_define_common_fields(void)
 178{
 179        int ret;
 180        struct trace_entry ent;
 181
 182        __common_field(unsigned short, type);
 183        __common_field(unsigned char, flags);
 184        /* Holds both preempt_count and migrate_disable */
 185        __common_field(unsigned char, preempt_count);
 186        __common_field(int, pid);
 187
 188        return ret;
 189}
 190
 191static void trace_destroy_fields(struct trace_event_call *call)
 192{
 193        struct ftrace_event_field *field, *next;
 194        struct list_head *head;
 195
 196        head = trace_get_fields(call);
 197        list_for_each_entry_safe(field, next, head, link) {
 198                list_del(&field->link);
 199                kmem_cache_free(field_cachep, field);
 200        }
 201}
 202
 203/*
 204 * run-time version of trace_event_get_offsets_<call>() that returns the last
 205 * accessible offset of trace fields excluding __dynamic_array bytes
 206 */
 207int trace_event_get_offsets(struct trace_event_call *call)
 208{
 209        struct ftrace_event_field *tail;
 210        struct list_head *head;
 211
 212        head = trace_get_fields(call);
 213        /*
 214         * head->next points to the last field with the largest offset,
 215         * since it was added last by trace_define_field()
 216         */
 217        tail = list_first_entry(head, struct ftrace_event_field, link);
 218        return tail->offset + tail->size;
 219}
 220
 221/*
 222 * Check if the referenced field is an array and return true,
 223 * as arrays are OK to dereference.
 224 */
 225static bool test_field(const char *fmt, struct trace_event_call *call)
 226{
 227        struct trace_event_fields *field = call->class->fields_array;
 228        const char *array_descriptor;
 229        const char *p = fmt;
 230        int len;
 231
 232        if (!(len = str_has_prefix(fmt, "REC->")))
 233                return false;
 234        fmt += len;
 235        for (p = fmt; *p; p++) {
 236                if (!isalnum(*p) && *p != '_')
 237                        break;
 238        }
 239        len = p - fmt;
 240
 241        for (; field->type; field++) {
 242                if (strncmp(field->name, fmt, len) ||
 243                    field->name[len])
 244                        continue;
 245                array_descriptor = strchr(field->type, '[');
 246                /* This is an array and is OK to dereference. */
 247                return array_descriptor != NULL;
 248        }
 249        return false;
 250}
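
/*
 * Illustrative example (hypothetical field names): a field recorded with
 * __array(unsigned long, caller, 8) gets the type string "unsigned long[8]",
 * so "REC->caller" passes the check above, while a plain stored pointer
 * such as "void *addr" has no '[' in its type and does not.
 */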
 251
 252/*
 253 * Examine the print fmt of the event looking for unsafe dereference
 254 * pointers using %p* that could be recorded in the trace event and
 255 * much later referenced after the pointer was freed. Dereferencing
  256 * pointers is OK, if the dereference is into the event itself.
 257 */
 258static void test_event_printk(struct trace_event_call *call)
 259{
 260        u64 dereference_flags = 0;
 261        bool first = true;
 262        const char *fmt, *c, *r, *a;
 263        int parens = 0;
 264        char in_quote = 0;
 265        int start_arg = 0;
 266        int arg = 0;
 267        int i;
 268
 269        fmt = call->print_fmt;
 270
 271        if (!fmt)
 272                return;
 273
 274        for (i = 0; fmt[i]; i++) {
 275                switch (fmt[i]) {
 276                case '\\':
 277                        i++;
 278                        if (!fmt[i])
 279                                return;
 280                        continue;
 281                case '"':
 282                case '\'':
 283                        /*
 284                         * The print fmt starts with a string that
 285                         * is processed first to find %p* usage,
 286                         * then after the first string, the print fmt
 287                         * contains arguments that are used to check
 288                         * if the dereferenced %p* usage is safe.
 289                         */
 290                        if (first) {
 291                                if (fmt[i] == '\'')
 292                                        continue;
 293                                if (in_quote) {
 294                                        arg = 0;
 295                                        first = false;
 296                                        /*
  297                                          * If there were no %p* uses,
 298                                         * the fmt is OK.
 299                                         */
 300                                        if (!dereference_flags)
 301                                                return;
 302                                }
 303                        }
 304                        if (in_quote) {
 305                                if (in_quote == fmt[i])
 306                                        in_quote = 0;
 307                        } else {
 308                                in_quote = fmt[i];
 309                        }
 310                        continue;
 311                case '%':
 312                        if (!first || !in_quote)
 313                                continue;
 314                        i++;
 315                        if (!fmt[i])
 316                                return;
 317                        switch (fmt[i]) {
 318                        case '%':
 319                                continue;
 320                        case 'p':
 321                                /* Find dereferencing fields */
 322                                switch (fmt[i + 1]) {
 323                                case 'B': case 'R': case 'r':
 324                                case 'b': case 'M': case 'm':
 325                                case 'I': case 'i': case 'E':
 326                                case 'U': case 'V': case 'N':
 327                                case 'a': case 'd': case 'D':
 328                                case 'g': case 't': case 'C':
 329                                case 'O': case 'f':
 330                                        if (WARN_ONCE(arg == 63,
 331                                                      "Too many args for event: %s",
 332                                                      trace_event_name(call)))
 333                                                return;
 334                                        dereference_flags |= 1ULL << arg;
 335                                }
 336                                break;
 337                        default:
 338                        {
 339                                bool star = false;
 340                                int j;
 341
 342                                /* Increment arg if %*s exists. */
 343                                for (j = 0; fmt[i + j]; j++) {
 344                                        if (isdigit(fmt[i + j]) ||
 345                                            fmt[i + j] == '.')
 346                                                continue;
 347                                        if (fmt[i + j] == '*') {
 348                                                star = true;
 349                                                continue;
 350                                        }
 351                                        if ((fmt[i + j] == 's') && star)
 352                                                arg++;
 353                                        break;
 354                                }
 355                                break;
 356                        } /* default */
 357
 358                        } /* switch */
 359                        arg++;
 360                        continue;
 361                case '(':
 362                        if (in_quote)
 363                                continue;
 364                        parens++;
 365                        continue;
 366                case ')':
 367                        if (in_quote)
 368                                continue;
 369                        parens--;
 370                        if (WARN_ONCE(parens < 0,
 371                                      "Paren mismatch for event: %s\narg='%s'\n%*s",
 372                                      trace_event_name(call),
 373                                      fmt + start_arg,
 374                                      (i - start_arg) + 5, "^"))
 375                                return;
 376                        continue;
 377                case ',':
 378                        if (in_quote || parens)
 379                                continue;
 380                        i++;
 381                        while (isspace(fmt[i]))
 382                                i++;
 383                        start_arg = i;
 384                        if (!(dereference_flags & (1ULL << arg)))
 385                                goto next_arg;
 386
 387                        /* Find the REC-> in the argument */
 388                        c = strchr(fmt + i, ',');
 389                        r = strstr(fmt + i, "REC->");
 390                        if (r && (!c || r < c)) {
 391                                /*
 392                                 * Addresses of events on the buffer,
 393                                 * or an array on the buffer is
 394                                 * OK to dereference.
  395                                 * There are ways to fool this, but
 396                                 * this is to catch common mistakes,
 397                                 * not malicious code.
 398                                 */
 399                                a = strchr(fmt + i, '&');
 400                                if ((a && (a < r)) || test_field(r, call))
 401                                        dereference_flags &= ~(1ULL << arg);
 402                        }
 403                next_arg:
 404                        i--;
 405                        arg++;
 406                }
 407        }
 408
 409        /*
 410         * If you triggered the below warning, the trace event reported
 411         * uses an unsafe dereference pointer %p*. As the data stored
 412         * at the trace event time may no longer exist when the trace
 413         * event is printed, dereferencing to the original source is
 414         * unsafe. The source of the dereference must be copied into the
 415         * event itself, and the dereference must access the copy instead.
 416         */
 417        if (WARN_ON_ONCE(dereference_flags)) {
 418                arg = 1;
 419                while (!(dereference_flags & 1)) {
 420                        dereference_flags >>= 1;
 421                        arg++;
 422                }
 423                pr_warn("event %s has unsafe dereference of argument %d\n",
 424                        trace_event_name(call), arg);
 425                pr_warn("print_fmt: %s\n", fmt);
 426        }
 427}
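
/*
 * Illustrative example (hypothetical fields): "%pM", REC->mac is safe when
 * "mac" is an array recorded in the event, and "%pISpc", &REC->addr is safe
 * because it takes the address of data inside the event itself, but
 * "%pM", REC->mac_ptr dereferences a saved pointer and would trigger the
 * warning above.
 */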
 428
 429int trace_event_raw_init(struct trace_event_call *call)
 430{
 431        int id;
 432
 433        id = register_trace_event(&call->event);
 434        if (!id)
 435                return -ENODEV;
 436
 437        test_event_printk(call);
 438
 439        return 0;
 440}
 441EXPORT_SYMBOL_GPL(trace_event_raw_init);
 442
 443bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
 444{
 445        struct trace_array *tr = trace_file->tr;
 446        struct trace_array_cpu *data;
 447        struct trace_pid_list *no_pid_list;
 448        struct trace_pid_list *pid_list;
 449
 450        pid_list = rcu_dereference_raw(tr->filtered_pids);
 451        no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
 452
 453        if (!pid_list && !no_pid_list)
 454                return false;
 455
 456        data = this_cpu_ptr(tr->array_buffer.data);
 457
 458        return data->ignore_pid;
 459}
 460EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
 461
 462void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 463                                 struct trace_event_file *trace_file,
 464                                 unsigned long len)
 465{
 466        struct trace_event_call *event_call = trace_file->event_call;
 467
 468        if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
 469            trace_event_ignore_this_pid(trace_file))
 470                return NULL;
 471
 472        /*
 473         * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
 474         * preemption (adding one to the preempt_count). Since we are
 475         * interested in the preempt_count at the time the tracepoint was
 476         * hit, we need to subtract one to offset the increment.
 477         */
 478        fbuffer->trace_ctx = tracing_gen_ctx_dec();
 479        fbuffer->trace_file = trace_file;
 480
 481        fbuffer->event =
 482                trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
 483                                                event_call->event.type, len,
 484                                                fbuffer->trace_ctx);
 485        if (!fbuffer->event)
 486                return NULL;
 487
 488        fbuffer->regs = NULL;
 489        fbuffer->entry = ring_buffer_event_data(fbuffer->event);
 490        return fbuffer->entry;
 491}
 492EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
 493
 494int trace_event_reg(struct trace_event_call *call,
 495                    enum trace_reg type, void *data)
 496{
 497        struct trace_event_file *file = data;
 498
 499        WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
 500        switch (type) {
 501        case TRACE_REG_REGISTER:
 502                return tracepoint_probe_register(call->tp,
 503                                                 call->class->probe,
 504                                                 file);
 505        case TRACE_REG_UNREGISTER:
 506                tracepoint_probe_unregister(call->tp,
 507                                            call->class->probe,
 508                                            file);
 509                return 0;
 510
 511#ifdef CONFIG_PERF_EVENTS
 512        case TRACE_REG_PERF_REGISTER:
 513                return tracepoint_probe_register(call->tp,
 514                                                 call->class->perf_probe,
 515                                                 call);
 516        case TRACE_REG_PERF_UNREGISTER:
 517                tracepoint_probe_unregister(call->tp,
 518                                            call->class->perf_probe,
 519                                            call);
 520                return 0;
 521        case TRACE_REG_PERF_OPEN:
 522        case TRACE_REG_PERF_CLOSE:
 523        case TRACE_REG_PERF_ADD:
 524        case TRACE_REG_PERF_DEL:
 525                return 0;
 526#endif
 527        }
 528        return 0;
 529}
 530EXPORT_SYMBOL_GPL(trace_event_reg);
 531
 532void trace_event_enable_cmd_record(bool enable)
 533{
 534        struct trace_event_file *file;
 535        struct trace_array *tr;
 536
 537        lockdep_assert_held(&event_mutex);
 538
 539        do_for_each_event_file(tr, file) {
 540
 541                if (!(file->flags & EVENT_FILE_FL_ENABLED))
 542                        continue;
 543
 544                if (enable) {
 545                        tracing_start_cmdline_record();
 546                        set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 547                } else {
 548                        tracing_stop_cmdline_record();
 549                        clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 550                }
 551        } while_for_each_event_file();
 552}
 553
 554void trace_event_enable_tgid_record(bool enable)
 555{
 556        struct trace_event_file *file;
 557        struct trace_array *tr;
 558
 559        lockdep_assert_held(&event_mutex);
 560
 561        do_for_each_event_file(tr, file) {
 562                if (!(file->flags & EVENT_FILE_FL_ENABLED))
 563                        continue;
 564
 565                if (enable) {
 566                        tracing_start_tgid_record();
 567                        set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
 568                } else {
 569                        tracing_stop_tgid_record();
 570                        clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
 571                                  &file->flags);
 572                }
 573        } while_for_each_event_file();
 574}
 575
 576static int __ftrace_event_enable_disable(struct trace_event_file *file,
 577                                         int enable, int soft_disable)
 578{
 579        struct trace_event_call *call = file->event_call;
 580        struct trace_array *tr = file->tr;
 581        unsigned long file_flags = file->flags;
 582        int ret = 0;
 583        int disable;
 584
 585        switch (enable) {
 586        case 0:
 587                /*
 588                 * When soft_disable is set and enable is cleared, the sm_ref
 589                 * reference counter is decremented. If it reaches 0, we want
 590                 * to clear the SOFT_DISABLED flag but leave the event in the
 591                 * state that it was. That is, if the event was enabled and
 592                 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
 593                 * is set we do not want the event to be enabled before we
 594                 * clear the bit.
 595                 *
 596                 * When soft_disable is not set but the SOFT_MODE flag is,
 597                 * we do nothing. Do not disable the tracepoint, otherwise
  598                 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
 599                 */
 600                if (soft_disable) {
 601                        if (atomic_dec_return(&file->sm_ref) > 0)
 602                                break;
 603                        disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
 604                        clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
 605                } else
 606                        disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
 607
 608                if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
 609                        clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 610                        if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
 611                                tracing_stop_cmdline_record();
 612                                clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 613                        }
 614
 615                        if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
 616                                tracing_stop_tgid_record();
 617                                clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
 618                        }
 619
 620                        call->class->reg(call, TRACE_REG_UNREGISTER, file);
 621                }
 622                /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
 623                if (file->flags & EVENT_FILE_FL_SOFT_MODE)
 624                        set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 625                else
 626                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 627                break;
 628        case 1:
 629                /*
 630                 * When soft_disable is set and enable is set, we want to
 631                 * register the tracepoint for the event, but leave the event
 632                 * as is. That means, if the event was already enabled, we do
 633                 * nothing (but set SOFT_MODE). If the event is disabled, we
 634                 * set SOFT_DISABLED before enabling the event tracepoint, so
 635                 * it still seems to be disabled.
 636                 */
 637                if (!soft_disable)
 638                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 639                else {
 640                        if (atomic_inc_return(&file->sm_ref) > 1)
 641                                break;
 642                        set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
 643                }
 644
 645                if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
 646                        bool cmd = false, tgid = false;
 647
 648                        /* Keep the event disabled, when going to SOFT_MODE. */
 649                        if (soft_disable)
 650                                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 651
 652                        if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
 653                                cmd = true;
 654                                tracing_start_cmdline_record();
 655                                set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 656                        }
 657
 658                        if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
 659                                tgid = true;
 660                                tracing_start_tgid_record();
 661                                set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
 662                        }
 663
 664                        ret = call->class->reg(call, TRACE_REG_REGISTER, file);
 665                        if (ret) {
 666                                if (cmd)
 667                                        tracing_stop_cmdline_record();
 668                                if (tgid)
 669                                        tracing_stop_tgid_record();
  670                                pr_info("event trace: Could not enable event %s\n",
  671                                        trace_event_name(call));
 672                                break;
 673                        }
 674                        set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 675
 676                        /* WAS_ENABLED gets set but never cleared. */
 677                        set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
 678                }
 679                break;
 680        }
 681
 682        /* Enable or disable use of trace_buffered_event */
 683        if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
 684            (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
 685                if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
 686                        trace_buffered_event_enable();
 687                else
 688                        trace_buffered_event_disable();
 689        }
 690
 691        return ret;
 692}
 693
 694int trace_event_enable_disable(struct trace_event_file *file,
 695                               int enable, int soft_disable)
 696{
 697        return __ftrace_event_enable_disable(file, enable, soft_disable);
 698}
 699
 700static int ftrace_event_enable_disable(struct trace_event_file *file,
 701                                       int enable)
 702{
 703        return __ftrace_event_enable_disable(file, enable, 0);
 704}
 705
 706static void ftrace_clear_events(struct trace_array *tr)
 707{
 708        struct trace_event_file *file;
 709
 710        mutex_lock(&event_mutex);
 711        list_for_each_entry(file, &tr->events, list) {
 712                ftrace_event_enable_disable(file, 0);
 713        }
 714        mutex_unlock(&event_mutex);
 715}
 716
 717static void
 718event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
 719{
 720        struct trace_pid_list *pid_list;
 721        struct trace_array *tr = data;
 722
 723        pid_list = rcu_dereference_raw(tr->filtered_pids);
 724        trace_filter_add_remove_task(pid_list, NULL, task);
 725
 726        pid_list = rcu_dereference_raw(tr->filtered_no_pids);
 727        trace_filter_add_remove_task(pid_list, NULL, task);
 728}
 729
 730static void
 731event_filter_pid_sched_process_fork(void *data,
 732                                    struct task_struct *self,
 733                                    struct task_struct *task)
 734{
 735        struct trace_pid_list *pid_list;
 736        struct trace_array *tr = data;
 737
 738        pid_list = rcu_dereference_sched(tr->filtered_pids);
 739        trace_filter_add_remove_task(pid_list, self, task);
 740
 741        pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 742        trace_filter_add_remove_task(pid_list, self, task);
 743}
 744
 745void trace_event_follow_fork(struct trace_array *tr, bool enable)
 746{
 747        if (enable) {
 748                register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
 749                                                       tr, INT_MIN);
 750                register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
 751                                                       tr, INT_MAX);
 752        } else {
 753                unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
 754                                                    tr);
 755                unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
 756                                                    tr);
 757        }
 758}
 759
 760static void
 761event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
 762                    struct task_struct *prev, struct task_struct *next)
 763{
 764        struct trace_array *tr = data;
 765        struct trace_pid_list *no_pid_list;
 766        struct trace_pid_list *pid_list;
 767        bool ret;
 768
 769        pid_list = rcu_dereference_sched(tr->filtered_pids);
 770        no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 771
 772        /*
 773         * Sched switch is funny, as we only want to ignore it
 774         * in the notrace case if both prev and next should be ignored.
 775         */
 776        ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
 777                trace_ignore_this_task(NULL, no_pid_list, next);
 778
 779        this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
 780                       (trace_ignore_this_task(pid_list, NULL, prev) &&
 781                        trace_ignore_this_task(pid_list, NULL, next)));
 782}
 783
 784static void
 785event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
 786                    struct task_struct *prev, struct task_struct *next)
 787{
 788        struct trace_array *tr = data;
 789        struct trace_pid_list *no_pid_list;
 790        struct trace_pid_list *pid_list;
 791
 792        pid_list = rcu_dereference_sched(tr->filtered_pids);
 793        no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 794
 795        this_cpu_write(tr->array_buffer.data->ignore_pid,
 796                       trace_ignore_this_task(pid_list, no_pid_list, next));
 797}
 798
 799static void
 800event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
 801{
 802        struct trace_array *tr = data;
 803        struct trace_pid_list *no_pid_list;
 804        struct trace_pid_list *pid_list;
 805
 806        /* Nothing to do if we are already tracing */
 807        if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
 808                return;
 809
 810        pid_list = rcu_dereference_sched(tr->filtered_pids);
 811        no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 812
 813        this_cpu_write(tr->array_buffer.data->ignore_pid,
 814                       trace_ignore_this_task(pid_list, no_pid_list, task));
 815}
 816
 817static void
 818event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
 819{
 820        struct trace_array *tr = data;
 821        struct trace_pid_list *no_pid_list;
 822        struct trace_pid_list *pid_list;
 823
 824        /* Nothing to do if we are not tracing */
 825        if (this_cpu_read(tr->array_buffer.data->ignore_pid))
 826                return;
 827
 828        pid_list = rcu_dereference_sched(tr->filtered_pids);
 829        no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 830
 831        /* Set tracing if current is enabled */
 832        this_cpu_write(tr->array_buffer.data->ignore_pid,
 833                       trace_ignore_this_task(pid_list, no_pid_list, current));
 834}
 835
 836static void unregister_pid_events(struct trace_array *tr)
 837{
 838        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
 839        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
 840
 841        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
 842        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
 843
 844        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
 845        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
 846
 847        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
 848        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
 849}
 850
 851static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
 852{
 853        struct trace_pid_list *pid_list;
 854        struct trace_pid_list *no_pid_list;
 855        struct trace_event_file *file;
 856        int cpu;
 857
 858        pid_list = rcu_dereference_protected(tr->filtered_pids,
 859                                             lockdep_is_held(&event_mutex));
 860        no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
 861                                             lockdep_is_held(&event_mutex));
 862
 863        /* Make sure there's something to do */
 864        if (!pid_type_enabled(type, pid_list, no_pid_list))
 865                return;
 866
 867        if (!still_need_pid_events(type, pid_list, no_pid_list)) {
 868                unregister_pid_events(tr);
 869
 870                list_for_each_entry(file, &tr->events, list) {
 871                        clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
 872                }
 873
 874                for_each_possible_cpu(cpu)
 875                        per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
 876        }
 877
 878        if (type & TRACE_PIDS)
 879                rcu_assign_pointer(tr->filtered_pids, NULL);
 880
 881        if (type & TRACE_NO_PIDS)
 882                rcu_assign_pointer(tr->filtered_no_pids, NULL);
 883
 884        /* Wait till all users are no longer using pid filtering */
 885        tracepoint_synchronize_unregister();
 886
 887        if ((type & TRACE_PIDS) && pid_list)
 888                trace_free_pid_list(pid_list);
 889
 890        if ((type & TRACE_NO_PIDS) && no_pid_list)
 891                trace_free_pid_list(no_pid_list);
 892}
 893
 894static void ftrace_clear_event_pids(struct trace_array *tr, int type)
 895{
 896        mutex_lock(&event_mutex);
 897        __ftrace_clear_event_pids(tr, type);
 898        mutex_unlock(&event_mutex);
 899}
 900
 901static void __put_system(struct event_subsystem *system)
 902{
 903        struct event_filter *filter = system->filter;
 904
 905        WARN_ON_ONCE(system_refcount(system) == 0);
 906        if (system_refcount_dec(system))
 907                return;
 908
 909        list_del(&system->list);
 910
 911        if (filter) {
 912                kfree(filter->filter_string);
 913                kfree(filter);
 914        }
 915        kfree_const(system->name);
 916        kfree(system);
 917}
 918
 919static void __get_system(struct event_subsystem *system)
 920{
 921        WARN_ON_ONCE(system_refcount(system) == 0);
 922        system_refcount_inc(system);
 923}
 924
 925static void __get_system_dir(struct trace_subsystem_dir *dir)
 926{
 927        WARN_ON_ONCE(dir->ref_count == 0);
 928        dir->ref_count++;
 929        __get_system(dir->subsystem);
 930}
 931
 932static void __put_system_dir(struct trace_subsystem_dir *dir)
 933{
 934        WARN_ON_ONCE(dir->ref_count == 0);
 935        /* If the subsystem is about to be freed, the dir must be too */
 936        WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
 937
 938        __put_system(dir->subsystem);
 939        if (!--dir->ref_count)
 940                kfree(dir);
 941}
 942
 943static void put_system(struct trace_subsystem_dir *dir)
 944{
 945        mutex_lock(&event_mutex);
 946        __put_system_dir(dir);
 947        mutex_unlock(&event_mutex);
 948}
 949
 950static void remove_subsystem(struct trace_subsystem_dir *dir)
 951{
 952        if (!dir)
 953                return;
 954
 955        if (!--dir->nr_events) {
 956                tracefs_remove(dir->entry);
 957                list_del(&dir->list);
 958                __put_system_dir(dir);
 959        }
 960}
 961
 962static void remove_event_file_dir(struct trace_event_file *file)
 963{
 964        struct dentry *dir = file->dir;
 965        struct dentry *child;
 966
 967        if (dir) {
 968                spin_lock(&dir->d_lock);        /* probably unneeded */
 969                list_for_each_entry(child, &dir->d_subdirs, d_child) {
 970                        if (d_really_is_positive(child))        /* probably unneeded */
 971                                d_inode(child)->i_private = NULL;
 972                }
 973                spin_unlock(&dir->d_lock);
 974
 975                tracefs_remove(dir);
 976        }
 977
 978        list_del(&file->list);
 979        remove_subsystem(file->system);
 980        free_event_filter(file->filter);
 981        kmem_cache_free(file_cachep, file);
 982}
 983
 984/*
  985 * __ftrace_set_clr_event(tr, NULL, NULL, NULL, set) will set/unset all events in tr.
 986 */
 987static int
 988__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
 989                              const char *sub, const char *event, int set)
 990{
 991        struct trace_event_file *file;
 992        struct trace_event_call *call;
 993        const char *name;
 994        int ret = -EINVAL;
 995        int eret = 0;
 996
 997        list_for_each_entry(file, &tr->events, list) {
 998
 999                call = file->event_call;
1000                name = trace_event_name(call);
1001
1002                if (!name || !call->class || !call->class->reg)
1003                        continue;
1004
1005                if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
1006                        continue;
1007
1008                if (match &&
1009                    strcmp(match, name) != 0 &&
1010                    strcmp(match, call->class->system) != 0)
1011                        continue;
1012
1013                if (sub && strcmp(sub, call->class->system) != 0)
1014                        continue;
1015
1016                if (event && strcmp(event, name) != 0)
1017                        continue;
1018
1019                ret = ftrace_event_enable_disable(file, set);
1020
1021                /*
1022                 * Save the first error and return that. Some events
1023                 * may still have been enabled, but let the user
1024                 * know that something went wrong.
1025                 */
1026                if (ret && !eret)
1027                        eret = ret;
1028
1029                ret = eret;
1030        }
1031
1032        return ret;
1033}
1034
1035static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
1036                                  const char *sub, const char *event, int set)
1037{
1038        int ret;
1039
1040        mutex_lock(&event_mutex);
1041        ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
1042        mutex_unlock(&event_mutex);
1043
1044        return ret;
1045}
1046
1047int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
1048{
1049        char *event = NULL, *sub = NULL, *match;
1050        int ret;
1051
1052        if (!tr)
1053                return -ENOENT;
1054        /*
1055         * The buf format can be <subsystem>:<event-name>
1056         *  *:<event-name> means any event by that name.
1057         *  :<event-name> is the same.
1058         *
1059         *  <subsystem>:* means all events in that subsystem
1060         *  <subsystem>: means the same.
1061         *
1062         *  <name> (no ':') means all events in a subsystem with
1063         *  the name <name> or any event that matches <name>
1064         */
1065
1066        match = strsep(&buf, ":");
1067        if (buf) {
1068                sub = match;
1069                event = buf;
1070                match = NULL;
1071
1072                if (!strlen(sub) || strcmp(sub, "*") == 0)
1073                        sub = NULL;
1074                if (!strlen(event) || strcmp(event, "*") == 0)
1075                        event = NULL;
1076        }
1077
1078        ret = __ftrace_set_clr_event(tr, match, sub, event, set);
1079
1080        /* Put back the colon to allow this to be called again */
1081        if (buf)
1082                *(buf - 1) = ':';
1083
1084        return ret;
1085}
1086
1087/**
1088 * trace_set_clr_event - enable or disable an event
1089 * @system: system name to match (NULL for any system)
1090 * @event: event name to match (NULL for all events, within system)
1091 * @set: 1 to enable, 0 to disable
1092 *
1093 * This is a way for other parts of the kernel to enable or disable
1094 * event recording.
1095 *
1096 * Returns 0 on success, -EINVAL if the parameters do not match any
1097 * registered events.
1098 */
1099int trace_set_clr_event(const char *system, const char *event, int set)
1100{
1101        struct trace_array *tr = top_trace_array();
1102
1103        if (!tr)
1104                return -ENODEV;
1105
1106        return __ftrace_set_clr_event(tr, NULL, system, event, set);
1107}
1108EXPORT_SYMBOL_GPL(trace_set_clr_event);
1109
1110/**
1111 * trace_array_set_clr_event - enable or disable an event for a trace array.
1112 * @tr: concerned trace array.
1113 * @system: system name to match (NULL for any system)
1114 * @event: event name to match (NULL for all events, within system)
1115 * @enable: true to enable, false to disable
1116 *
1117 * This is a way for other parts of the kernel to enable or disable
1118 * event recording.
1119 *
1120 * Returns 0 on success, -EINVAL if the parameters do not match any
1121 * registered events.
1122 */
1123int trace_array_set_clr_event(struct trace_array *tr, const char *system,
1124                const char *event, bool enable)
1125{
1126        int set;
1127
1128        if (!tr)
1129                return -ENOENT;
1130
1131        set = (enable == true) ? 1 : 0;
1132        return __ftrace_set_clr_event(tr, NULL, system, event, set);
1133}
1134EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
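
/*
 * Illustrative sketch (not part of the original file): how a module might
 * use the exported helper above together with the trace instance API
 * (trace_array_get_by_name()/trace_array_put(), declared in <linux/trace.h>).
 * The instance name is hypothetical and error handling is abbreviated.
 */
static __maybe_unused int example_enable_sched_switch(void)
{
        struct trace_array *tr;
        int ret;

        tr = trace_array_get_by_name("example-instance");
        if (!tr)
                return -ENODEV;

        ret = trace_array_set_clr_event(tr, "sched", "sched_switch", true);

        trace_array_put(tr);

        return ret;
}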
1135
1136/* 128 should be much more than enough */
1137#define EVENT_BUF_SIZE          127
1138
1139static ssize_t
1140ftrace_event_write(struct file *file, const char __user *ubuf,
1141                   size_t cnt, loff_t *ppos)
1142{
1143        struct trace_parser parser;
1144        struct seq_file *m = file->private_data;
1145        struct trace_array *tr = m->private;
1146        ssize_t read, ret;
1147
1148        if (!cnt)
1149                return 0;
1150
1151        ret = tracing_update_buffers();
1152        if (ret < 0)
1153                return ret;
1154
1155        if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
1156                return -ENOMEM;
1157
1158        read = trace_get_user(&parser, ubuf, cnt, ppos);
1159
1160        if (read >= 0 && trace_parser_loaded((&parser))) {
1161                int set = 1;
1162
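                /* A leading '!' (e.g. "!sched:sched_switch") means disable */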
1163                if (*parser.buffer == '!')
1164                        set = 0;
1165
1166                ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
1167                if (ret)
1168                        goto out_put;
1169        }
1170
1171        ret = read;
1172
1173 out_put:
1174        trace_parser_put(&parser);
1175
1176        return ret;
1177}
1178
1179static void *
1180t_next(struct seq_file *m, void *v, loff_t *pos)
1181{
1182        struct trace_event_file *file = v;
1183        struct trace_event_call *call;
1184        struct trace_array *tr = m->private;
1185
1186        (*pos)++;
1187
1188        list_for_each_entry_continue(file, &tr->events, list) {
1189                call = file->event_call;
1190                /*
1191                 * The ftrace subsystem is for showing formats only.
 1192                 * They cannot be enabled or disabled via the event files.
1193                 */
1194                if (call->class && call->class->reg &&
1195                    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
1196                        return file;
1197        }
1198
1199        return NULL;
1200}
1201
1202static void *t_start(struct seq_file *m, loff_t *pos)
1203{
1204        struct trace_event_file *file;
1205        struct trace_array *tr = m->private;
1206        loff_t l;
1207
1208        mutex_lock(&event_mutex);
1209
1210        file = list_entry(&tr->events, struct trace_event_file, list);
1211        for (l = 0; l <= *pos; ) {
1212                file = t_next(m, file, &l);
1213                if (!file)
1214                        break;
1215        }
1216        return file;
1217}
1218
1219static void *
1220s_next(struct seq_file *m, void *v, loff_t *pos)
1221{
1222        struct trace_event_file *file = v;
1223        struct trace_array *tr = m->private;
1224
1225        (*pos)++;
1226
1227        list_for_each_entry_continue(file, &tr->events, list) {
1228                if (file->flags & EVENT_FILE_FL_ENABLED)
1229                        return file;
1230        }
1231
1232        return NULL;
1233}
1234
1235static void *s_start(struct seq_file *m, loff_t *pos)
1236{
1237        struct trace_event_file *file;
1238        struct trace_array *tr = m->private;
1239        loff_t l;
1240
1241        mutex_lock(&event_mutex);
1242
1243        file = list_entry(&tr->events, struct trace_event_file, list);
1244        for (l = 0; l <= *pos; ) {
1245                file = s_next(m, file, &l);
1246                if (!file)
1247                        break;
1248        }
1249        return file;
1250}
1251
1252static int t_show(struct seq_file *m, void *v)
1253{
1254        struct trace_event_file *file = v;
1255        struct trace_event_call *call = file->event_call;
1256
1257        if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
1258                seq_printf(m, "%s:", call->class->system);
1259        seq_printf(m, "%s\n", trace_event_name(call));
1260
1261        return 0;
1262}
1263
1264static void t_stop(struct seq_file *m, void *p)
1265{
1266        mutex_unlock(&event_mutex);
1267}
1268
1269static void *
1270__next(struct seq_file *m, void *v, loff_t *pos, int type)
1271{
1272        struct trace_array *tr = m->private;
1273        struct trace_pid_list *pid_list;
1274
1275        if (type == TRACE_PIDS)
1276                pid_list = rcu_dereference_sched(tr->filtered_pids);
1277        else
1278                pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1279
1280        return trace_pid_next(pid_list, v, pos);
1281}
1282
1283static void *
1284p_next(struct seq_file *m, void *v, loff_t *pos)
1285{
1286        return __next(m, v, pos, TRACE_PIDS);
1287}
1288
1289static void *
1290np_next(struct seq_file *m, void *v, loff_t *pos)
1291{
1292        return __next(m, v, pos, TRACE_NO_PIDS);
1293}
1294
1295static void *__start(struct seq_file *m, loff_t *pos, int type)
1296        __acquires(RCU)
1297{
1298        struct trace_pid_list *pid_list;
1299        struct trace_array *tr = m->private;
1300
1301        /*
1302         * Grab the mutex, to keep calls to p_next() having the same
1303         * tr->filtered_pids as p_start() has.
1304         * If we just passed the tr->filtered_pids around, then RCU would
1305         * have been enough, but doing that makes things more complex.
1306         */
1307        mutex_lock(&event_mutex);
1308        rcu_read_lock_sched();
1309
1310        if (type == TRACE_PIDS)
1311                pid_list = rcu_dereference_sched(tr->filtered_pids);
1312        else
1313                pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1314
1315        if (!pid_list)
1316                return NULL;
1317
1318        return trace_pid_start(pid_list, pos);
1319}
1320
1321static void *p_start(struct seq_file *m, loff_t *pos)
1322        __acquires(RCU)
1323{
1324        return __start(m, pos, TRACE_PIDS);
1325}
1326
1327static void *np_start(struct seq_file *m, loff_t *pos)
1328        __acquires(RCU)
1329{
1330        return __start(m, pos, TRACE_NO_PIDS);
1331}
1332
1333static void p_stop(struct seq_file *m, void *p)
1334        __releases(RCU)
1335{
1336        rcu_read_unlock_sched();
1337        mutex_unlock(&event_mutex);
1338}
1339
1340static ssize_t
1341event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1342                  loff_t *ppos)
1343{
1344        struct trace_event_file *file;
1345        unsigned long flags;
1346        char buf[4] = "0";
1347
1348        mutex_lock(&event_mutex);
1349        file = event_file_data(filp);
1350        if (likely(file))
1351                flags = file->flags;
1352        mutex_unlock(&event_mutex);
1353
1354        if (!file)
1355                return -ENODEV;
1356
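        /* Shown as "0" or "1", with '*' appended for soft disabled/soft mode */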
1357        if (flags & EVENT_FILE_FL_ENABLED &&
1358            !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1359                strcpy(buf, "1");
1360
1361        if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1362            flags & EVENT_FILE_FL_SOFT_MODE)
1363                strcat(buf, "*");
1364
1365        strcat(buf, "\n");
1366
1367        return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1368}
1369
1370static ssize_t
1371event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1372                   loff_t *ppos)
1373{
1374        struct trace_event_file *file;
1375        unsigned long val;
1376        int ret;
1377
1378        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1379        if (ret)
1380                return ret;
1381
1382        ret = tracing_update_buffers();
1383        if (ret < 0)
1384                return ret;
1385
1386        switch (val) {
1387        case 0:
1388        case 1:
1389                ret = -ENODEV;
1390                mutex_lock(&event_mutex);
1391                file = event_file_data(filp);
1392                if (likely(file))
1393                        ret = ftrace_event_enable_disable(file, val);
1394                mutex_unlock(&event_mutex);
1395                break;
1396
1397        default:
1398                return -EINVAL;
1399        }
1400
1401        *ppos += cnt;
1402
1403        return ret ? ret : cnt;
1404}
1405
1406static ssize_t
1407system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1408                   loff_t *ppos)
1409{
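        /* set_to_char index: 0 = no events, 1 = all off, 2 = all on, 3 = mixed */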
1410        const char set_to_char[4] = { '?', '0', '1', 'X' };
1411        struct trace_subsystem_dir *dir = filp->private_data;
1412        struct event_subsystem *system = dir->subsystem;
1413        struct trace_event_call *call;
1414        struct trace_event_file *file;
1415        struct trace_array *tr = dir->tr;
1416        char buf[2];
1417        int set = 0;
1418        int ret;
1419
1420        mutex_lock(&event_mutex);
1421        list_for_each_entry(file, &tr->events, list) {
1422                call = file->event_call;
1423                if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
1424                    !trace_event_name(call) || !call->class || !call->class->reg)
1425                        continue;
1426
1427                if (system && strcmp(call->class->system, system->name) != 0)
1428                        continue;
1429
1430                /*
1431                 * We need to find out if all the events are set
 1432                 * or if all events are cleared, or if we have
1433                 * a mixture.
1434                 */
1435                set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1436
1437                /*
1438                 * If we have a mixture, no need to look further.
1439                 */
1440                if (set == 3)
1441                        break;
1442        }
1443        mutex_unlock(&event_mutex);
1444
1445        buf[0] = set_to_char[set];
1446        buf[1] = '\n';
1447
1448        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1449
1450        return ret;
1451}
1452
1453static ssize_t
1454system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1455                    loff_t *ppos)
1456{
1457        struct trace_subsystem_dir *dir = filp->private_data;
1458        struct event_subsystem *system = dir->subsystem;
1459        const char *name = NULL;
1460        unsigned long val;
1461        ssize_t ret;
1462
1463        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1464        if (ret)
1465                return ret;
1466
1467        ret = tracing_update_buffers();
1468        if (ret < 0)
1469                return ret;
1470
1471        if (val != 0 && val != 1)
1472                return -EINVAL;
1473
1474        /*
1475         * Opening of "enable" adds a ref count to system,
1476         * so the name is safe to use.
1477         */
1478        if (system)
1479                name = system->name;
1480
1481        ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1482        if (ret)
1483                goto out;
1484
1485        ret = cnt;
1486
1487out:
1488        *ppos += cnt;
1489
1490        return ret;
1491}
1492
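/*
 * Sentinel values for the "format" file seq iterator below: the walk goes
 * header -> common fields -> separator -> event fields -> print fmt.
 */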
1493enum {
1494        FORMAT_HEADER           = 1,
1495        FORMAT_FIELD_SEPERATOR  = 2,
1496        FORMAT_PRINTFMT         = 3,
1497};
1498
1499static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1500{
1501        struct trace_event_call *call = event_file_data(m->private);
1502        struct list_head *common_head = &ftrace_common_fields;
1503        struct list_head *head = trace_get_fields(call);
1504        struct list_head *node = v;
1505
1506        (*pos)++;
1507
1508        switch ((unsigned long)v) {
1509        case FORMAT_HEADER:
1510                node = common_head;
1511                break;
1512
1513        case FORMAT_FIELD_SEPERATOR:
1514                node = head;
1515                break;
1516
1517        case FORMAT_PRINTFMT:
1518                /* all done */
1519                return NULL;
1520        }
1521
1522        node = node->prev;
1523        if (node == common_head)
1524                return (void *)FORMAT_FIELD_SEPERATOR;
1525        else if (node == head)
1526                return (void *)FORMAT_PRINTFMT;
1527        else
1528                return node;
1529}
1530
1531static int f_show(struct seq_file *m, void *v)
1532{
1533        struct trace_event_call *call = event_file_data(m->private);
1534        struct ftrace_event_field *field;
1535        const char *array_descriptor;
1536
1537        switch ((unsigned long)v) {
1538        case FORMAT_HEADER:
1539                seq_printf(m, "name: %s\n", trace_event_name(call));
1540                seq_printf(m, "ID: %d\n", call->event.type);
1541                seq_puts(m, "format:\n");
1542                return 0;
1543
1544        case FORMAT_FIELD_SEPERATOR:
1545                seq_putc(m, '\n');
1546                return 0;
1547
1548        case FORMAT_PRINTFMT:
1549                seq_printf(m, "\nprint fmt: %s\n",
1550                           call->print_fmt);
1551                return 0;
1552        }
1553
1554        field = list_entry(v, struct ftrace_event_field, link);
1555        /*
1556         * Smartly shows the array type (except dynamic array).
1557         * Normal:
1558         *      field:TYPE VAR
1559         * If TYPE := TYPE[LEN], it is shown:
1560         *      field:TYPE VAR[LEN]
1561         */
1562        array_descriptor = strchr(field->type, '[');
1563
1564        if (str_has_prefix(field->type, "__data_loc"))
1565                array_descriptor = NULL;
1566
1567        if (!array_descriptor)
1568                seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1569                           field->type, field->name, field->offset,
1570                           field->size, !!field->is_signed);
1571        else
1572                seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1573                           (int)(array_descriptor - field->type),
1574                           field->type, field->name,
1575                           array_descriptor, field->offset,
1576                           field->size, !!field->is_signed);
1577
1578        return 0;
1579}
1580
1581static void *f_start(struct seq_file *m, loff_t *pos)
1582{
1583        void *p = (void *)FORMAT_HEADER;
1584        loff_t l = 0;
1585
1586        /* ->stop() is called even if ->start() fails */
1587        mutex_lock(&event_mutex);
1588        if (!event_file_data(m->private))
1589                return ERR_PTR(-ENODEV);
1590
1591        while (l < *pos && p)
1592                p = f_next(m, p, &l);
1593
1594        return p;
1595}
1596
1597static void f_stop(struct seq_file *m, void *p)
1598{
1599        mutex_unlock(&event_mutex);
1600}
1601
1602static const struct seq_operations trace_format_seq_ops = {
1603        .start          = f_start,
1604        .next           = f_next,
1605        .stop           = f_stop,
1606        .show           = f_show,
1607};
1608
1609static int trace_format_open(struct inode *inode, struct file *file)
1610{
1611        struct seq_file *m;
1612        int ret;
1613
1614        /* Do we want to hide event format files on tracefs lockdown? */
1615
1616        ret = seq_open(file, &trace_format_seq_ops);
1617        if (ret < 0)
1618                return ret;
1619
1620        m = file->private_data;
1621        m->private = file;
1622
1623        return 0;
1624}
1625
1626static ssize_t
1627event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1628{
1629        int id = (long)event_file_data(filp);
1630        char buf[32];
1631        int len;
1632
1633        if (unlikely(!id))
1634                return -ENODEV;
1635
1636        len = sprintf(buf, "%d\n", id);
1637
1638        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1639}
1640
1641static ssize_t
1642event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1643                  loff_t *ppos)
1644{
1645        struct trace_event_file *file;
1646        struct trace_seq *s;
1647        int r = -ENODEV;
1648
1649        if (*ppos)
1650                return 0;
1651
1652        s = kmalloc(sizeof(*s), GFP_KERNEL);
1653
1654        if (!s)
1655                return -ENOMEM;
1656
1657        trace_seq_init(s);
1658
1659        mutex_lock(&event_mutex);
1660        file = event_file_data(filp);
1661        if (file)
1662                print_event_filter(file, s);
1663        mutex_unlock(&event_mutex);
1664
1665        if (file)
1666                r = simple_read_from_buffer(ubuf, cnt, ppos,
1667                                            s->buffer, trace_seq_used(s));
1668
1669        kfree(s);
1670
1671        return r;
1672}
1673
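/*
 * Write handler for a single event's "filter" file.  The user string is
 * limited to one page, duplicated, and handed to apply_event_filter()
 * under event_mutex.  Assuming the usual event filter syntax, a typical
 * use from user space would look like:
 *
 *	echo 'prev_pid == 1' > events/sched/sched_switch/filter
 */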
1674static ssize_t
1675event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1676                   loff_t *ppos)
1677{
1678        struct trace_event_file *file;
1679        char *buf;
1680        int err = -ENODEV;
1681
1682        if (cnt >= PAGE_SIZE)
1683                return -EINVAL;
1684
1685        buf = memdup_user_nul(ubuf, cnt);
1686        if (IS_ERR(buf))
1687                return PTR_ERR(buf);
1688
1689        mutex_lock(&event_mutex);
1690        file = event_file_data(filp);
1691        if (file)
1692                err = apply_event_filter(file, buf);
1693        mutex_unlock(&event_mutex);
1694
1695        kfree(buf);
1696        if (err < 0)
1697                return err;
1698
1699        *ppos += cnt;
1700
1701        return cnt;
1702}
1703
1704static LIST_HEAD(event_subsystems);
1705
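/*
 * Open callback shared by the subsystem "filter" and "enable" files.
 * inode->i_private is a trace_subsystem_dir; verify under event_mutex
 * and trace_types_lock that it is still on some trace array's list and
 * still has events, then take references on both the subsystem dir and
 * the trace array before handing off to tracing_open_generic().
 */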
1706static int subsystem_open(struct inode *inode, struct file *filp)
1707{
1708        struct event_subsystem *system = NULL;
1709        struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
1710        struct trace_array *tr;
1711        int ret;
1712
1713        if (tracing_is_disabled())
1714                return -ENODEV;
1715
1716        /* Make sure the system still exists */
1717        mutex_lock(&event_mutex);
1718        mutex_lock(&trace_types_lock);
1719        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1720                list_for_each_entry(dir, &tr->systems, list) {
1721                        if (dir == inode->i_private) {
1722                                /* Don't open systems with no events */
1723                                if (dir->nr_events) {
1724                                        __get_system_dir(dir);
1725                                        system = dir->subsystem;
1726                                }
1727                                goto exit_loop;
1728                        }
1729                }
1730        }
1731 exit_loop:
1732        mutex_unlock(&trace_types_lock);
1733        mutex_unlock(&event_mutex);
1734
1735        if (!system)
1736                return -ENODEV;
1737
1738        /* Some versions of gcc think dir can be uninitialized here */
1739        WARN_ON(!dir);
1740
1741        /* Still need to increment the ref count of the system */
1742        if (trace_array_get(tr) < 0) {
1743                put_system(dir);
1744                return -ENODEV;
1745        }
1746
1747        ret = tracing_open_generic(inode, filp);
1748        if (ret < 0) {
1749                trace_array_put(tr);
1750                put_system(dir);
1751        }
1752
1753        return ret;
1754}
1755
1756static int system_tr_open(struct inode *inode, struct file *filp)
1757{
1758        struct trace_subsystem_dir *dir;
1759        struct trace_array *tr = inode->i_private;
1760        int ret;
1761
1762        /* Make a temporary dir that has no system but points to tr */
1763        dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1764        if (!dir)
1765                return -ENOMEM;
1766
1767        ret = tracing_open_generic_tr(inode, filp);
1768        if (ret < 0) {
1769                kfree(dir);
1770                return ret;
1771        }
1772        dir->tr = tr;
1773        filp->private_data = dir;
1774
1775        return 0;
1776}
1777
1778static int subsystem_release(struct inode *inode, struct file *file)
1779{
1780        struct trace_subsystem_dir *dir = file->private_data;
1781
1782        trace_array_put(dir->tr);
1783
1784        /*
1785         * If dir->subsystem is NULL, then this is a temporary
1786         * descriptor that was made for a trace_array to enable
1787         * all subsystems.
1788         */
1789        if (dir->subsystem)
1790                put_system(dir);
1791        else
1792                kfree(dir);
1793
1794        return 0;
1795}
1796
1797static ssize_t
1798subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1799                      loff_t *ppos)
1800{
1801        struct trace_subsystem_dir *dir = filp->private_data;
1802        struct event_subsystem *system = dir->subsystem;
1803        struct trace_seq *s;
1804        int r;
1805
1806        if (*ppos)
1807                return 0;
1808
1809        s = kmalloc(sizeof(*s), GFP_KERNEL);
1810        if (!s)
1811                return -ENOMEM;
1812
1813        trace_seq_init(s);
1814
1815        print_subsystem_event_filter(system, s);
1816        r = simple_read_from_buffer(ubuf, cnt, ppos,
1817                                    s->buffer, trace_seq_used(s));
1818
1819        kfree(s);
1820
1821        return r;
1822}
1823
1824static ssize_t
1825subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1826                       loff_t *ppos)
1827{
1828        struct trace_subsystem_dir *dir = filp->private_data;
1829        char *buf;
1830        int err;
1831
1832        if (cnt >= PAGE_SIZE)
1833                return -EINVAL;
1834
1835        buf = memdup_user_nul(ubuf, cnt);
1836        if (IS_ERR(buf))
1837                return PTR_ERR(buf);
1838
1839        err = apply_subsystem_event_filter(dir, buf);
1840        kfree(buf);
1841        if (err < 0)
1842                return err;
1843
1844        *ppos += cnt;
1845
1846        return cnt;
1847}
1848
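/*
 * Generic read handler for the header description files: the file's
 * private_data is a callback that writes the header text into a
 * trace_seq, which is then copied out to user space.
 */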
1849static ssize_t
1850show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1851{
1852        int (*func)(struct trace_seq *s) = filp->private_data;
1853        struct trace_seq *s;
1854        int r;
1855
1856        if (*ppos)
1857                return 0;
1858
1859        s = kmalloc(sizeof(*s), GFP_KERNEL);
1860        if (!s)
1861                return -ENOMEM;
1862
1863        trace_seq_init(s);
1864
1865        func(s);
1866        r = simple_read_from_buffer(ubuf, cnt, ppos,
1867                                    s->buffer, trace_seq_used(s));
1868
1869        kfree(s);
1870
1871        return r;
1872}
1873
1874static void ignore_task_cpu(void *data)
1875{
1876        struct trace_array *tr = data;
1877        struct trace_pid_list *pid_list;
1878        struct trace_pid_list *no_pid_list;
1879
1880        /*
1881         * This function is called by on_each_cpu() while the
1882         * event_mutex is held.
1883         */
1884        pid_list = rcu_dereference_protected(tr->filtered_pids,
1885                                             mutex_is_locked(&event_mutex));
1886        no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
1887                                             mutex_is_locked(&event_mutex));
1888
1889        this_cpu_write(tr->array_buffer.data->ignore_pid,
1890                       trace_ignore_this_task(pid_list, no_pid_list, current));
1891}
1892
1893static void register_pid_events(struct trace_array *tr)
1894{
1895        /*
1896         * Register a probe that is called before all other probes
1897         * to set ignore_pid if next or prev do not match.
1898         * Register a probe that is called after all other probes
1899         * to only keep ignore_pid set if next pid matches.
1900         */
1901        register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
1902                                         tr, INT_MAX);
1903        register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
1904                                         tr, 0);
1905
1906        register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
1907                                         tr, INT_MAX);
1908        register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
1909                                         tr, 0);
1910
1911        register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
1912                                             tr, INT_MAX);
1913        register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
1914                                             tr, 0);
1915
1916        register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
1917                                         tr, INT_MAX);
1918        register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
1919                                         tr, 0);
1920}
1921
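/*
 * Common write path for the event pid filter files.  @type selects
 * whether tr->filtered_pids (TRACE_PIDS) or tr->filtered_no_pids
 * (TRACE_NO_PIDS) is updated.  The new list is built by
 * trace_pid_write(), published with rcu_assign_pointer(), the
 * PID_FILTER flag is set on every event file, and the sched probes
 * are registered when the first pid list is installed.
 */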
1922static ssize_t
1923event_pid_write(struct file *filp, const char __user *ubuf,
1924                size_t cnt, loff_t *ppos, int type)
1925{
1926        struct seq_file *m = filp->private_data;
1927        struct trace_array *tr = m->private;
1928        struct trace_pid_list *filtered_pids = NULL;
1929        struct trace_pid_list *other_pids = NULL;
1930        struct trace_pid_list *pid_list;
1931        struct trace_event_file *file;
1932        ssize_t ret;
1933
1934        if (!cnt)
1935                return 0;
1936
1937        ret = tracing_update_buffers();
1938        if (ret < 0)
1939                return ret;
1940
1941        mutex_lock(&event_mutex);
1942
1943        if (type == TRACE_PIDS) {
1944                filtered_pids = rcu_dereference_protected(tr->filtered_pids,
1945                                                          lockdep_is_held(&event_mutex));
1946                other_pids = rcu_dereference_protected(tr->filtered_no_pids,
1947                                                          lockdep_is_held(&event_mutex));
1948        } else {
1949                filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
1950                                                          lockdep_is_held(&event_mutex));
1951                other_pids = rcu_dereference_protected(tr->filtered_pids,
1952                                                          lockdep_is_held(&event_mutex));
1953        }
1954
1955        ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
1956        if (ret < 0)
1957                goto out;
1958
1959        if (type == TRACE_PIDS)
1960                rcu_assign_pointer(tr->filtered_pids, pid_list);
1961        else
1962                rcu_assign_pointer(tr->filtered_no_pids, pid_list);
1963
1964        list_for_each_entry(file, &tr->events, list) {
1965                set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1966        }
1967
1968        if (filtered_pids) {
1969                tracepoint_synchronize_unregister();
1970                trace_free_pid_list(filtered_pids);
1971        } else if (pid_list && !other_pids) {
1972                register_pid_events(tr);
1973        }
1974
1975        /*
1976         * Ignoring of pids is done at task switch. But we have to
1977         * check for those tasks that are currently running.
1978         * Always do this in case a pid was appended or removed.
1979         */
1980        on_each_cpu(ignore_task_cpu, tr, 1);
1981
1982 out:
1983        mutex_unlock(&event_mutex);
1984
1985        if (ret > 0)
1986                *ppos += ret;
1987
1988        return ret;
1989}
1990
1991static ssize_t
1992ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
1993                       size_t cnt, loff_t *ppos)
1994{
1995        return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
1996}
1997
1998static ssize_t
1999ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
2000                        size_t cnt, loff_t *ppos)
2001{
2002        return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
2003}
2004
2005static int ftrace_event_avail_open(struct inode *inode, struct file *file);
2006static int ftrace_event_set_open(struct inode *inode, struct file *file);
2007static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
2008static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
2009static int ftrace_event_release(struct inode *inode, struct file *file);
2010
2011static const struct seq_operations show_event_seq_ops = {
2012        .start = t_start,
2013        .next = t_next,
2014        .show = t_show,
2015        .stop = t_stop,
2016};
2017
2018static const struct seq_operations show_set_event_seq_ops = {
2019        .start = s_start,
2020        .next = s_next,
2021        .show = t_show,
2022        .stop = t_stop,
2023};
2024
2025static const struct seq_operations show_set_pid_seq_ops = {
2026        .start = p_start,
2027        .next = p_next,
2028        .show = trace_pid_show,
2029        .stop = p_stop,
2030};
2031
2032static const struct seq_operations show_set_no_pid_seq_ops = {
2033        .start = np_start,
2034        .next = np_next,
2035        .show = trace_pid_show,
2036        .stop = p_stop,
2037};
2038
2039static const struct file_operations ftrace_avail_fops = {
2040        .open = ftrace_event_avail_open,
2041        .read = seq_read,
2042        .llseek = seq_lseek,
2043        .release = seq_release,
2044};
2045
2046static const struct file_operations ftrace_set_event_fops = {
2047        .open = ftrace_event_set_open,
2048        .read = seq_read,
2049        .write = ftrace_event_write,
2050        .llseek = seq_lseek,
2051        .release = ftrace_event_release,
2052};
2053
2054static const struct file_operations ftrace_set_event_pid_fops = {
2055        .open = ftrace_event_set_pid_open,
2056        .read = seq_read,
2057        .write = ftrace_event_pid_write,
2058        .llseek = seq_lseek,
2059        .release = ftrace_event_release,
2060};
2061
2062static const struct file_operations ftrace_set_event_notrace_pid_fops = {
2063        .open = ftrace_event_set_npid_open,
2064        .read = seq_read,
2065        .write = ftrace_event_npid_write,
2066        .llseek = seq_lseek,
2067        .release = ftrace_event_release,
2068};
2069
2070static const struct file_operations ftrace_enable_fops = {
2071        .open = tracing_open_generic,
2072        .read = event_enable_read,
2073        .write = event_enable_write,
2074        .llseek = default_llseek,
2075};
2076
2077static const struct file_operations ftrace_event_format_fops = {
2078        .open = trace_format_open,
2079        .read = seq_read,
2080        .llseek = seq_lseek,
2081        .release = seq_release,
2082};
2083
2084static const struct file_operations ftrace_event_id_fops = {
2085        .read = event_id_read,
2086        .llseek = default_llseek,
2087};
2088
2089static const struct file_operations ftrace_event_filter_fops = {
2090        .open = tracing_open_generic,
2091        .read = event_filter_read,
2092        .write = event_filter_write,
2093        .llseek = default_llseek,
2094};
2095
2096static const struct file_operations ftrace_subsystem_filter_fops = {
2097        .open = subsystem_open,
2098        .read = subsystem_filter_read,
2099        .write = subsystem_filter_write,
2100        .llseek = default_llseek,
2101        .release = subsystem_release,
2102};
2103
2104static const struct file_operations ftrace_system_enable_fops = {
2105        .open = subsystem_open,
2106        .read = system_enable_read,
2107        .write = system_enable_write,
2108        .llseek = default_llseek,
2109        .release = subsystem_release,
2110};
2111
2112static const struct file_operations ftrace_tr_enable_fops = {
2113        .open = system_tr_open,
2114        .read = system_enable_read,
2115        .write = system_enable_write,
2116        .llseek = default_llseek,
2117        .release = subsystem_release,
2118};
2119
2120static const struct file_operations ftrace_show_header_fops = {
2121        .open = tracing_open_generic,
2122        .read = show_header,
2123        .llseek = default_llseek,
2124};
2125
2126static int
2127ftrace_event_open(struct inode *inode, struct file *file,
2128                  const struct seq_operations *seq_ops)
2129{
2130        struct seq_file *m;
2131        int ret;
2132
2133        ret = security_locked_down(LOCKDOWN_TRACEFS);
2134        if (ret)
2135                return ret;
2136
2137        ret = seq_open(file, seq_ops);
2138        if (ret < 0)
2139                return ret;
2140        m = file->private_data;
2141        /* copy tr over to seq ops */
2142        m->private = inode->i_private;
2143
2144        return ret;
2145}
2146
2147static int ftrace_event_release(struct inode *inode, struct file *file)
2148{
2149        struct trace_array *tr = inode->i_private;
2150
2151        trace_array_put(tr);
2152
2153        return seq_release(inode, file);
2154}
2155
2156static int
2157ftrace_event_avail_open(struct inode *inode, struct file *file)
2158{
2159        const struct seq_operations *seq_ops = &show_event_seq_ops;
2160
2161        /* Checks for tracefs lockdown */
2162        return ftrace_event_open(inode, file, seq_ops);
2163}
2164
2165static int
2166ftrace_event_set_open(struct inode *inode, struct file *file)
2167{
2168        const struct seq_operations *seq_ops = &show_set_event_seq_ops;
2169        struct trace_array *tr = inode->i_private;
2170        int ret;
2171
2172        ret = tracing_check_open_get_tr(tr);
2173        if (ret)
2174                return ret;
2175
2176        if ((file->f_mode & FMODE_WRITE) &&
2177            (file->f_flags & O_TRUNC))
2178                ftrace_clear_events(tr);
2179
2180        ret = ftrace_event_open(inode, file, seq_ops);
2181        if (ret < 0)
2182                trace_array_put(tr);
2183        return ret;
2184}
2185
2186static int
2187ftrace_event_set_pid_open(struct inode *inode, struct file *file)
2188{
2189        const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
2190        struct trace_array *tr = inode->i_private;
2191        int ret;
2192
2193        ret = tracing_check_open_get_tr(tr);
2194        if (ret)
2195                return ret;
2196
2197        if ((file->f_mode & FMODE_WRITE) &&
2198            (file->f_flags & O_TRUNC))
2199                ftrace_clear_event_pids(tr, TRACE_PIDS);
2200
2201        ret = ftrace_event_open(inode, file, seq_ops);
2202        if (ret < 0)
2203                trace_array_put(tr);
2204        return ret;
2205}
2206
2207static int
2208ftrace_event_set_npid_open(struct inode *inode, struct file *file)
2209{
2210        const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
2211        struct trace_array *tr = inode->i_private;
2212        int ret;
2213
2214        ret = tracing_check_open_get_tr(tr);
2215        if (ret)
2216                return ret;
2217
2218        if ((file->f_mode & FMODE_WRITE) &&
2219            (file->f_flags & O_TRUNC))
2220                ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
2221
2222        ret = ftrace_event_open(inode, file, seq_ops);
2223        if (ret < 0)
2224                trace_array_put(tr);
2225        return ret;
2226}
2227
2228static struct event_subsystem *
2229create_new_subsystem(const char *name)
2230{
2231        struct event_subsystem *system;
2232
2233        /* need to create new entry */
2234        system = kmalloc(sizeof(*system), GFP_KERNEL);
2235        if (!system)
2236                return NULL;
2237
2238        system->ref_count = 1;
2239
2240        /* Only allocate if dynamic (kprobes and modules) */
2241        system->name = kstrdup_const(name, GFP_KERNEL);
2242        if (!system->name)
2243                goto out_free;
2244
2245        system->filter = NULL;
2246
2247        system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
2248        if (!system->filter)
2249                goto out_free;
2250
2251        list_add(&system->list, &event_subsystems);
2252
2253        return system;
2254
2255 out_free:
2256        kfree_const(system->name);
2257        kfree(system);
2258        return NULL;
2259}
2260
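/*
 * Find or create the events/<system>/ directory for @name under
 * @parent.  An existing trace_subsystem_dir on this trace array is
 * reused (bumping nr_events); otherwise a new dir is allocated,
 * reusing a registered event_subsystem of the same name when one
 * exists, or creating one with create_new_subsystem().
 */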
2261static struct dentry *
2262event_subsystem_dir(struct trace_array *tr, const char *name,
2263                    struct trace_event_file *file, struct dentry *parent)
2264{
2265        struct trace_subsystem_dir *dir;
2266        struct event_subsystem *system;
2267        struct dentry *entry;
2268
2269        /* First see if we already created this dir */
2270        list_for_each_entry(dir, &tr->systems, list) {
2271                system = dir->subsystem;
2272                if (strcmp(system->name, name) == 0) {
2273                        dir->nr_events++;
2274                        file->system = dir;
2275                        return dir->entry;
2276                }
2277        }
2278
2279        /* Now see if the system itself exists. */
2280        list_for_each_entry(system, &event_subsystems, list) {
2281                if (strcmp(system->name, name) == 0)
2282                        break;
2283        }
2284        /* Reset system variable when not found */
2285        if (&system->list == &event_subsystems)
2286                system = NULL;
2287
2288        dir = kmalloc(sizeof(*dir), GFP_KERNEL);
2289        if (!dir)
2290                goto out_fail;
2291
2292        if (!system) {
2293                system = create_new_subsystem(name);
2294                if (!system)
2295                        goto out_free;
2296        } else
2297                __get_system(system);
2298
2299        dir->entry = tracefs_create_dir(name, parent);
2300        if (!dir->entry) {
2301                pr_warn("Failed to create system directory %s\n", name);
2302                __put_system(system);
2303                goto out_free;
2304        }
2305
2306        dir->tr = tr;
2307        dir->ref_count = 1;
2308        dir->nr_events = 1;
2309        dir->subsystem = system;
2310        file->system = dir;
2311
2312        /* the ftrace system is special, do not create enable or filter files */
2313        if (strcmp(name, "ftrace") != 0) {
2314
2315                entry = tracefs_create_file("filter", 0644, dir->entry, dir,
2316                                            &ftrace_subsystem_filter_fops);
2317                if (!entry) {
2318                        kfree(system->filter);
2319                        system->filter = NULL;
2320                        pr_warn("Could not create tracefs '%s/filter' entry\n", name);
2321                }
2322
2323                trace_create_file("enable", 0644, dir->entry, dir,
2324                                  &ftrace_system_enable_fops);
2325        }
2326
2327        list_add(&dir->list, &tr->systems);
2328
2329        return dir->entry;
2330
2331 out_free:
2332        kfree(dir);
2333 out_fail:
2334        /* Only print this message if the failure was a memory allocation */
2335        if (!dir || !system)
2336                pr_warn("No memory to create event subsystem %s\n", name);
2337        return NULL;
2338}
2339
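/*
 * Populate the field list of @call from its class fields_array, laying
 * each field out after struct trace_entry with the requested alignment.
 * Classes shared by several events only have their fields defined once.
 */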
2340static int
2341event_define_fields(struct trace_event_call *call)
2342{
2343        struct list_head *head;
2344        int ret = 0;
2345
2346        /*
2347         * Other events may have the same class. Only update
2348         * the fields if they are not already defined.
2349         */
2350        head = trace_get_fields(call);
2351        if (list_empty(head)) {
2352                struct trace_event_fields *field = call->class->fields_array;
2353                unsigned int offset = sizeof(struct trace_entry);
2354
2355                for (; field->type; field++) {
2356                        if (field->type == TRACE_FUNCTION_TYPE) {
2357                                field->define_fields(call);
2358                                break;
2359                        }
2360
2361                        offset = ALIGN(offset, field->align);
2362                        ret = trace_define_field(call, field->type, field->name,
2363                                                 offset, field->size,
2364                                                 field->is_signed, field->filter_type);
2365                        if (WARN_ON_ONCE(ret)) {
2366                                pr_err("error code is %d\n", ret);
2367                                break;
2368                        }
2369
2370                        offset += field->size;
2371                }
2372        }
2373
2374        return ret;
2375}
2376
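/*
 * Create the tracefs directory for one event instance:
 * events/<system>/<event>/ with its "enable", "id", "filter",
 * "trigger", "format" and (config dependent) "hist", "hist_debug"
 * and "inject" files.
 */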
2377static int
2378event_create_dir(struct dentry *parent, struct trace_event_file *file)
2379{
2380        struct trace_event_call *call = file->event_call;
2381        struct trace_array *tr = file->tr;
2382        struct dentry *d_events;
2383        const char *name;
2384        int ret;
2385
2386        /*
2387         * If the trace point header did not define TRACE_SYSTEM
2388         * then the system would be called "TRACE_SYSTEM".
2389         */
2390        if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
2391                d_events = event_subsystem_dir(tr, call->class->system, file, parent);
2392                if (!d_events)
2393                        return -ENOMEM;
2394        } else
2395                d_events = parent;
2396
2397        name = trace_event_name(call);
2398        file->dir = tracefs_create_dir(name, d_events);
2399        if (!file->dir) {
2400                pr_warn("Could not create tracefs '%s' directory\n", name);
2401                return -1;
2402        }
2403
2404        if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
2405                trace_create_file("enable", 0644, file->dir, file,
2406                                  &ftrace_enable_fops);
2407
2408#ifdef CONFIG_PERF_EVENTS
2409        if (call->event.type && call->class->reg)
2410                trace_create_file("id", 0444, file->dir,
2411                                  (void *)(long)call->event.type,
2412                                  &ftrace_event_id_fops);
2413#endif
2414
2415        ret = event_define_fields(call);
2416        if (ret < 0) {
2417                pr_warn("Could not initialize trace point events/%s\n", name);
2418                return ret;
2419        }
2420
2421        /*
2422         * Only event directories that can be enabled should have
2423         * triggers or filters.
2424         */
2425        if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
2426                trace_create_file("filter", 0644, file->dir, file,
2427                                  &ftrace_event_filter_fops);
2428
2429                trace_create_file("trigger", 0644, file->dir, file,
2430                                  &event_trigger_fops);
2431        }
2432
2433#ifdef CONFIG_HIST_TRIGGERS
2434        trace_create_file("hist", 0444, file->dir, file,
2435                          &event_hist_fops);
2436#endif
2437#ifdef CONFIG_HIST_TRIGGERS_DEBUG
2438        trace_create_file("hist_debug", 0444, file->dir, file,
2439                          &event_hist_debug_fops);
2440#endif
2441        trace_create_file("format", 0444, file->dir, call,
2442                          &ftrace_event_format_fops);
2443
2444#ifdef CONFIG_TRACE_EVENT_INJECT
2445        if (call->event.type && call->class->reg)
2446                trace_create_file("inject", 0200, file->dir, file,
2447                                  &event_inject_fops);
2448#endif
2449
2450        return 0;
2451}
2452
2453static void remove_event_from_tracers(struct trace_event_call *call)
2454{
2455        struct trace_event_file *file;
2456        struct trace_array *tr;
2457
2458        do_for_each_event_file_safe(tr, file) {
2459                if (file->event_call != call)
2460                        continue;
2461
2462                remove_event_file_dir(file);
2463                /*
2464                 * The do_for_each_event_file_safe() is
2465                 * a double loop. After finding the call for this
2466                 * trace_array, we use break to jump to the next
2467                 * trace_array.
2468                 */
2469                break;
2470        } while_for_each_event_file();
2471}
2472
2473static void event_remove(struct trace_event_call *call)
2474{
2475        struct trace_array *tr;
2476        struct trace_event_file *file;
2477
2478        do_for_each_event_file(tr, file) {
2479                if (file->event_call != call)
2480                        continue;
2481
2482                if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
2483                        tr->clear_trace = true;
2484
2485                ftrace_event_enable_disable(file, 0);
2486                /*
2487                 * The do_for_each_event_file() is
2488                 * a double loop. After finding the call for this
2489                 * trace_array, we use break to jump to the next
2490                 * trace_array.
2491                 */
2492                break;
2493        } while_for_each_event_file();
2494
2495        if (call->event.funcs)
2496                __unregister_trace_event(&call->event);
2497        remove_event_from_tracers(call);
2498        list_del(&call->list);
2499}
2500
2501static int event_init(struct trace_event_call *call)
2502{
2503        int ret = 0;
2504        const char *name;
2505
2506        name = trace_event_name(call);
2507        if (WARN_ON(!name))
2508                return -EINVAL;
2509
2510        if (call->class->raw_init) {
2511                ret = call->class->raw_init(call);
2512                if (ret < 0 && ret != -ENOSYS)
2513                        pr_warn("Could not initialize trace events/%s\n", name);
2514        }
2515
2516        return ret;
2517}
2518
2519static int
2520__register_event(struct trace_event_call *call, struct module *mod)
2521{
2522        int ret;
2523
2524        ret = event_init(call);
2525        if (ret < 0)
2526                return ret;
2527
2528        list_add(&call->list, &ftrace_events);
2529        if (call->flags & TRACE_EVENT_FL_DYNAMIC)
2530                atomic_set(&call->refcnt, 0);
2531        else
2532                call->module = mod;
2533
2534        return 0;
2535}
2536
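/*
 * Helper for update_event_printk(): overwrite the eval (enum/sizeof)
 * name at @ptr with its decimal value and pull the rest of the string
 * in behind it.  As an illustrative (hypothetical) example, a map of
 * { "MY_FLAG" -> 4 } would rewrite a print_fmt fragment such as
 *
 *	__print_symbolic(REC->flag, {MY_FLAG, "my-flag"})
 *
 * in place to
 *
 *	__print_symbolic(REC->flag, {4, "my-flag"})
 */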
2537static char *eval_replace(char *ptr, struct trace_eval_map *map, int len)
2538{
2539        int rlen;
2540        int elen;
2541
2542        /* Find the length of the eval value as a string */
2543        elen = snprintf(ptr, 0, "%ld", map->eval_value);
2544        /* Make sure there's enough room to replace the string with the value */
2545        if (len < elen)
2546                return NULL;
2547
2548        snprintf(ptr, elen + 1, "%ld", map->eval_value);
2549
2550        /* Get the rest of the string of ptr */
2551        rlen = strlen(ptr + len);
2552        memmove(ptr + elen, ptr + len, rlen);
2553        /* Make sure we end the new string */
2554        ptr[elen + rlen] = 0;
2555
2556        return ptr + elen;
2557}
2558
2559static void update_event_printk(struct trace_event_call *call,
2560                                struct trace_eval_map *map)
2561{
2562        char *ptr;
2563        int quote = 0;
2564        int len = strlen(map->eval_string);
2565
2566        for (ptr = call->print_fmt; *ptr; ptr++) {
2567                if (*ptr == '\\') {
2568                        ptr++;
2569                        /* paranoid */
2570                        if (!*ptr)
2571                                break;
2572                        continue;
2573                }
2574                if (*ptr == '"') {
2575                        quote ^= 1;
2576                        continue;
2577                }
2578                if (quote)
2579                        continue;
2580                if (isdigit(*ptr)) {
2581                        /* skip numbers */
2582                        do {
2583                                ptr++;
2584                                /* Check for alpha chars like ULL */
2585                        } while (isalnum(*ptr));
2586                        if (!*ptr)
2587                                break;
2588                        /*
2589                         * A number must have some kind of delimiter after
2590                         * it, and we can ignore that too.
2591                         */
2592                        continue;
2593                }
2594                if (isalpha(*ptr) || *ptr == '_') {
2595                        if (strncmp(map->eval_string, ptr, len) == 0 &&
2596                            !isalnum(ptr[len]) && ptr[len] != '_') {
2597                                ptr = eval_replace(ptr, map, len);
2598                                /* enum/sizeof string smaller than value */
2599                                if (WARN_ON_ONCE(!ptr))
2600                                        return;
2601                                /*
2602                                 * No need to decrement here, as eval_replace()
2603                                 * returns the pointer to the character past
2604                                 * the eval, and two evals cannot be placed
2605                                 * back to back without something in between.
2606                                 * We can skip that something in between.
2607                                 */
2608                                continue;
2609                        }
2610                skip_more:
2611                        do {
2612                                ptr++;
2613                        } while (isalnum(*ptr) || *ptr == '_');
2614                        if (!*ptr)
2615                                break;
2616                        /*
2617                         * If what comes after this variable is a '.' or
2618                         * '->' then we can continue to ignore that string.
2619                         */
2620                        if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2621                                ptr += *ptr == '.' ? 1 : 2;
2622                                if (!*ptr)
2623                                        break;
2624                                goto skip_more;
2625                        }
2626                        /*
2627                         * Once again, we can skip the delimiter that came
2628                         * after the string.
2629                         */
2630                        continue;
2631                }
2632        }
2633}
2634
2635void trace_event_eval_update(struct trace_eval_map **map, int len)
2636{
2637        struct trace_event_call *call, *p;
2638        const char *last_system = NULL;
2639        bool first = false;
2640        int last_i;
2641        int i;
2642
2643        down_write(&trace_event_sem);
2644        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2645                /* events are usually grouped together by system */
2646                if (!last_system || call->class->system != last_system) {
2647                        first = true;
2648                        last_i = 0;
2649                        last_system = call->class->system;
2650                }
2651
2652                /*
2653                 * Since calls are grouped by systems, the likelihood that the
2654                 * next call in the iteration belongs to the same system as the
2655                 * previous call is high. As an optimization, we skip searching
2656                 * for a map[] that matches the call's system if the last call
2657                 * was from the same system. That's what last_i is for. If the
2658                 * call has the same system as the previous call, then last_i
2659                 * will be the index of the first map[] that has a matching
2660                 * system.
2661                 */
2662                for (i = last_i; i < len; i++) {
2663                        if (call->class->system == map[i]->system) {
2664                                /* Save the first system if need be */
2665                                if (first) {
2666                                        last_i = i;
2667                                        first = false;
2668                                }
2669                                update_event_printk(call, map[i]);
2670                        }
2671                }
2672        }
2673        up_write(&trace_event_sem);
2674}
2675
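/*
 * Allocate a trace_event_file binding @call to @tr and add it to the
 * trace array's event list.  file->sm_ref and file->tm_ref start at
 * zero and the trigger list starts out empty.
 */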
2676static struct trace_event_file *
2677trace_create_new_event(struct trace_event_call *call,
2678                       struct trace_array *tr)
2679{
2680        struct trace_event_file *file;
2681
2682        file = kmem_cache_alloc(file_cachep, GFP_TRACE);
2683        if (!file)
2684                return NULL;
2685
2686        file->event_call = call;
2687        file->tr = tr;
2688        atomic_set(&file->sm_ref, 0);
2689        atomic_set(&file->tm_ref, 0);
2690        INIT_LIST_HEAD(&file->triggers);
2691        list_add(&file->list, &tr->events);
2692
2693        return file;
2694}
2695
2696/* Add an event to a trace directory */
2697static int
2698__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
2699{
2700        struct trace_event_file *file;
2701
2702        file = trace_create_new_event(call, tr);
2703        if (!file)
2704                return -ENOMEM;
2705
2706        if (eventdir_initialized)
2707                return event_create_dir(tr->event_dir, file);
2708        else
2709                return event_define_fields(call);
2710}
2711
2712/*
2713 * Just create a descriptor for early init. A descriptor is required
2714 * for enabling events at boot. We want to enable events before
2715 * the filesystem is initialized.
2716 */
2717static int
2718__trace_early_add_new_event(struct trace_event_call *call,
2719                            struct trace_array *tr)
2720{
2721        struct trace_event_file *file;
2722
2723        file = trace_create_new_event(call, tr);
2724        if (!file)
2725                return -ENOMEM;
2726
2727        return event_define_fields(call);
2728}
2729
2730struct ftrace_module_file_ops;
2731static void __add_event_to_tracers(struct trace_event_call *call);
2732
2733/* Add an additional event_call dynamically */
2734int trace_add_event_call(struct trace_event_call *call)
2735{
2736        int ret;
2737        lockdep_assert_held(&event_mutex);
2738
2739        mutex_lock(&trace_types_lock);
2740
2741        ret = __register_event(call, NULL);
2742        if (ret >= 0)
2743                __add_event_to_tracers(call);
2744
2745        mutex_unlock(&trace_types_lock);
2746        return ret;
2747}
2748
2749/*
2750 * Must be called under locking of trace_types_lock, event_mutex and
2751 * trace_event_sem.
2752 */
2753static void __trace_remove_event_call(struct trace_event_call *call)
2754{
2755        event_remove(call);
2756        trace_destroy_fields(call);
2757        free_event_filter(call->filter);
2758        call->filter = NULL;
2759}
2760
2761static int probe_remove_event_call(struct trace_event_call *call)
2762{
2763        struct trace_array *tr;
2764        struct trace_event_file *file;
2765
2766#ifdef CONFIG_PERF_EVENTS
2767        if (call->perf_refcount)
2768                return -EBUSY;
2769#endif
2770        do_for_each_event_file(tr, file) {
2771                if (file->event_call != call)
2772                        continue;
2773                /*
2774                 * We can't rely on the ftrace_event_enable_disable(enable => 0)
2775                 * that we are about to do; EVENT_FILE_FL_SOFT_MODE can suppress
2776                 * TRACE_REG_UNREGISTER.
2777                 */
2778                if (file->flags & EVENT_FILE_FL_ENABLED)
2779                        return -EBUSY;
2780                /*
2781                 * The do_for_each_event_file() is
2782                 * a double loop. After finding the call for this
2783                 * trace_array, we use break to jump to the next
2784                 * trace_array.
2785                 */
2786                break;
2787        } while_for_each_event_file();
2788
2789        __trace_remove_event_call(call);
2790
2791        return 0;
2792}
2793
2794/* Remove an event_call */
2795int trace_remove_event_call(struct trace_event_call *call)
2796{
2797        int ret;
2798
2799        lockdep_assert_held(&event_mutex);
2800
2801        mutex_lock(&trace_types_lock);
2802        down_write(&trace_event_sem);
2803        ret = probe_remove_event_call(call);
2804        up_write(&trace_event_sem);
2805        mutex_unlock(&trace_types_lock);
2806
2807        return ret;
2808}
2809
2810#define for_each_event(event, start, end)                       \
2811        for (event = start;                                     \
2812             (unsigned long)event < (unsigned long)end;         \
2813             event++)
2814
2815#ifdef CONFIG_MODULES
2816
2817static void trace_module_add_events(struct module *mod)
2818{
2819        struct trace_event_call **call, **start, **end;
2820
2821        if (!mod->num_trace_events)
2822                return;
2823
2824        /* Don't add infrastructure for mods without tracepoints */
2825        if (trace_module_has_bad_taint(mod)) {
2826                pr_err("%s: module has bad taint, not creating trace events\n",
2827                       mod->name);
2828                return;
2829        }
2830
2831        start = mod->trace_events;
2832        end = mod->trace_events + mod->num_trace_events;
2833
2834        for_each_event(call, start, end) {
2835                __register_event(*call, mod);
2836                __add_event_to_tracers(*call);
2837        }
2838}
2839
2840static void trace_module_remove_events(struct module *mod)
2841{
2842        struct trace_event_call *call, *p;
2843
2844        down_write(&trace_event_sem);
2845        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2846                if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
2847                        continue;
2848                if (call->module == mod)
2849                        __trace_remove_event_call(call);
2850        }
2851        up_write(&trace_event_sem);
2852
2853        /*
2854         * It is safest to reset the ring buffer if the module being unloaded
2855         * registered any events that were used. The only worry is if
2856         * a new module gets loaded, and takes on the same id as the events
2857         * of this module. When printing out the buffer, traced events left
2858         * over from this module may be decoded with the new module's event
2859         * formats and unexpected results may occur.
2860         */
2861        tracing_reset_all_online_cpus();
2862}
2863
2864static int trace_module_notify(struct notifier_block *self,
2865                               unsigned long val, void *data)
2866{
2867        struct module *mod = data;
2868
2869        mutex_lock(&event_mutex);
2870        mutex_lock(&trace_types_lock);
2871        switch (val) {
2872        case MODULE_STATE_COMING:
2873                trace_module_add_events(mod);
2874                break;
2875        case MODULE_STATE_GOING:
2876                trace_module_remove_events(mod);
2877                break;
2878        }
2879        mutex_unlock(&trace_types_lock);
2880        mutex_unlock(&event_mutex);
2881
2882        return NOTIFY_OK;
2883}
2884
2885static struct notifier_block trace_module_nb = {
2886        .notifier_call = trace_module_notify,
2887        .priority = 1, /* higher than trace.c module notify */
2888};
2889#endif /* CONFIG_MODULES */
2890
2891/* Create a new event directory structure for a trace directory. */
2892static void
2893__trace_add_event_dirs(struct trace_array *tr)
2894{
2895        struct trace_event_call *call;
2896        int ret;
2897
2898        list_for_each_entry(call, &ftrace_events, list) {
2899                ret = __trace_add_new_event(call, tr);
2900                if (ret < 0)
2901                        pr_warn("Could not create directory for event %s\n",
2902                                trace_event_name(call));
2903        }
2904}
2905
2906/* Returns any file that matches the system and event */
2907struct trace_event_file *
2908__find_event_file(struct trace_array *tr, const char *system, const char *event)
2909{
2910        struct trace_event_file *file;
2911        struct trace_event_call *call;
2912        const char *name;
2913
2914        list_for_each_entry(file, &tr->events, list) {
2915
2916                call = file->event_call;
2917                name = trace_event_name(call);
2918
2919                if (!name || !call->class)
2920                        continue;
2921
2922                if (strcmp(event, name) == 0 &&
2923                    strcmp(system, call->class->system) == 0)
2924                        return file;
2925        }
2926        return NULL;
2927}
2928
2929/* Returns a valid trace event file that matches the system and event */
2930struct trace_event_file *
2931find_event_file(struct trace_array *tr, const char *system, const char *event)
2932{
2933        struct trace_event_file *file;
2934
2935        file = __find_event_file(tr, system, event);
2936        if (!file || !file->event_call->class->reg ||
2937            file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
2938                return NULL;
2939
2940        return file;
2941}
2942
2943/**
2944 * trace_get_event_file - Find and return a trace event file
2945 * @instance: The name of the trace instance containing the event
2946 * @system: The name of the system containing the event
2947 * @event: The name of the event
2948 *
2949 * Return a trace event file given the trace instance name, trace
2950 * system, and trace event name.  If the instance name is NULL, it
2951 * refers to the top-level trace array.
2952 *
2953 * This function will look it up and return it if found, after calling
2954 * trace_array_get() to prevent the instance from going away, and
2955 * incrementing the event's module refcount to prevent it from being
2956 * removed.
2957 *
2958 * To release the file, call trace_put_event_file(), which will call
2959 * trace_array_put() and decrement the event's module refcount.
2960 *
2961 * Return: The trace event on success, ERR_PTR otherwise.
2962 */
2963struct trace_event_file *trace_get_event_file(const char *instance,
2964                                              const char *system,
2965                                              const char *event)
2966{
2967        struct trace_array *tr = top_trace_array();
2968        struct trace_event_file *file = NULL;
2969        int ret = -EINVAL;
2970
2971        if (instance) {
2972                tr = trace_array_find_get(instance);
2973                if (!tr)
2974                        return ERR_PTR(-ENOENT);
2975        } else {
2976                ret = trace_array_get(tr);
2977                if (ret)
2978                        return ERR_PTR(ret);
2979        }
2980
2981        mutex_lock(&event_mutex);
2982
2983        file = find_event_file(tr, system, event);
2984        if (!file) {
2985                trace_array_put(tr);
2986                ret = -EINVAL;
2987                goto out;
2988        }
2989
2990        /* Don't let event modules unload while in use */
2991        ret = trace_event_try_get_ref(file->event_call);
2992        if (!ret) {
2993                trace_array_put(tr);
2994                ret = -EBUSY;
2995                goto out;
2996        }
2997
2998        ret = 0;
2999 out:
3000        mutex_unlock(&event_mutex);
3001
3002        if (ret)
3003                file = ERR_PTR(ret);
3004
3005        return file;
3006}
3007EXPORT_SYMBOL_GPL(trace_get_event_file);
3008
3009/**
3010 * trace_put_event_file - Release a file from trace_get_event_file()
3011 * @file: The trace event file
3012 *
3013 * If a file was retrieved using trace_get_event_file(), this should
3014 * be called when it's no longer needed.  It will cancel the previous
3015 * trace_array_get() called by that function, and decrement the
3016 * event's module refcount.
3017 */
3018void trace_put_event_file(struct trace_event_file *file)
3019{
3020        mutex_lock(&event_mutex);
3021        trace_event_put_ref(file->event_call);
3022        mutex_unlock(&event_mutex);
3023
3024        trace_array_put(file->tr);
3025}
3026EXPORT_SYMBOL_GPL(trace_put_event_file);
3027
3028#ifdef CONFIG_DYNAMIC_FTRACE
3029
3030/* Avoid typos */
3031#define ENABLE_EVENT_STR        "enable_event"
3032#define DISABLE_EVENT_STR       "disable_event"
3033
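/*
 * State shared by the enable_event/disable_event function probes: the
 * target event file, an optional countdown, a reference count for the
 * mapped ips, and whether the probe soft-enables or soft-disables the
 * event.
 */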
3034struct event_probe_data {
3035        struct trace_event_file *file;
3036        unsigned long                   count;
3037        int                             ref;
3038        bool                            enable;
3039};
3040
3041static void update_event_probe(struct event_probe_data *data)
3042{
3043        if (data->enable)
3044                clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3045        else
3046                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3047}
3048
3049static void
3050event_enable_probe(unsigned long ip, unsigned long parent_ip,
3051                   struct trace_array *tr, struct ftrace_probe_ops *ops,
3052                   void *data)
3053{
3054        struct ftrace_func_mapper *mapper = data;
3055        struct event_probe_data *edata;
3056        void **pdata;
3057
3058        pdata = ftrace_func_mapper_find_ip(mapper, ip);
3059        if (!pdata || !*pdata)
3060                return;
3061
3062        edata = *pdata;
3063        update_event_probe(edata);
3064}
3065
3066static void
3067event_enable_count_probe(unsigned long ip, unsigned long parent_ip,
3068                         struct trace_array *tr, struct ftrace_probe_ops *ops,
3069                         void *data)
3070{
3071        struct ftrace_func_mapper *mapper = data;
3072        struct event_probe_data *edata;
3073        void **pdata;
3074
3075        pdata = ftrace_func_mapper_find_ip(mapper, ip);
3076        if (!pdata || !*pdata)
3077                return;
3078
3079        edata = *pdata;
3080
3081        if (!edata->count)
3082                return;
3083
3084        /* Skip if the event is in a state we want to switch to */
3085        if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
3086                return;
3087
3088        if (edata->count != -1)
3089                (edata->count)--;
3090
3091        update_event_probe(edata);
3092}
3093
3094static int
3095event_enable_print(struct seq_file *m, unsigned long ip,
3096                   struct ftrace_probe_ops *ops, void *data)
3097{
3098        struct ftrace_func_mapper *mapper = data;
3099        struct event_probe_data *edata;
3100        void **pdata;
3101
3102        pdata = ftrace_func_mapper_find_ip(mapper, ip);
3103
3104        if (WARN_ON_ONCE(!pdata || !*pdata))
3105                return 0;
3106
3107        edata = *pdata;
3108
3109        seq_printf(m, "%ps:", (void *)ip);
3110
3111        seq_printf(m, "%s:%s:%s",
3112                   edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
3113                   edata->file->event_call->class->system,
3114                   trace_event_name(edata->file->event_call));
3115
3116        if (edata->count == -1)
3117                seq_puts(m, ":unlimited\n");
3118        else
3119                seq_printf(m, ":count=%ld\n", edata->count);
3120
3121        return 0;
3122}
3123
3124static int
3125event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
3126                  unsigned long ip, void *init_data, void **data)
3127{
3128        struct ftrace_func_mapper *mapper = *data;
3129        struct event_probe_data *edata = init_data;
3130        int ret;
3131
3132        if (!mapper) {
3133                mapper = allocate_ftrace_func_mapper();
3134                if (!mapper)
3135                        return -ENODEV;
3136                *data = mapper;
3137        }
3138
3139        ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
3140        if (ret < 0)
3141                return ret;
3142
3143        edata->ref++;
3144
3145        return 0;
3146}
3147
3148static int free_probe_data(void *data)
3149{
3150        struct event_probe_data *edata = data;
3151
3152        edata->ref--;
3153        if (!edata->ref) {
3154                /* Remove the SOFT_MODE flag */
3155                __ftrace_event_enable_disable(edata->file, 0, 1);
3156                trace_event_put_ref(edata->file->event_call);
3157                kfree(edata);
3158        }
3159        return 0;
3160}
3161
3162static void
3163event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
3164                  unsigned long ip, void *data)
3165{
3166        struct ftrace_func_mapper *mapper = data;
3167        struct event_probe_data *edata;
3168
3169        if (!ip) {
3170                if (!mapper)
3171                        return;
3172                free_ftrace_func_mapper(mapper, free_probe_data);
3173                return;
3174        }
3175
3176        edata = ftrace_func_mapper_remove_ip(mapper, ip);
3177
3178        if (WARN_ON_ONCE(!edata))
3179                return;
3180
3181        if (WARN_ON_ONCE(edata->ref <= 0))
3182                return;
3183
3184        free_probe_data(edata);
3185}
3186
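/*
 * The enable and disable variants share the same callbacks; whether the
 * probe enables or disables the event is carried in event_probe_data.
 * The "count" variants additionally decrement a countdown on each hit.
 */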
3187static struct ftrace_probe_ops event_enable_probe_ops = {
3188        .func                   = event_enable_probe,
3189        .print                  = event_enable_print,
3190        .init                   = event_enable_init,
3191        .free                   = event_enable_free,
3192};
3193
3194static struct ftrace_probe_ops event_enable_count_probe_ops = {
3195        .func                   = event_enable_count_probe,
3196        .print                  = event_enable_print,
3197        .init                   = event_enable_init,
3198        .free                   = event_enable_free,
3199};
3200
3201static struct ftrace_probe_ops event_disable_probe_ops = {
3202        .func                   = event_enable_probe,
3203        .print                  = event_enable_print,
3204        .init                   = event_enable_init,
3205        .free                   = event_enable_free,
3206};
3207
3208static struct ftrace_probe_ops event_disable_count_probe_ops = {
3209        .func                   = event_enable_count_probe,
3210        .print                  = event_enable_print,
3211        .init                   = event_enable_init,
3212        .free                   = event_enable_free,
3213};
3214
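/*
 * Parse and register/unregister the enable_event/disable_event function
 * probe commands written to set_ftrace_filter. An illustrative use
 * (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 'try_to_wake_up:enable_event:sched:sched_switch:2' > \
 *		/sys/kernel/tracing/set_ftrace_filter
 *
 * arms a probe on try_to_wake_up() that enables the sched_switch event;
 * the optional trailing number limits how many times the probe will
 * flip the event.
 */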
3215static int
3216event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
3217                  char *glob, char *cmd, char *param, int enabled)
3218{
3219        struct trace_event_file *file;
3220        struct ftrace_probe_ops *ops;
3221        struct event_probe_data *data;
3222        const char *system;
3223        const char *event;
3224        char *number;
3225        bool enable;
3226        int ret;
3227
3228        if (!tr)
3229                return -ENODEV;
3230
3231        /* hash funcs only work with set_ftrace_filter */
3232        if (!enabled || !param)
3233                return -EINVAL;
3234
3235        system = strsep(&param, ":");
3236        if (!param)
3237                return -EINVAL;
3238
3239        event = strsep(&param, ":");
3240
3241        mutex_lock(&event_mutex);
3242
3243        ret = -EINVAL;
3244        file = find_event_file(tr, system, event);
3245        if (!file)
3246                goto out;
3247
3248        enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
3249
3250        if (enable)
3251                ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
3252        else
3253                ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
3254
3255        if (glob[0] == '!') {
3256                ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
3257                goto out;
3258        }
3259
3260        ret = -ENOMEM;
3261
3262        data = kzalloc(sizeof(*data), GFP_KERNEL);
3263        if (!data)
3264                goto out;
3265
3266        data->enable = enable;
3267        data->count = -1;
3268        data->file = file;
3269
3270        if (!param)
3271                goto out_reg;
3272
3273        number = strsep(&param, ":");
3274
3275        ret = -EINVAL;
3276        if (!strlen(number))
3277                goto out_free;
3278
3279        /*
3280         * We use the callback data field (which is a pointer)
3281         * as our counter.
3282         */
3283        ret = kstrtoul(number, 0, &data->count);
3284        if (ret)
3285                goto out_free;
3286
3287 out_reg:
3288        /* Don't let event modules unload while a probe is registered */
3289        ret = trace_event_try_get_ref(file->event_call);
3290        if (!ret) {
3291                ret = -EBUSY;
3292                goto out_free;
3293        }
3294
3295        ret = __ftrace_event_enable_disable(file, 1, 1);
3296        if (ret < 0)
3297                goto out_put;
3298
3299        ret = register_ftrace_function_probe(glob, tr, ops, data);
3300        /*
3301         * On success, the above returns the number of functions enabled,
3302         * but if it didn't find any functions it returns zero.
3303         * Consider no functions a failure too.
3304         */
3305        if (!ret) {
3306                ret = -ENOENT;
3307                goto out_disable;
3308        } else if (ret < 0)
3309                goto out_disable;
3310        /* Just return zero, not the number of enabled functions */
3311        ret = 0;
3312 out:
3313        mutex_unlock(&event_mutex);
3314        return ret;
3315
3316 out_disable:
3317        __ftrace_event_enable_disable(file, 0, 1);
3318 out_put:
3319        trace_event_put_ref(file->event_call);
3320 out_free:
3321        kfree(data);
3322        goto out;
3323}
3324
3325static struct ftrace_func_command event_enable_cmd = {
3326        .name                   = ENABLE_EVENT_STR,
3327        .func                   = event_enable_func,
3328};
3329
3330static struct ftrace_func_command event_disable_cmd = {
3331        .name                   = DISABLE_EVENT_STR,
3332        .func                   = event_enable_func,
3333};
3334
3335static __init int register_event_cmds(void)
3336{
3337        int ret;
3338
3339        ret = register_ftrace_command(&event_enable_cmd);
3340        if (WARN_ON(ret < 0))
3341                return ret;
3342        ret = register_ftrace_command(&event_disable_cmd);
3343        if (WARN_ON(ret < 0))
3344                unregister_ftrace_command(&event_enable_cmd);
3345        return ret;
3346}
3347#else
3348static inline int register_event_cmds(void) { return 0; }
3349#endif /* CONFIG_DYNAMIC_FTRACE */
3350
3351/*
3352 * The top level array and trace arrays created by boot-time tracing
3353 * have already had their trace_event_file descriptors created in order
3354 * to allow for early events to be recorded.
3355 * This function is called after tracefs has been initialized,
3356 * and we now have to create the files associated with the events.
3357 */
3358static void __trace_early_add_event_dirs(struct trace_array *tr)
3359{
3360        struct trace_event_file *file;
3361        int ret;
3362
3364        list_for_each_entry(file, &tr->events, list) {
3365                ret = event_create_dir(tr->event_dir, file);
3366                if (ret < 0)
3367                        pr_warn("Could not create directory for event %s\n",
3368                                trace_event_name(file->event_call));
3369        }
3370}
3371
3372/*
3373 * For early boot up, the top trace array and the trace arrays created
3374 * by boot-time tracing need to have a list of events that can be
3375 * enabled. This must be done before the filesystem is set up in order
3376 * to allow events to be traced early.
3377 */
3378void __trace_early_add_events(struct trace_array *tr)
3379{
3380        struct trace_event_call *call;
3381        int ret;
3382
3383        list_for_each_entry(call, &ftrace_events, list) {
3384                /* Early boot up should not have any modules loaded */
3385                if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
3386                    WARN_ON_ONCE(call->module))
3387                        continue;
3388
3389                ret = __trace_early_add_new_event(call, tr);
3390                if (ret < 0)
3391                        pr_warn("Could not create early event %s\n",
3392                                trace_event_name(call));
3393        }
3394}
3395
3396/* Remove the event directory structure for a trace directory. */
3397static void
3398__trace_remove_event_dirs(struct trace_array *tr)
3399{
3400        struct trace_event_file *file, *next;
3401
3402        list_for_each_entry_safe(file, next, &tr->events, list)
3403                remove_event_file_dir(file);
3404}
3405
3406static void __add_event_to_tracers(struct trace_event_call *call)
3407{
3408        struct trace_array *tr;
3409
3410        list_for_each_entry(tr, &ftrace_trace_arrays, list)
3411                __trace_add_new_event(call, tr);
3412}
3413
3414extern struct trace_event_call *__start_ftrace_events[];
3415extern struct trace_event_call *__stop_ftrace_events[];
3416
3417static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
3418
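/*
 * Handle the "trace_event=" boot parameter. For example (an illustrative
 * command line, using the documented comma-separated event list syntax):
 *
 *   trace_event=sched:sched_switch,irq_handler_entry
 *
 * saves the list here so the events can be enabled as soon as the event
 * infrastructure is up, well before tracefs exists.
 */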
3419static __init int setup_trace_event(char *str)
3420{
3421        strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
3422        ring_buffer_expanded = true;
3423        disable_tracing_selftest("running event tracing");
3424
3425        return 1;
3426}
3427__setup("trace_event=", setup_trace_event);
3428
3429/* Expects to have event_mutex held when called */
3430static int
3431create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
3432{
3433        struct dentry *d_events;
3434        struct dentry *entry;
3435
3436        entry = tracefs_create_file("set_event", 0644, parent,
3437                                    tr, &ftrace_set_event_fops);
3438        if (!entry) {
3439                pr_warn("Could not create tracefs 'set_event' entry\n");
3440                return -ENOMEM;
3441        }
3442
3443        d_events = tracefs_create_dir("events", parent);
3444        if (!d_events) {
3445                pr_warn("Could not create tracefs 'events' directory\n");
3446                return -ENOMEM;
3447        }
3448
3449        entry = trace_create_file("enable", 0644, d_events,
3450                                  tr, &ftrace_tr_enable_fops);
3451        if (!entry) {
3452                pr_warn("Could not create tracefs 'enable' entry\n");
3453                return -ENOMEM;
3454        }
3455
3456        /* These are not as crucial, just warn if they are not created */
3457
3458        entry = tracefs_create_file("set_event_pid", 0644, parent,
3459                                    tr, &ftrace_set_event_pid_fops);
3460        if (!entry)
3461                pr_warn("Could not create tracefs 'set_event_pid' entry\n");
3462
3463        entry = tracefs_create_file("set_event_notrace_pid", 0644, parent,
3464                                    tr, &ftrace_set_event_notrace_pid_fops);
3465        if (!entry)
3466                pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
3467
3468        /* ring buffer internal formats */
3469        entry = trace_create_file("header_page", 0444, d_events,
3470                                  ring_buffer_print_page_header,
3471                                  &ftrace_show_header_fops);
3472        if (!entry)
3473                pr_warn("Could not create tracefs 'header_page' entry\n");
3474
3475        entry = trace_create_file("header_event", 0444, d_events,
3476                                  ring_buffer_print_entry_header,
3477                                  &ftrace_show_header_fops);
3478        if (!entry)
3479                pr_warn("Could not create tracefs 'header_event' entry\n");
3480
3481        tr->event_dir = d_events;
3482
3483        return 0;
3484}
3485
3486/**
3487 * event_trace_add_tracer - add an instance of a trace_array to events
3488 * @parent: The parent dentry to place the files/directories for events in
3489 * @tr: The trace array associated with these events
3490 *
3491 * When a new instance is created, it needs to set up its events
3492 * directory, as well as other files associated with events. It also
3493 * creates the event hierarchy in the @parent/events directory.
3494 *
3495 * Returns 0 on success.
3496 *
3497 * Must be called with event_mutex held.
3498 */
3499int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
3500{
3501        int ret;
3502
3503        lockdep_assert_held(&event_mutex);
3504
3505        ret = create_event_toplevel_files(parent, tr);
3506        if (ret)
3507                goto out;
3508
3509        down_write(&trace_event_sem);
3510        /* If tr already has the event list, it is initialized in early boot. */
3511        if (unlikely(!list_empty(&tr->events)))
3512                __trace_early_add_event_dirs(tr);
3513        else
3514                __trace_add_event_dirs(tr);
3515        up_write(&trace_event_sem);
3516
3517 out:
3518        return ret;
3519}
3520
3521/*
3522 * The top trace array has already had its trace_event_file descriptors created.
3523 * Now the files themselves need to be created.
3524 */
3525static __init int
3526early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
3527{
3528        int ret;
3529
3530        mutex_lock(&event_mutex);
3531
3532        ret = create_event_toplevel_files(parent, tr);
3533        if (ret)
3534                goto out_unlock;
3535
3536        down_write(&trace_event_sem);
3537        __trace_early_add_event_dirs(tr);
3538        up_write(&trace_event_sem);
3539
3540 out_unlock:
3541        mutex_unlock(&event_mutex);
3542
3543        return ret;
3544}
3545
3546/* Must be called with event_mutex held */
3547int event_trace_del_tracer(struct trace_array *tr)
3548{
3549        lockdep_assert_held(&event_mutex);
3550
3551        /* Disable any event triggers and associated soft-disabled events */
3552        clear_event_triggers(tr);
3553
3554        /* Clear the pid list */
3555        __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
3556
3557        /* Disable any running events */
3558        __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
3559
3560        /* Make sure no more events are being executed */
3561        tracepoint_synchronize_unregister();
3562
3563        down_write(&trace_event_sem);
3564        __trace_remove_event_dirs(tr);
3565        tracefs_remove(tr->event_dir);
3566        up_write(&trace_event_sem);
3567
3568        tr->event_dir = NULL;
3569
3570        return 0;
3571}
3572
3573static __init int event_trace_memsetup(void)
3574{
3575        field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
3576        file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
3577        return 0;
3578}
3579
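/*
 * Walk the comma-separated bootup_event_buf and enable each listed event.
 * The comma is put back after each token so this can run a second time
 * from event_trace_enable_again() for events (like syscalls) that could
 * not be enabled on the first pass.
 */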
3580static __init void
3581early_enable_events(struct trace_array *tr, bool disable_first)
3582{
3583        char *buf = bootup_event_buf;
3584        char *token;
3585        int ret;
3586
3587        while (true) {
3588                token = strsep(&buf, ",");
3589
3590                if (!token)
3591                        break;
3592
3593                if (*token) {
3594                        /* Restarting syscalls requires that we stop them first */
3595                        if (disable_first)
3596                                ftrace_set_clr_event(tr, token, 0);
3597
3598                        ret = ftrace_set_clr_event(tr, token, 1);
3599                        if (ret)
3600                                pr_warn("Failed to enable trace event: %s\n", token);
3601                }
3602
3603                /* Put back the comma to allow this to be called again */
3604                if (buf)
3605                        *(buf - 1) = ',';
3606        }
3607}
3608
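/*
 * Register every built-in event found between __start_ftrace_events and
 * __stop_ftrace_events with the top trace array, enable anything requested
 * on the command line, and register the enable_event/disable_event and
 * trigger commands.
 */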
3609static __init int event_trace_enable(void)
3610{
3611        struct trace_array *tr = top_trace_array();
3612        struct trace_event_call **iter, *call;
3613        int ret;
3614
3615        if (!tr)
3616                return -ENODEV;
3617
3618        for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
3619
3620                call = *iter;
3621                ret = event_init(call);
3622                if (!ret)
3623                        list_add(&call->list, &ftrace_events);
3624        }
3625
3626        /*
3627         * We need the top trace array to have a working set of trace
3628         * points at early init, before the debug files and directories
3629         * are created. Create the file entries now, and attach them
3630         * to the actual file dentries later.
3631         */
3632        __trace_early_add_events(tr);
3633
3634        early_enable_events(tr, false);
3635
3636        trace_printk_start_comm();
3637
3638        register_event_cmds();
3639
3640        register_trigger_cmds();
3641
3642        return 0;
3643}
3644
3645/*
3646 * event_trace_enable() is called from trace_event_init() first to
3647 * initialize events and perhaps start any events that are on the
3648 * command line. Unfortunately, there are some events that will not
3649 * start this early, like the system call tracepoints that need
3650 * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. Because
3651 * event_trace_enable() is called before pid 1 starts, that flag is
3652 * never set, so the syscall tracepoints are never reached even though
3653 * the events are enabled (they simply do nothing).
3654 */
3655static __init int event_trace_enable_again(void)
3656{
3657        struct trace_array *tr;
3658
3659        tr = top_trace_array();
3660        if (!tr)
3661                return -ENODEV;
3662
3663        early_enable_events(tr, true);
3664
3665        return 0;
3666}
3667
3668early_initcall(event_trace_enable_again);
3669
3670/* Init fields that are not related to tracefs */
3671static __init int event_trace_init_fields(void)
3672{
3673        if (trace_define_generic_fields())
3674                pr_warn("tracing: Failed to allocated generic fields");
3675
3676        if (trace_define_common_fields())
3677                pr_warn("tracing: Failed to allocate common fields");
3678
3679        return 0;
3680}
3681
3682__init int event_trace_init(void)
3683{
3684        struct trace_array *tr;
3685        struct dentry *entry;
3686        int ret;
3687
3688        tr = top_trace_array();
3689        if (!tr)
3690                return -ENODEV;
3691
3692        entry = tracefs_create_file("available_events", 0444, NULL,
3693                                    tr, &ftrace_avail_fops);
3694        if (!entry)
3695                pr_warn("Could not create tracefs 'available_events' entry\n");
3696
3697        ret = early_event_add_tracer(NULL, tr);
3698        if (ret)
3699                return ret;
3700
3701#ifdef CONFIG_MODULES
3702        ret = register_module_notifier(&trace_module_nb);
3703        if (ret)
3704                pr_warn("Failed to register trace events module notifier\n");
3705#endif
3706
3707        eventdir_initialized = true;
3708
3709        return 0;
3710}
3711
3712void __init trace_event_init(void)
3713{
3714        event_trace_memsetup();
3715        init_ftrace_syscalls();
3716        event_trace_enable();
3717        event_trace_init_fields();
3718}
3719
3720#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
3721
3722static DEFINE_SPINLOCK(test_spinlock);
3723static DEFINE_SPINLOCK(test_spinlock_irq);
3724static DEFINE_MUTEX(test_mutex);
3725
3726static __init void test_work(struct work_struct *dummy)
3727{
3728        spin_lock(&test_spinlock);
3729        spin_lock_irq(&test_spinlock_irq);
3730        udelay(1);
3731        spin_unlock_irq(&test_spinlock_irq);
3732        spin_unlock(&test_spinlock);
3733
3734        mutex_lock(&test_mutex);
3735        msleep(1);
3736        mutex_unlock(&test_mutex);
3737}
3738
3739static __init int event_test_thread(void *unused)
3740{
3741        void *test_malloc;
3742
3743        test_malloc = kmalloc(1234, GFP_KERNEL);
3744        if (!test_malloc)
3745                pr_info("failed to kmalloc\n");
3746
3747        schedule_on_each_cpu(test_work);
3748
3749        kfree(test_malloc);
3750
3751        set_current_state(TASK_INTERRUPTIBLE);
3752        while (!kthread_should_stop()) {
3753                schedule();
3754                set_current_state(TASK_INTERRUPTIBLE);
3755        }
3756        __set_current_state(TASK_RUNNING);
3757
3758        return 0;
3759}
3760
3761/*
3762 * Do various things that may trigger events.
3763 */
3764static __init void event_test_stuff(void)
3765{
3766        struct task_struct *test_thread;
3767
3768        test_thread = kthread_run(event_test_thread, NULL, "test-events");
3769        msleep(1);
3770        kthread_stop(test_thread);
3771}
3772
3773/*
3774 * For every trace event defined, we will test each trace point separately,
3775 * and then by groups, and finally all trace points.
3776 */
3777static __init void event_trace_self_tests(void)
3778{
3779        struct trace_subsystem_dir *dir;
3780        struct trace_event_file *file;
3781        struct trace_event_call *call;
3782        struct event_subsystem *system;
3783        struct trace_array *tr;
3784        int ret;
3785
3786        tr = top_trace_array();
3787        if (!tr)
3788                return;
3789
3790        pr_info("Running tests on trace events:\n");
3791
3792        list_for_each_entry(file, &tr->events, list) {
3793
3794                call = file->event_call;
3795
3796                /* Only test those that have a probe */
3797                if (!call->class || !call->class->probe)
3798                        continue;
3799
3800/*
3801 * Testing syscall events here is pretty useless, but
3802 * we still do it if configured. It is also time consuming.
3803 * What we really need is a user thread to perform the
3804 * syscalls as we test.
3805 */
3806#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
3807                if (call->class->system &&
3808                    strcmp(call->class->system, "syscalls") == 0)
3809                        continue;
3810#endif
3811
3812                pr_info("Testing event %s: ", trace_event_name(call));
3813
3814                /*
3815                 * If an event is already enabled, someone is using
3816                 * it and the self test should not be on.
3817                 */
3818                if (file->flags & EVENT_FILE_FL_ENABLED) {
3819                        pr_warn("Enabled event during self test!\n");
3820                        WARN_ON_ONCE(1);
3821                        continue;
3822                }
3823
3824                ftrace_event_enable_disable(file, 1);
3825                event_test_stuff();
3826                ftrace_event_enable_disable(file, 0);
3827
3828                pr_cont("OK\n");
3829        }
3830
3831        /* Now test at the sub system level */
3832
3833        pr_info("Running tests on trace event systems:\n");
3834
3835        list_for_each_entry(dir, &tr->systems, list) {
3836
3837                system = dir->subsystem;
3838
3839                /* the ftrace system is special, skip it */
3840                if (strcmp(system->name, "ftrace") == 0)
3841                        continue;
3842
3843                pr_info("Testing event system %s: ", system->name);
3844
3845                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
3846                if (WARN_ON_ONCE(ret)) {
3847                        pr_warn("error enabling system %s\n",
3848                                system->name);
3849                        continue;
3850                }
3851
3852                event_test_stuff();
3853
3854                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
3855                if (WARN_ON_ONCE(ret)) {
3856                        pr_warn("error disabling system %s\n",
3857                                system->name);
3858                        continue;
3859                }
3860
3861                pr_cont("OK\n");
3862        }
3863
3864        /* Test with all events enabled */
3865
3866        pr_info("Running tests on all trace events:\n");
3867        pr_info("Testing all events: ");
3868
3869        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
3870        if (WARN_ON_ONCE(ret)) {
3871                pr_warn("error enabling all events\n");
3872                return;
3873        }
3874
3875        event_test_stuff();
3876
3877        /* Reset the state by disabling all events again */
3878        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
3879        if (WARN_ON_ONCE(ret)) {
3880                pr_warn("error disabling all events\n");
3881                return;
3882        }
3883
3884        pr_cont("OK\n");
3885}
3886
3887#ifdef CONFIG_FUNCTION_TRACER
3888
3889static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
3890
3891static struct trace_event_file event_trace_file __initdata;
3892
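/*
 * Function tracer callback used only by the self tests: log a TRACE_FN
 * entry for every traced function, with a per-cpu counter guarding
 * against recursion.
 */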
3893static void __init
3894function_test_events_call(unsigned long ip, unsigned long parent_ip,
3895                          struct ftrace_ops *op, struct ftrace_regs *regs)
3896{
3897        struct trace_buffer *buffer;
3898        struct ring_buffer_event *event;
3899        struct ftrace_entry *entry;
3900        unsigned int trace_ctx;
3901        long disabled;
3902        int cpu;
3903
3904        trace_ctx = tracing_gen_ctx();
3905        preempt_disable_notrace();
3906        cpu = raw_smp_processor_id();
3907        disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
3908
3909        if (disabled != 1)
3910                goto out;
3911
3912        event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
3913                                                TRACE_FN, sizeof(*entry),
3914                                                trace_ctx);
3915        if (!event)
3916                goto out;
3917        entry   = ring_buffer_event_data(event);
3918        entry->ip                       = ip;
3919        entry->parent_ip                = parent_ip;
3920
3921        event_trigger_unlock_commit(&event_trace_file, buffer, event,
3922                                    entry, trace_ctx);
3923 out:
3924        atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
3925        preempt_enable_notrace();
3926}
3927
3928static struct ftrace_ops trace_ops __initdata  =
3929{
3930        .func = function_test_events_call,
3931};
3932
3933static __init void event_trace_self_test_with_function(void)
3934{
3935        int ret;
3936
3937        event_trace_file.tr = top_trace_array();
3938        if (WARN_ON(!event_trace_file.tr))
3939                return;
3940
3941        ret = register_ftrace_function(&trace_ops);
3942        if (WARN_ON(ret < 0)) {
3943                pr_info("Failed to enable function tracer for event tests\n");
3944                return;
3945        }
3946        pr_info("Running tests again, along with the function tracer\n");
3947        event_trace_self_tests();
3948        unregister_ftrace_function(&trace_ops);
3949}
3950#else
3951static __init void event_trace_self_test_with_function(void)
3952{
3953}
3954#endif
3955
3956static __init int event_trace_self_tests_init(void)
3957{
3958        if (!tracing_selftest_disabled) {
3959                event_trace_self_tests();
3960                event_trace_self_test_with_function();
3961        }
3962
3963        return 0;
3964}
3965
3966late_initcall(event_trace_self_tests_init);
3967
3968#endif
3969