// SPDX-License-Identifier: GPL-2.0
/*
 * event tracer
 *
 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 *  - Added format output of fields of the trace point.
 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
 *
 */

#define pr_fmt(fmt) fmt

#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include <trace/events/sched.h>

#include <asm/setup.h>

#include "trace_output.h"

#undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"

DEFINE_MUTEX(event_mutex);

LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_generic_fields);
static LIST_HEAD(ftrace_common_fields);

#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)

static struct kmem_cache *field_cachep;
static struct kmem_cache *file_cachep;

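/*
 * Subsystem reference counting helpers. ref_count appears to be updated
 * only with event_mutex held (see put_system() and subsystem_open()
 * below), so plain, non-atomic increments/decrements suffice here.
 */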
static inline int system_refcount(struct event_subsystem *system)
{
        return system->ref_count;
}

static int system_refcount_inc(struct event_subsystem *system)
{
        return system->ref_count++;
}

static int system_refcount_dec(struct event_subsystem *system)
{
        return --system->ref_count;
}

/* Double loops, do not use break, only goto's work */
#define do_for_each_event_file(tr, file)                        \
        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
                list_for_each_entry(file, &tr->events, list)

#define do_for_each_event_file_safe(tr, file)                   \
        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
                struct trace_event_file *___n;                          \
                list_for_each_entry_safe(file, ___n, &tr->events, list)

#define while_for_each_event_file()             \
        }

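/* Search @head for a field named @name; returns NULL if none is found. */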
static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
        struct ftrace_event_field *field;

        list_for_each_entry(field, head, link) {
                if (!strcmp(field->name, name))
                        return field;
        }

        return NULL;
}

struct ftrace_event_field *
trace_find_event_field(struct trace_event_call *call, char *name)
{
        struct ftrace_event_field *field;
        struct list_head *head;

        head = trace_get_fields(call);
        field = __find_event_field(head, name);
        if (field)
                return field;

        field = __find_event_field(&ftrace_generic_fields, name);
        if (field)
                return field;

        return __find_event_field(&ftrace_common_fields, name);
}

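/*
 * Allocate a field descriptor from field_cachep and prepend it to @head.
 * Because list_add() prepends, the first entry on the list is always the
 * most recently defined field (trace_event_get_offsets() relies on this).
 */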
static int __trace_define_field(struct list_head *head, const char *type,
                                const char *name, int offset, int size,
                                int is_signed, int filter_type)
{
        struct ftrace_event_field *field;

        field = kmem_cache_alloc(field_cachep, GFP_TRACE);
        if (!field)
                return -ENOMEM;

        field->name = name;
        field->type = type;

        if (filter_type == FILTER_OTHER)
                field->filter_type = filter_assign_type(type);
        else
                field->filter_type = filter_type;

        field->offset = offset;
        field->size = size;
        field->is_signed = is_signed;

        list_add(&field->link, head);

        return 0;
}

int trace_define_field(struct trace_event_call *call, const char *type,
                       const char *name, int offset, int size, int is_signed,
                       int filter_type)
{
        struct list_head *head;

        if (WARN_ON(!call->class))
                return 0;

        head = trace_get_fields(call);
        return __trace_define_field(head, type, name, offset, size,
                                    is_signed, filter_type);
}
EXPORT_SYMBOL_GPL(trace_define_field);

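/*
 * These macros expect a local 'ret' variable (and, for __common_field,
 * a local 'struct trace_entry ent') in the function that uses them.
 */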
#define __generic_field(type, item, filter_type)                        \
        ret = __trace_define_field(&ftrace_generic_fields, #type,       \
                                   #item, 0, 0, is_signed_type(type),   \
                                   filter_type);                        \
        if (ret)                                                        \
                return ret;

#define __common_field(type, item)                                      \
        ret = __trace_define_field(&ftrace_common_fields, #type,        \
                                   "common_" #item,                     \
                                   offsetof(typeof(ent), item),         \
                                   sizeof(ent.item),                    \
                                   is_signed_type(type), FILTER_OTHER); \
        if (ret)                                                        \
                return ret;

static int trace_define_generic_fields(void)
{
        int ret;

        __generic_field(int, CPU, FILTER_CPU);
        __generic_field(int, cpu, FILTER_CPU);
        __generic_field(char *, COMM, FILTER_COMM);
        __generic_field(char *, comm, FILTER_COMM);

        return ret;
}

static int trace_define_common_fields(void)
{
        int ret;
        struct trace_entry ent;

        __common_field(unsigned short, type);
        __common_field(unsigned char, flags);
        __common_field(unsigned char, preempt_count);
        __common_field(int, pid);

        return ret;
}

static void trace_destroy_fields(struct trace_event_call *call)
{
        struct ftrace_event_field *field, *next;
        struct list_head *head;

        head = trace_get_fields(call);
        list_for_each_entry_safe(field, next, head, link) {
                list_del(&field->link);
                kmem_cache_free(field_cachep, field);
        }
}

/*
 * run-time version of trace_event_get_offsets_<call>() that returns the last
 * accessible offset of trace fields excluding __dynamic_array bytes
 */
int trace_event_get_offsets(struct trace_event_call *call)
{
        struct ftrace_event_field *tail;
        struct list_head *head;

        head = trace_get_fields(call);
        /*
         * head->next points to the last field with the largest offset,
         * since it was added last by trace_define_field()
         */
        tail = list_first_entry(head, struct ftrace_event_field, link);
        return tail->offset + tail->size;
}

int trace_event_raw_init(struct trace_event_call *call)
{
        int id;

        id = register_trace_event(&call->event);
        if (!id)
                return -ENODEV;

        return 0;
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);

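/*
 * Returns true if this CPU is currently marked to ignore events for the
 * running task, per the trace array's set_event_pid filter. The per-CPU
 * ignore_pid flag is maintained by the sched probes registered in
 * ftrace_event_pid_write() below.
 */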
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
{
        struct trace_array *tr = trace_file->tr;
        struct trace_array_cpu *data;
        struct trace_pid_list *pid_list;

        pid_list = rcu_dereference_raw(tr->filtered_pids);
        if (!pid_list)
                return false;

        data = this_cpu_ptr(tr->trace_buffer.data);

        return data->ignore_pid;
}
EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
                                 struct trace_event_file *trace_file,
                                 unsigned long len)
{
        struct trace_event_call *event_call = trace_file->event_call;

        if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
            trace_event_ignore_this_pid(trace_file))
                return NULL;

        local_save_flags(fbuffer->flags);
        fbuffer->pc = preempt_count();
        /*
         * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
         * preemption (adding one to the preempt_count). Since we are
         * interested in the preempt_count at the time the tracepoint was
         * hit, we need to subtract one to offset the increment.
         */
        if (IS_ENABLED(CONFIG_PREEMPTION))
                fbuffer->pc--;
        fbuffer->trace_file = trace_file;

        fbuffer->event =
                trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
                                                event_call->event.type, len,
                                                fbuffer->flags, fbuffer->pc);
        if (!fbuffer->event)
                return NULL;

        fbuffer->entry = ring_buffer_event_data(fbuffer->event);
        return fbuffer->entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);

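/*
 * Default ->reg() implementation for tracepoint-based events: connects
 * or disconnects the event's probe (or perf probe) to the underlying
 * tracepoint, depending on @type.
 */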
int trace_event_reg(struct trace_event_call *call,
                    enum trace_reg type, void *data)
{
        struct trace_event_file *file = data;

        WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
        switch (type) {
        case TRACE_REG_REGISTER:
                return tracepoint_probe_register(call->tp,
                                                 call->class->probe,
                                                 file);
        case TRACE_REG_UNREGISTER:
                tracepoint_probe_unregister(call->tp,
                                            call->class->probe,
                                            file);
                return 0;

#ifdef CONFIG_PERF_EVENTS
        case TRACE_REG_PERF_REGISTER:
                return tracepoint_probe_register(call->tp,
                                                 call->class->perf_probe,
                                                 call);
        case TRACE_REG_PERF_UNREGISTER:
                tracepoint_probe_unregister(call->tp,
                                            call->class->perf_probe,
                                            call);
                return 0;
        case TRACE_REG_PERF_OPEN:
        case TRACE_REG_PERF_CLOSE:
        case TRACE_REG_PERF_ADD:
        case TRACE_REG_PERF_DEL:
                return 0;
#endif
        }
        return 0;
}
EXPORT_SYMBOL_GPL(trace_event_reg);

void trace_event_enable_cmd_record(bool enable)
{
        struct trace_event_file *file;
        struct trace_array *tr;

        mutex_lock(&event_mutex);
        do_for_each_event_file(tr, file) {

                if (!(file->flags & EVENT_FILE_FL_ENABLED))
                        continue;

                if (enable) {
                        tracing_start_cmdline_record();
                        set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
                } else {
                        tracing_stop_cmdline_record();
                        clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
                }
        } while_for_each_event_file();
        mutex_unlock(&event_mutex);
}

void trace_event_enable_tgid_record(bool enable)
{
        struct trace_event_file *file;
        struct trace_array *tr;

        mutex_lock(&event_mutex);
        do_for_each_event_file(tr, file) {
                if (!(file->flags & EVENT_FILE_FL_ENABLED))
                        continue;

                if (enable) {
                        tracing_start_tgid_record();
                        set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
                } else {
                        tracing_stop_tgid_record();
                        clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
                                  &file->flags);
                }
        } while_for_each_event_file();
        mutex_unlock(&event_mutex);
}

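/*
 * Core enable/disable logic for a single event file. @soft_disable
 * distinguishes "soft" users (such as triggers): they register the
 * tracepoint but keep the event SOFT_DISABLED, and are counted in
 * file->sm_ref so that multiple soft users can stack.
 */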
static int __ftrace_event_enable_disable(struct trace_event_file *file,
                                         int enable, int soft_disable)
{
        struct trace_event_call *call = file->event_call;
        struct trace_array *tr = file->tr;
        unsigned long file_flags = file->flags;
        int ret = 0;
        int disable;

        switch (enable) {
        case 0:
                /*
                 * When soft_disable is set and enable is cleared, the sm_ref
                 * reference counter is decremented. If it reaches 0, we want
                 * to clear the SOFT_DISABLED flag but leave the event in the
                 * state that it was. That is, if the event was enabled and
                 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
                 * is set we do not want the event to be enabled before we
                 * clear the bit.
                 *
                 * When soft_disable is not set but the SOFT_MODE flag is,
                 * we do nothing. Do not disable the tracepoint, otherwise
                 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
                 */
                if (soft_disable) {
                        if (atomic_dec_return(&file->sm_ref) > 0)
                                break;
                        disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
                        clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
                } else
                        disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);

                if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
                        clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
                        if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
                                tracing_stop_cmdline_record();
                                clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
                        }

                        if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
                                tracing_stop_tgid_record();
                                clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
                        }

                        call->class->reg(call, TRACE_REG_UNREGISTER, file);
                }
                /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
                if (file->flags & EVENT_FILE_FL_SOFT_MODE)
                        set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
                else
                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
                break;
        case 1:
                /*
                 * When soft_disable is set and enable is set, we want to
                 * register the tracepoint for the event, but leave the event
                 * as is. That means, if the event was already enabled, we do
                 * nothing (but set SOFT_MODE). If the event is disabled, we
                 * set SOFT_DISABLED before enabling the event tracepoint, so
                 * it still seems to be disabled.
                 */
                if (!soft_disable)
                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
                else {
                        if (atomic_inc_return(&file->sm_ref) > 1)
                                break;
                        set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
                }

                if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
                        bool cmd = false, tgid = false;

                        /* Keep the event disabled, when going to SOFT_MODE. */
                        if (soft_disable)
                                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);

                        if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
                                cmd = true;
                                tracing_start_cmdline_record();
                                set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
                        }

                        if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
                                tgid = true;
                                tracing_start_tgid_record();
                                set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
                        }

                        ret = call->class->reg(call, TRACE_REG_REGISTER, file);
                        if (ret) {
                                if (cmd)
                                        tracing_stop_cmdline_record();
                                if (tgid)
                                        tracing_stop_tgid_record();
                                pr_info("event trace: Could not enable event %s\n",
                                        trace_event_name(call));
                                break;
                        }
                        set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);

                        /* WAS_ENABLED gets set but never cleared. */
                        set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
                }
                break;
        }

        /* Enable or disable use of trace_buffered_event */
        if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
            (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
                if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
                        trace_buffered_event_enable();
                else
                        trace_buffered_event_disable();
        }

        return ret;
}

int trace_event_enable_disable(struct trace_event_file *file,
                               int enable, int soft_disable)
{
        return __ftrace_event_enable_disable(file, enable, soft_disable);
}

static int ftrace_event_enable_disable(struct trace_event_file *file,
                                       int enable)
{
        return __ftrace_event_enable_disable(file, enable, 0);
}

static void ftrace_clear_events(struct trace_array *tr)
{
        struct trace_event_file *file;

        mutex_lock(&event_mutex);
        list_for_each_entry(file, &tr->events, list) {
                ftrace_event_enable_disable(file, 0);
        }
        mutex_unlock(&event_mutex);
}

static void
event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
{
        struct trace_pid_list *pid_list;
        struct trace_array *tr = data;

        pid_list = rcu_dereference_raw(tr->filtered_pids);
        trace_filter_add_remove_task(pid_list, NULL, task);
}

static void
event_filter_pid_sched_process_fork(void *data,
                                    struct task_struct *self,
                                    struct task_struct *task)
{
        struct trace_pid_list *pid_list;
        struct trace_array *tr = data;

        pid_list = rcu_dereference_sched(tr->filtered_pids);
        trace_filter_add_remove_task(pid_list, self, task);
}

void trace_event_follow_fork(struct trace_array *tr, bool enable)
{
        if (enable) {
                register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
                                                       tr, INT_MIN);
                register_trace_prio_sched_process_exit(event_filter_pid_sched_process_exit,
                                                       tr, INT_MAX);
        } else {
                unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
                                                    tr);
                unregister_trace_sched_process_exit(event_filter_pid_sched_process_exit,
                                                    tr);
        }
}

static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
                    struct task_struct *prev, struct task_struct *next)
{
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;

        pid_list = rcu_dereference_sched(tr->filtered_pids);

        this_cpu_write(tr->trace_buffer.data->ignore_pid,
                       trace_ignore_this_task(pid_list, prev) &&
                       trace_ignore_this_task(pid_list, next));
}

static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
                    struct task_struct *prev, struct task_struct *next)
{
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;

        pid_list = rcu_dereference_sched(tr->filtered_pids);

        this_cpu_write(tr->trace_buffer.data->ignore_pid,
                       trace_ignore_this_task(pid_list, next));
}

static void
event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
{
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;

        /* Nothing to do if we are already tracing */
        if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
                return;

        pid_list = rcu_dereference_sched(tr->filtered_pids);

        this_cpu_write(tr->trace_buffer.data->ignore_pid,
                       trace_ignore_this_task(pid_list, task));
}

static void
event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
{
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;

        /* Nothing to do if we are not tracing */
        if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
                return;

        pid_list = rcu_dereference_sched(tr->filtered_pids);

        /* Set tracing if current is enabled */
        this_cpu_write(tr->trace_buffer.data->ignore_pid,
                       trace_ignore_this_task(pid_list, current));
}

static void __ftrace_clear_event_pids(struct trace_array *tr)
{
        struct trace_pid_list *pid_list;
        struct trace_event_file *file;
        int cpu;

        pid_list = rcu_dereference_protected(tr->filtered_pids,
                                             lockdep_is_held(&event_mutex));
        if (!pid_list)
                return;

        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);

        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);

        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);

        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);

        list_for_each_entry(file, &tr->events, list) {
                clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
        }

        for_each_possible_cpu(cpu)
                per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;

        rcu_assign_pointer(tr->filtered_pids, NULL);

        /* Wait till all users are no longer using pid filtering */
        tracepoint_synchronize_unregister();

        trace_free_pid_list(pid_list);
}

static void ftrace_clear_event_pids(struct trace_array *tr)
{
        mutex_lock(&event_mutex);
        __ftrace_clear_event_pids(tr);
        mutex_unlock(&event_mutex);
}

static void __put_system(struct event_subsystem *system)
{
        struct event_filter *filter = system->filter;

        WARN_ON_ONCE(system_refcount(system) == 0);
        if (system_refcount_dec(system))
                return;

        list_del(&system->list);

        if (filter) {
                kfree(filter->filter_string);
                kfree(filter);
        }
        kfree_const(system->name);
        kfree(system);
}

static void __get_system(struct event_subsystem *system)
{
        WARN_ON_ONCE(system_refcount(system) == 0);
        system_refcount_inc(system);
}

static void __get_system_dir(struct trace_subsystem_dir *dir)
{
        WARN_ON_ONCE(dir->ref_count == 0);
        dir->ref_count++;
        __get_system(dir->subsystem);
}

static void __put_system_dir(struct trace_subsystem_dir *dir)
{
        WARN_ON_ONCE(dir->ref_count == 0);
        /* If the subsystem is about to be freed, the dir must be too */
        WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);

        __put_system(dir->subsystem);
        if (!--dir->ref_count)
                kfree(dir);
}

static void put_system(struct trace_subsystem_dir *dir)
{
        mutex_lock(&event_mutex);
        __put_system_dir(dir);
        mutex_unlock(&event_mutex);
}

static void remove_subsystem(struct trace_subsystem_dir *dir)
{
        if (!dir)
                return;

        if (!--dir->nr_events) {
                tracefs_remove_recursive(dir->entry);
                list_del(&dir->list);
                __put_system_dir(dir);
        }
}

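/*
 * Tear down an event's tracefs directory and free the file. i_private
 * of each child dentry is cleared first so that already-open file
 * handles see NULL from event_file_data() instead of a stale pointer.
 */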
static void remove_event_file_dir(struct trace_event_file *file)
{
        struct dentry *dir = file->dir;
        struct dentry *child;

        if (dir) {
                spin_lock(&dir->d_lock);        /* probably unneeded */
                list_for_each_entry(child, &dir->d_subdirs, d_child) {
                        if (d_really_is_positive(child))        /* probably unneeded */
                                d_inode(child)->i_private = NULL;
                }
                spin_unlock(&dir->d_lock);

                tracefs_remove_recursive(dir);
        }

        list_del(&file->list);
        remove_subsystem(file->system);
        free_event_filter(file->filter);
        kmem_cache_free(file_cachep, file);
}

/*
 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
 */
static int
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
                              const char *sub, const char *event, int set)
{
        struct trace_event_file *file;
        struct trace_event_call *call;
        const char *name;
        int ret = -EINVAL;
        int eret = 0;

        list_for_each_entry(file, &tr->events, list) {

                call = file->event_call;
                name = trace_event_name(call);

                if (!name || !call->class || !call->class->reg)
                        continue;

                if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
                        continue;

                if (match &&
                    strcmp(match, name) != 0 &&
                    strcmp(match, call->class->system) != 0)
                        continue;

                if (sub && strcmp(sub, call->class->system) != 0)
                        continue;

                if (event && strcmp(event, name) != 0)
                        continue;

                ret = ftrace_event_enable_disable(file, set);

                /*
                 * Save the first error and return that. Some events
                 * may still have been enabled, but let the user
                 * know that something went wrong.
                 */
                if (ret && !eret)
                        eret = ret;

                ret = eret;
        }

        return ret;
}

static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
                                  const char *sub, const char *event, int set)
{
        int ret;

        mutex_lock(&event_mutex);
        ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
        mutex_unlock(&event_mutex);

        return ret;
}

int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
{
        char *event = NULL, *sub = NULL, *match;
        int ret;

        /*
         * The buf format can be <subsystem>:<event-name>
         *  *:<event-name> means any event by that name.
         *  :<event-name> is the same.
         *
         *  <subsystem>:* means all events in that subsystem
         *  <subsystem>: means the same.
         *
         *  <name> (no ':') means all events in a subsystem with
         *  the name <name> or any event that matches <name>
         */

        match = strsep(&buf, ":");
        if (buf) {
                sub = match;
                event = buf;
                match = NULL;

                if (!strlen(sub) || strcmp(sub, "*") == 0)
                        sub = NULL;
                if (!strlen(event) || strcmp(event, "*") == 0)
                        event = NULL;
        }

        ret = __ftrace_set_clr_event(tr, match, sub, event, set);

        /* Put back the colon to allow this to be called again */
        if (buf)
                *(buf - 1) = ':';

        return ret;
}
EXPORT_SYMBOL_GPL(ftrace_set_clr_event);

/**
 * trace_set_clr_event - enable or disable an event
 * @system: system name to match (NULL for any system)
 * @event: event name to match (NULL for all events, within system)
 * @set: 1 to enable, 0 to disable
 *
 * This is a way for other parts of the kernel to enable or disable
 * event recording.
 *
 * Returns 0 on success, -EINVAL if the parameters do not match any
 * registered events.
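 *
 * For example, trace_set_clr_event("sched", "sched_switch", 1) enables
 * the sched_switch event in the top-level trace array.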
 */
int trace_set_clr_event(const char *system, const char *event, int set)
{
        struct trace_array *tr = top_trace_array();

        if (!tr)
                return -ENODEV;

        return __ftrace_set_clr_event(tr, NULL, system, event, set);
}
EXPORT_SYMBOL_GPL(trace_set_clr_event);

/* 128 should be much more than enough */
#define EVENT_BUF_SIZE          127

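/*
 * Write handler for "set_event": parses one event name per token and
 * enables it, or disables it when the token is prefixed with '!'.
 */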
static ssize_t
ftrace_event_write(struct file *file, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        struct seq_file *m = file->private_data;
        struct trace_array *tr = m->private;
        ssize_t read, ret;

        if (!cnt)
                return 0;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
                return -ENOMEM;

        read = trace_get_user(&parser, ubuf, cnt, ppos);

        if (read >= 0 && trace_parser_loaded(&parser)) {
                int set = 1;

                if (*parser.buffer == '!')
                        set = 0;

                ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
                if (ret)
                        goto out_put;
        }

        ret = read;

 out_put:
        trace_parser_put(&parser);

        return ret;
}

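/*
 * Seq_file iterators: t_* back "available_events", s_* back "set_event"
 * (enabled events only), and p_* back "set_event_pid". All of them hold
 * event_mutex across the iteration.
 */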
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_event_file *file = v;
        struct trace_event_call *call;
        struct trace_array *tr = m->private;

        (*pos)++;

        list_for_each_entry_continue(file, &tr->events, list) {
                call = file->event_call;
                /*
                 * The ftrace subsystem is for showing formats only.
                 * They can not be enabled or disabled via the event files.
                 */
                if (call->class && call->class->reg &&
                    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
                        return file;
        }

        return NULL;
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
        struct trace_event_file *file;
        struct trace_array *tr = m->private;
        loff_t l;

        mutex_lock(&event_mutex);

        file = list_entry(&tr->events, struct trace_event_file, list);
        for (l = 0; l <= *pos; ) {
                file = t_next(m, file, &l);
                if (!file)
                        break;
        }
        return file;
}

static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_event_file *file = v;
        struct trace_array *tr = m->private;

        (*pos)++;

        list_for_each_entry_continue(file, &tr->events, list) {
                if (file->flags & EVENT_FILE_FL_ENABLED)
                        return file;
        }

        return NULL;
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
        struct trace_event_file *file;
        struct trace_array *tr = m->private;
        loff_t l;

        mutex_lock(&event_mutex);

        file = list_entry(&tr->events, struct trace_event_file, list);
        for (l = 0; l <= *pos; ) {
                file = s_next(m, file, &l);
                if (!file)
                        break;
        }
        return file;
}

static int t_show(struct seq_file *m, void *v)
{
        struct trace_event_file *file = v;
        struct trace_event_call *call = file->event_call;

        if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
                seq_printf(m, "%s:", call->class->system);
        seq_printf(m, "%s\n", trace_event_name(call));

        return 0;
}

static void t_stop(struct seq_file *m, void *p)
{
        mutex_unlock(&event_mutex);
}

static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_array *tr = m->private;
        struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);

        return trace_pid_next(pid_list, v, pos);
}

static void *p_start(struct seq_file *m, loff_t *pos)
        __acquires(RCU)
{
        struct trace_pid_list *pid_list;
        struct trace_array *tr = m->private;

        /*
         * Grab the mutex, to keep calls to p_next() having the same
         * tr->filtered_pids as p_start() has.
         * If we just passed the tr->filtered_pids around, then RCU would
         * have been enough, but doing that makes things more complex.
         */
        mutex_lock(&event_mutex);
        rcu_read_lock_sched();

        pid_list = rcu_dereference_sched(tr->filtered_pids);

        if (!pid_list)
                return NULL;

        return trace_pid_start(pid_list, pos);
}

static void p_stop(struct seq_file *m, void *p)
        __releases(RCU)
{
        rcu_read_unlock_sched();
        mutex_unlock(&event_mutex);
}

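/*
 * Read handler for an event's "enable" file: reports "0" or "1",
 * with a '*' appended while the event is in soft disable mode.
 */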
static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                  loff_t *ppos)
{
        struct trace_event_file *file;
        unsigned long flags;
        char buf[4] = "0";

        mutex_lock(&event_mutex);
        file = event_file_data(filp);
        if (likely(file))
                flags = file->flags;
        mutex_unlock(&event_mutex);

        if (!file)
                return -ENODEV;

        if (flags & EVENT_FILE_FL_ENABLED &&
            !(flags & EVENT_FILE_FL_SOFT_DISABLED))
                strcpy(buf, "1");

        if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
            flags & EVENT_FILE_FL_SOFT_MODE)
                strcat(buf, "*");

        strcat(buf, "\n");

        return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
}

static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
{
        struct trace_event_file *file;
        unsigned long val;
        int ret;

        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        switch (val) {
        case 0:
        case 1:
                ret = -ENODEV;
                mutex_lock(&event_mutex);
                file = event_file_data(filp);
                if (likely(file))
                        ret = ftrace_event_enable_disable(file, val);
                mutex_unlock(&event_mutex);
                break;

        default:
                return -EINVAL;
        }

        *ppos += cnt;

        return ret ? ret : cnt;
}

static ssize_t
system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                   loff_t *ppos)
{
        const char set_to_char[4] = { '?', '0', '1', 'X' };
        struct trace_subsystem_dir *dir = filp->private_data;
        struct event_subsystem *system = dir->subsystem;
        struct trace_event_call *call;
        struct trace_event_file *file;
        struct trace_array *tr = dir->tr;
        char buf[2];
        int set = 0;
        int ret;

        mutex_lock(&event_mutex);
        list_for_each_entry(file, &tr->events, list) {
                call = file->event_call;
                if (!trace_event_name(call) || !call->class || !call->class->reg)
                        continue;

                if (system && strcmp(call->class->system, system->name) != 0)
                        continue;

                /*
                 * We need to find out if all the events are set
                 * or if all events are cleared, or if we have
                 * a mixture.
                 */
                set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));

                /*
                 * If we have a mixture, no need to look further.
                 */
                if (set == 3)
                        break;
        }
        mutex_unlock(&event_mutex);

        buf[0] = set_to_char[set];
        buf[1] = '\n';

        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);

        return ret;
}

static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                    loff_t *ppos)
{
        struct trace_subsystem_dir *dir = filp->private_data;
        struct event_subsystem *system = dir->subsystem;
        const char *name = NULL;
        unsigned long val;
        ssize_t ret;

        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        if (val != 0 && val != 1)
                return -EINVAL;

        /*
         * Opening of "enable" adds a ref count to system,
         * so the name is safe to use.
         */
        if (system)
                name = system->name;

        ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
        if (ret)
                goto out;

        ret = cnt;

out:
        *ppos += cnt;

        return ret;
}

enum {
        FORMAT_HEADER           = 1,
        FORMAT_FIELD_SEPERATOR  = 2,
        FORMAT_PRINTFMT         = 3,
};

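/*
 * The "format" file is emitted in stages: the header, the common fields
 * (walked in definition order via ->prev, since fields are prepended),
 * a blank-line separator, the event's own fields, then the print fmt.
 */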
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_event_call *call = event_file_data(m->private);
        struct list_head *common_head = &ftrace_common_fields;
        struct list_head *head = trace_get_fields(call);
        struct list_head *node = v;

        (*pos)++;

        switch ((unsigned long)v) {
        case FORMAT_HEADER:
                node = common_head;
                break;

        case FORMAT_FIELD_SEPERATOR:
                node = head;
                break;

        case FORMAT_PRINTFMT:
                /* all done */
                return NULL;
        }

        node = node->prev;
        if (node == common_head)
                return (void *)FORMAT_FIELD_SEPERATOR;
        else if (node == head)
                return (void *)FORMAT_PRINTFMT;
        else
                return node;
}

static int f_show(struct seq_file *m, void *v)
{
        struct trace_event_call *call = event_file_data(m->private);
        struct ftrace_event_field *field;
        const char *array_descriptor;

        switch ((unsigned long)v) {
        case FORMAT_HEADER:
                seq_printf(m, "name: %s\n", trace_event_name(call));
                seq_printf(m, "ID: %d\n", call->event.type);
                seq_puts(m, "format:\n");
                return 0;

        case FORMAT_FIELD_SEPERATOR:
                seq_putc(m, '\n');
                return 0;

        case FORMAT_PRINTFMT:
                seq_printf(m, "\nprint fmt: %s\n",
                           call->print_fmt);
                return 0;
        }

        field = list_entry(v, struct ftrace_event_field, link);
        /*
         * Smartly shows the array type (except dynamic arrays).
         * Normal:
         *      field:TYPE VAR
         * If TYPE := TYPE[LEN], it is shown:
         *      field:TYPE VAR[LEN]
         */
        array_descriptor = strchr(field->type, '[');

        if (str_has_prefix(field->type, "__data_loc"))
                array_descriptor = NULL;

        if (!array_descriptor)
                seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
                           field->type, field->name, field->offset,
                           field->size, !!field->is_signed);
        else
                seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
                           (int)(array_descriptor - field->type),
                           field->type, field->name,
                           array_descriptor, field->offset,
                           field->size, !!field->is_signed);

        return 0;
}

static void *f_start(struct seq_file *m, loff_t *pos)
{
        void *p = (void *)FORMAT_HEADER;
        loff_t l = 0;

        /* ->stop() is called even if ->start() fails */
        mutex_lock(&event_mutex);
        if (!event_file_data(m->private))
                return ERR_PTR(-ENODEV);

        while (l < *pos && p)
                p = f_next(m, p, &l);

        return p;
}

static void f_stop(struct seq_file *m, void *p)
{
        mutex_unlock(&event_mutex);
}

static const struct seq_operations trace_format_seq_ops = {
        .start          = f_start,
        .next           = f_next,
        .stop           = f_stop,
        .show           = f_show,
};

static int trace_format_open(struct inode *inode, struct file *file)
{
        struct seq_file *m;
        int ret;

        /* Do we want to hide event format files on tracefs lockdown? */

        ret = seq_open(file, &trace_format_seq_ops);
        if (ret < 0)
                return ret;

        m = file->private_data;
        m->private = file;

        return 0;
}

static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
        int id = (long)event_file_data(filp);
        char buf[32];
        int len;

        if (unlikely(!id))
                return -ENODEV;

        len = sprintf(buf, "%d\n", id);

        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}

static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
                  loff_t *ppos)
{
        struct trace_event_file *file;
        struct trace_seq *s;
        int r = -ENODEV;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);

        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        mutex_lock(&event_mutex);
        file = event_file_data(filp);
        if (file)
                print_event_filter(file, s);
        mutex_unlock(&event_mutex);

        if (file)
                r = simple_read_from_buffer(ubuf, cnt, ppos,
                                            s->buffer, trace_seq_used(s));

        kfree(s);

        return r;
}

static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
{
        struct trace_event_file *file;
        char *buf;
        int err = -ENODEV;

        if (cnt >= PAGE_SIZE)
                return -EINVAL;

        buf = memdup_user_nul(ubuf, cnt);
        if (IS_ERR(buf))
                return PTR_ERR(buf);

        mutex_lock(&event_mutex);
        file = event_file_data(filp);
        if (file)
                err = apply_event_filter(file, buf);
        mutex_unlock(&event_mutex);

        kfree(buf);
        if (err < 0)
                return err;

        *ppos += cnt;

        return cnt;
}

static LIST_HEAD(event_subsystems);

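/*
 * Open a subsystem file. The inode's trace_subsystem_dir is validated
 * against the live lists (it may have been removed meanwhile), and a
 * reference is taken on both the directory and its trace array.
 */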
static int subsystem_open(struct inode *inode, struct file *filp)
{
        struct event_subsystem *system = NULL;
        struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
        struct trace_array *tr;
        int ret;

        if (tracing_is_disabled())
                return -ENODEV;

        /* Make sure the system still exists */
        mutex_lock(&event_mutex);
        mutex_lock(&trace_types_lock);
        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
                list_for_each_entry(dir, &tr->systems, list) {
                        if (dir == inode->i_private) {
                                /* Don't open systems with no events */
                                if (dir->nr_events) {
                                        __get_system_dir(dir);
                                        system = dir->subsystem;
                                }
                                goto exit_loop;
                        }
                }
        }
 exit_loop:
        mutex_unlock(&trace_types_lock);
        mutex_unlock(&event_mutex);

        if (!system)
                return -ENODEV;

        /* Some versions of gcc think dir can be uninitialized here */
        WARN_ON(!dir);

        /* Still need to increment the ref count of the system */
        if (trace_array_get(tr) < 0) {
                put_system(dir);
                return -ENODEV;
        }

        ret = tracing_open_generic(inode, filp);
        if (ret < 0) {
                trace_array_put(tr);
                put_system(dir);
        }

        return ret;
}

static int system_tr_open(struct inode *inode, struct file *filp)
{
        struct trace_subsystem_dir *dir;
        struct trace_array *tr = inode->i_private;
        int ret;

        /* Make a temporary dir that has no system but points to tr */
        dir = kzalloc(sizeof(*dir), GFP_KERNEL);
        if (!dir)
                return -ENOMEM;

        ret = tracing_open_generic_tr(inode, filp);
        if (ret < 0) {
                kfree(dir);
                return ret;
        }
        dir->tr = tr;
        filp->private_data = dir;

        return 0;
}

static int subsystem_release(struct inode *inode, struct file *file)
{
        struct trace_subsystem_dir *dir = file->private_data;

        trace_array_put(dir->tr);

        /*
         * If dir->subsystem is NULL, then this is a temporary
         * descriptor that was made for a trace_array to enable
         * all subsystems.
         */
        if (dir->subsystem)
                put_system(dir);
        else
                kfree(dir);

        return 0;
}

static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
                      loff_t *ppos)
{
        struct trace_subsystem_dir *dir = filp->private_data;
        struct event_subsystem *system = dir->subsystem;
        struct trace_seq *s;
        int r;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        print_subsystem_event_filter(system, s);
        r = simple_read_from_buffer(ubuf, cnt, ppos,
                                    s->buffer, trace_seq_used(s));

        kfree(s);

        return r;
}

static ssize_t
subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
                       loff_t *ppos)
{
        struct trace_subsystem_dir *dir = filp->private_data;
        char *buf;
        int err;

        if (cnt >= PAGE_SIZE)
                return -EINVAL;

        buf = memdup_user_nul(ubuf, cnt);
        if (IS_ERR(buf))
                return PTR_ERR(buf);

        err = apply_subsystem_event_filter(dir, buf);
        kfree(buf);
        if (err < 0)
                return err;

        *ppos += cnt;

        return cnt;
}

static ssize_t
show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
        int (*func)(struct trace_seq *s) = filp->private_data;
        struct trace_seq *s;
        int r;

        if (*ppos)
                return 0;

        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
                return -ENOMEM;

        trace_seq_init(s);

        func(s);
        r = simple_read_from_buffer(ubuf, cnt, ppos,
                                    s->buffer, trace_seq_used(s));

        kfree(s);

        return r;
}

static void ignore_task_cpu(void *data)
{
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;

        /*
         * This function is called by on_each_cpu() while the
         * event_mutex is held.
         */
        pid_list = rcu_dereference_protected(tr->filtered_pids,
                                             mutex_is_locked(&event_mutex));

        this_cpu_write(tr->trace_buffer.data->ignore_pid,
                       trace_ignore_this_task(pid_list, current));
}

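/*
 * Write handler for "set_event_pid": swaps in a new filtered_pids list
 * and, on first use, registers the sched probes that keep each CPU's
 * ignore_pid flag in sync with the running task.
 */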
static ssize_t
ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
                       size_t cnt, loff_t *ppos)
{
        struct seq_file *m = filp->private_data;
        struct trace_array *tr = m->private;
        struct trace_pid_list *filtered_pids = NULL;
        struct trace_pid_list *pid_list;
        struct trace_event_file *file;
        ssize_t ret;

        if (!cnt)
                return 0;

        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        mutex_lock(&event_mutex);

        filtered_pids = rcu_dereference_protected(tr->filtered_pids,
                                             lockdep_is_held(&event_mutex));

        ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
        if (ret < 0)
                goto out;

        rcu_assign_pointer(tr->filtered_pids, pid_list);

        list_for_each_entry(file, &tr->events, list) {
                set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
        }

        if (filtered_pids) {
                tracepoint_synchronize_unregister();
                trace_free_pid_list(filtered_pids);
        } else if (pid_list) {
                /*
                 * Register a probe that is called before all other probes
                 * to set ignore_pid if next or prev do not match.
                 * Register a probe that is called after all other probes
                 * to only keep ignore_pid set if next pid matches.
                 */
                register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
                                                 tr, INT_MAX);
                register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
                                                 tr, 0);

                register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
                                                 tr, INT_MAX);
                register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
                                                 tr, 0);

                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
                                                     tr, INT_MAX);
                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
                                                     tr, 0);

                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
                                                 tr, INT_MAX);
                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
                                                 tr, 0);
        }

        /*
         * Ignoring of pids is done at task switch. But we have to
         * check for those tasks that are currently running.
         * Always do this in case a pid was appended or removed.
         */
        on_each_cpu(ignore_task_cpu, tr, 1);

 out:
        mutex_unlock(&event_mutex);

        if (ret > 0)
                *ppos += ret;

        return ret;
}

static int ftrace_event_avail_open(struct inode *inode, struct file *file);
static int ftrace_event_set_open(struct inode *inode, struct file *file);
static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
static int ftrace_event_release(struct inode *inode, struct file *file);

static const struct seq_operations show_event_seq_ops = {
        .start = t_start,
        .next = t_next,
        .show = t_show,
        .stop = t_stop,
};

static const struct seq_operations show_set_event_seq_ops = {
        .start = s_start,
        .next = s_next,
        .show = t_show,
        .stop = t_stop,
};

static const struct seq_operations show_set_pid_seq_ops = {
        .start = p_start,
        .next = p_next,
        .show = trace_pid_show,
        .stop = p_stop,
};

static const struct file_operations ftrace_avail_fops = {
        .open = ftrace_event_avail_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};

static const struct file_operations ftrace_set_event_fops = {
        .open = ftrace_event_set_open,
        .read = seq_read,
        .write = ftrace_event_write,
        .llseek = seq_lseek,
        .release = ftrace_event_release,
};

static const struct file_operations ftrace_set_event_pid_fops = {
        .open = ftrace_event_set_pid_open,
        .read = seq_read,
        .write = ftrace_event_pid_write,
        .llseek = seq_lseek,
        .release = ftrace_event_release,
};

static const struct file_operations ftrace_enable_fops = {
        .open = tracing_open_generic,
        .read = event_enable_read,
        .write = event_enable_write,
        .llseek = default_llseek,
};

static const struct file_operations ftrace_event_format_fops = {
        .open = trace_format_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};

static const struct file_operations ftrace_event_id_fops = {
        .read = event_id_read,
        .llseek = default_llseek,
};

static const struct file_operations ftrace_event_filter_fops = {
        .open = tracing_open_generic,
        .read = event_filter_read,
        .write = event_filter_write,
        .llseek = default_llseek,
};

static const struct file_operations ftrace_subsystem_filter_fops = {
        .open = subsystem_open,
        .read = subsystem_filter_read,
        .write = subsystem_filter_write,
        .llseek = default_llseek,
        .release = subsystem_release,
};

static const struct file_operations ftrace_system_enable_fops = {
1738        .open = subsystem_open,
1739        .read = system_enable_read,
1740        .write = system_enable_write,
1741        .llseek = default_llseek,
1742        .release = subsystem_release,
1743};
1744
1745static const struct file_operations ftrace_tr_enable_fops = {
1746        .open = system_tr_open,
1747        .read = system_enable_read,
1748        .write = system_enable_write,
1749        .llseek = default_llseek,
1750        .release = subsystem_release,
1751};
1752
1753static const struct file_operations ftrace_show_header_fops = {
1754        .open = tracing_open_generic,
1755        .read = show_header,
1756        .llseek = default_llseek,
1757};
1758
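/*
 * Common open helper for the seq_file based event files: honor kernel
 * lockdown, then stash the data passed in via inode->i_private (the
 * trace_array) in the seq_file's private field for the iterators.
 */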
1759static int
1760ftrace_event_open(struct inode *inode, struct file *file,
1761                  const struct seq_operations *seq_ops)
1762{
1763        struct seq_file *m;
1764        int ret;
1765
1766        ret = security_locked_down(LOCKDOWN_TRACEFS);
1767        if (ret)
1768                return ret;
1769
1770        ret = seq_open(file, seq_ops);
1771        if (ret < 0)
1772                return ret;
1773        m = file->private_data;
1774        /* copy tr over to seq ops */
1775        m->private = inode->i_private;
1776
1777        return ret;
1778}
1779
1780static int ftrace_event_release(struct inode *inode, struct file *file)
1781{
1782        struct trace_array *tr = inode->i_private;
1783
1784        trace_array_put(tr);
1785
1786        return seq_release(inode, file);
1787}
1788
1789static int
1790ftrace_event_avail_open(struct inode *inode, struct file *file)
1791{
1792        const struct seq_operations *seq_ops = &show_event_seq_ops;
1793
1794        /* Checks for tracefs lockdown */
1795        return ftrace_event_open(inode, file, seq_ops);
1796}
1797
1798static int
1799ftrace_event_set_open(struct inode *inode, struct file *file)
1800{
1801        const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1802        struct trace_array *tr = inode->i_private;
1803        int ret;
1804
1805        ret = tracing_check_open_get_tr(tr);
1806        if (ret)
1807                return ret;
1808
1809        if ((file->f_mode & FMODE_WRITE) &&
1810            (file->f_flags & O_TRUNC))
1811                ftrace_clear_events(tr);
1812
1813        ret = ftrace_event_open(inode, file, seq_ops);
1814        if (ret < 0)
1815                trace_array_put(tr);
1816        return ret;
1817}
1818
1819static int
1820ftrace_event_set_pid_open(struct inode *inode, struct file *file)
1821{
1822        const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
1823        struct trace_array *tr = inode->i_private;
1824        int ret;
1825
1826        ret = tracing_check_open_get_tr(tr);
1827        if (ret)
1828                return ret;
1829
1830        if ((file->f_mode & FMODE_WRITE) &&
1831            (file->f_flags & O_TRUNC))
1832                ftrace_clear_event_pids(tr);
1833
1834        ret = ftrace_event_open(inode, file, seq_ops);
1835        if (ret < 0)
1836                trace_array_put(tr);
1837        return ret;
1838}
1839
1840static struct event_subsystem *
1841create_new_subsystem(const char *name)
1842{
1843        struct event_subsystem *system;
1844
1845        /* need to create new entry */
1846        system = kmalloc(sizeof(*system), GFP_KERNEL);
1847        if (!system)
1848                return NULL;
1849
1850        system->ref_count = 1;
1851
1852        /* The name is only duplicated if dynamic (kprobes and modules) */
1853        system->name = kstrdup_const(name, GFP_KERNEL);
1854        if (!system->name)
1855                goto out_free;
1856
1859        system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
1860        if (!system->filter)
1861                goto out_free;
1862
1863        list_add(&system->list, &event_subsystems);
1864
1865        return system;
1866
1867 out_free:
1868        kfree_const(system->name);
1869        kfree(system);
1870        return NULL;
1871}
1872
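/*
 * Find or create the directory for an event subsystem within a trace
 * instance. Subsystem directories are shared by every event of the same
 * system and are reference counted: an existing dir only gets its
 * nr_events count bumped, otherwise a new directory with "filter" and
 * "enable" control files is created.
 */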
1873static struct dentry *
1874event_subsystem_dir(struct trace_array *tr, const char *name,
1875                    struct trace_event_file *file, struct dentry *parent)
1876{
1877        struct trace_subsystem_dir *dir;
1878        struct event_subsystem *system;
1879        struct dentry *entry;
1880
1881        /* First see if we already created this dir */
1882        list_for_each_entry(dir, &tr->systems, list) {
1883                system = dir->subsystem;
1884                if (strcmp(system->name, name) == 0) {
1885                        dir->nr_events++;
1886                        file->system = dir;
1887                        return dir->entry;
1888                }
1889        }
1890
1891        /* Now see if the system itself exists. */
1892        list_for_each_entry(system, &event_subsystems, list) {
1893                if (strcmp(system->name, name) == 0)
1894                        break;
1895        }
1896        /* Reset system variable when not found */
1897        if (&system->list == &event_subsystems)
1898                system = NULL;
1899
1900        dir = kmalloc(sizeof(*dir), GFP_KERNEL);
1901        if (!dir)
1902                goto out_fail;
1903
1904        if (!system) {
1905                system = create_new_subsystem(name);
1906                if (!system)
1907                        goto out_free;
1908        } else
1909                __get_system(system);
1910
1911        dir->entry = tracefs_create_dir(name, parent);
1912        if (!dir->entry) {
1913                pr_warn("Failed to create system directory %s\n", name);
1914                __put_system(system);
1915                goto out_free;
1916        }
1917
1918        dir->tr = tr;
1919        dir->ref_count = 1;
1920        dir->nr_events = 1;
1921        dir->subsystem = system;
1922        file->system = dir;
1923
1924        entry = tracefs_create_file("filter", 0644, dir->entry, dir,
1925                                    &ftrace_subsystem_filter_fops);
1926        if (!entry) {
1927                kfree(system->filter);
1928                system->filter = NULL;
1929                pr_warn("Could not create tracefs '%s/filter' entry\n", name);
1930        }
1931
1932        trace_create_file("enable", 0644, dir->entry, dir,
1933                          &ftrace_system_enable_fops);
1934
1935        list_add(&dir->list, &tr->systems);
1936
1937        return dir->entry;
1938
1939 out_free:
1940        kfree(dir);
1941 out_fail:
1942        /* Only print this message if the failure was due to memory allocation */
1943        if (!dir || !system)
1944                pr_warn("No memory to create event subsystem %s\n", name);
1945        return NULL;
1946}
1947
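/*
 * Create the directory and control files for a single event. For a
 * typical event the result looks like:
 *
 *   events/<system>/<event>/enable
 *   events/<system>/<event>/id       (CONFIG_PERF_EVENTS only)
 *   events/<system>/<event>/filter
 *   events/<system>/<event>/trigger
 *   events/<system>/<event>/hist     (CONFIG_HIST_TRIGGERS only)
 *   events/<system>/<event>/format
 */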
1948static int
1949event_create_dir(struct dentry *parent, struct trace_event_file *file)
1950{
1951        struct trace_event_call *call = file->event_call;
1952        struct trace_array *tr = file->tr;
1953        struct list_head *head;
1954        struct dentry *d_events;
1955        const char *name;
1956        int ret;
1957
1958        /*
1959         * If the trace point header did not define TRACE_SYSTEM
1960         * then the system would be called "TRACE_SYSTEM".
1961         */
1962        if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
1963                d_events = event_subsystem_dir(tr, call->class->system, file, parent);
1964                if (!d_events)
1965                        return -ENOMEM;
1966        } else
1967                d_events = parent;
1968
1969        name = trace_event_name(call);
1970        file->dir = tracefs_create_dir(name, d_events);
1971        if (!file->dir) {
1972                pr_warn("Could not create tracefs '%s' directory\n", name);
1973                return -1;
1974        }
1975
1976        if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
1977                trace_create_file("enable", 0644, file->dir, file,
1978                                  &ftrace_enable_fops);
1979
1980#ifdef CONFIG_PERF_EVENTS
1981        if (call->event.type && call->class->reg)
1982                trace_create_file("id", 0444, file->dir,
1983                                  (void *)(long)call->event.type,
1984                                  &ftrace_event_id_fops);
1985#endif
1986
1987        /*
1988         * Other events may have the same class. Only update
1989         * the fields if they are not already defined.
1990         */
1991        head = trace_get_fields(call);
1992        if (list_empty(head)) {
1993                ret = call->class->define_fields(call);
1994                if (ret < 0) {
1995                        pr_warn("Could not initialize trace point events/%s\n",
1996                                name);
1997                        return -1;
1998                }
1999        }
2000
2001        /*
2002         * Only event directories that can be enabled should have
2003         * triggers or filters.
2004         */
2005        if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
2006                trace_create_file("filter", 0644, file->dir, file,
2007                                  &ftrace_event_filter_fops);
2008
2009                trace_create_file("trigger", 0644, file->dir, file,
2010                                  &event_trigger_fops);
2011        }
2012
2013#ifdef CONFIG_HIST_TRIGGERS
2014        trace_create_file("hist", 0444, file->dir, file,
2015                          &event_hist_fops);
2016#endif
2017        trace_create_file("format", 0444, file->dir, call,
2018                          &ftrace_event_format_fops);
2019
2020        return 0;
2021}
2022
2023static void remove_event_from_tracers(struct trace_event_call *call)
2024{
2025        struct trace_event_file *file;
2026        struct trace_array *tr;
2027
2028        do_for_each_event_file_safe(tr, file) {
2029                if (file->event_call != call)
2030                        continue;
2031
2032                remove_event_file_dir(file);
2033                /*
2034                 * The do_for_each_event_file_safe() is
2035                 * a double loop. After finding the call for this
2036                 * trace_array, we use break to jump to the next
2037                 * trace_array.
2038                 */
2039                break;
2040        } while_for_each_event_file();
2041}
2042
2043static void event_remove(struct trace_event_call *call)
2044{
2045        struct trace_array *tr;
2046        struct trace_event_file *file;
2047
2048        do_for_each_event_file(tr, file) {
2049                if (file->event_call != call)
2050                        continue;
2051
2052                if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
2053                        tr->clear_trace = true;
2054
2055                ftrace_event_enable_disable(file, 0);
2056                /*
2057                 * The do_for_each_event_file() is
2058                 * a double loop. After finding the call for this
2059                 * trace_array, we use break to jump to the next
2060                 * trace_array.
2061                 */
2062                break;
2063        } while_for_each_event_file();
2064
2065        if (call->event.funcs)
2066                __unregister_trace_event(&call->event);
2067        remove_event_from_tracers(call);
2068        list_del(&call->list);
2069}
2070
2071static int event_init(struct trace_event_call *call)
2072{
2073        int ret = 0;
2074        const char *name;
2075
2076        name = trace_event_name(call);
2077        if (WARN_ON(!name))
2078                return -EINVAL;
2079
2080        if (call->class->raw_init) {
2081                ret = call->class->raw_init(call);
2082                if (ret < 0 && ret != -ENOSYS)
2083                        pr_warn("Could not initialize trace events/%s\n", name);
2084        }
2085
2086        return ret;
2087}
2088
2089static int
2090__register_event(struct trace_event_call *call, struct module *mod)
2091{
2092        int ret;
2093
2094        ret = event_init(call);
2095        if (ret < 0)
2096                return ret;
2097
2098        list_add(&call->list, &ftrace_events);
2099        call->mod = mod;
2100
2101        return 0;
2102}
2103
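/*
 * Replace, in place, the eval (enum/sizeof) name at @ptr with its
 * numeric value, then shift the tail of the string down over the
 * leftover characters. The value must not need more room than the name
 * it replaces (print_fmt cannot grow); otherwise NULL is returned.
 */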
2104static char *eval_replace(char *ptr, struct trace_eval_map *map, int len)
2105{
2106        int rlen;
2107        int elen;
2108
2109        /* Find the length of the eval value as a string */
2110        elen = snprintf(ptr, 0, "%ld", map->eval_value);
2111        /* Make sure there's enough room to replace the string with the value */
2112        if (len < elen)
2113                return NULL;
2114
2115        snprintf(ptr, elen + 1, "%ld", map->eval_value);
2116
2117        /* Get the rest of the string of ptr */
2118        rlen = strlen(ptr + len);
2119        memmove(ptr + elen, ptr + len, rlen);
2120        /* Make sure we end the new string */
2121        ptr[elen + rlen] = 0;
2122
2123        return ptr + elen;
2124}
2125
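/*
 * Scan a call's print_fmt and replace every standalone occurrence of
 * map->eval_string with its numeric value, skipping string literals,
 * numbers and field references. As an illustrative example, given a
 * mapping of TASK_RUNNING -> 0, a print_fmt fragment such as
 *
 *   REC->prev_state == TASK_RUNNING ? ...
 *
 * becomes
 *
 *   REC->prev_state == 0 ? ...
 *
 * so user space parsing the "format" file never sees an enum name it
 * cannot resolve.
 */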
2126static void update_event_printk(struct trace_event_call *call,
2127                                struct trace_eval_map *map)
2128{
2129        char *ptr;
2130        int quote = 0;
2131        int len = strlen(map->eval_string);
2132
2133        for (ptr = call->print_fmt; *ptr; ptr++) {
2134                if (*ptr == '\\') {
2135                        ptr++;
2136                        /* paranoid */
2137                        if (!*ptr)
2138                                break;
2139                        continue;
2140                }
2141                if (*ptr == '"') {
2142                        quote ^= 1;
2143                        continue;
2144                }
2145                if (quote)
2146                        continue;
2147                if (isdigit(*ptr)) {
2148                        /* skip numbers */
2149                        do {
2150                                ptr++;
2151                                /* Check for alpha chars like ULL */
2152                        } while (isalnum(*ptr));
2153                        if (!*ptr)
2154                                break;
2155                        /*
2156                         * A number must have some kind of delimiter after
2157                         * it, and we can ignore that too.
2158                         */
2159                        continue;
2160                }
2161                if (isalpha(*ptr) || *ptr == '_') {
2162                        if (strncmp(map->eval_string, ptr, len) == 0 &&
2163                            !isalnum(ptr[len]) && ptr[len] != '_') {
2164                                ptr = eval_replace(ptr, map, len);
2165                                /* enum/sizeof string smaller than value */
2166                                if (WARN_ON_ONCE(!ptr))
2167                                        return;
2168                                /*
2169                                 * No need to decrement here, as eval_replace()
2170                                 * returns the pointer to the character past
2171                                 * the eval, and two evals cannot be placed
2172                                 * back to back without something in between.
2173                                 * We can skip that something in between.
2174                                 */
2175                                continue;
2176                        }
2177                skip_more:
2178                        do {
2179                                ptr++;
2180                        } while (isalnum(*ptr) || *ptr == '_');
2181                        if (!*ptr)
2182                                break;
2183                        /*
2184                         * If what comes after this variable is a '.' or
2185                         * '->' then we can continue to ignore that string.
2186                         */
2187                        if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2188                                ptr += *ptr == '.' ? 1 : 2;
2189                                if (!*ptr)
2190                                        break;
2191                                goto skip_more;
2192                        }
2193                        /*
2194                         * Once again, we can skip the delimiter that came
2195                         * after the string.
2196                         */
2197                        continue;
2198                }
2199        }
2200}
2201
2202void trace_event_eval_update(struct trace_eval_map **map, int len)
2203{
2204        struct trace_event_call *call, *p;
2205        const char *last_system = NULL;
2206        bool first = false;
2207        int last_i;
2208        int i;
2209
2210        down_write(&trace_event_sem);
2211        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2212                /* events are usually grouped together with systems */
2213                if (!last_system || call->class->system != last_system) {
2214                        first = true;
2215                        last_i = 0;
2216                        last_system = call->class->system;
2217                }
2218
2219                /*
2220                 * Since calls are grouped by systems, the likelihood that the
2221                 * next call in the iteration belongs to the same system as the
2222                 * previous call is high. As an optimization, we skip searching
2223                 * for a map[] that matches the call's system if the last call
2224                 * was from the same system. That's what last_i is for. If the
2225                 * call has the same system as the previous call, then last_i
2226                 * will be the index of the first map[] that has a matching
2227                 * system.
2228                 */
2229                for (i = last_i; i < len; i++) {
2230                        if (call->class->system == map[i]->system) {
2231                                /* Save the first system if need be */
2232                                if (first) {
2233                                        last_i = i;
2234                                        first = false;
2235                                }
2236                                update_event_printk(call, map[i]);
2237                        }
2238                }
2239        }
2240        up_write(&trace_event_sem);
2241}
2242
2243static struct trace_event_file *
2244trace_create_new_event(struct trace_event_call *call,
2245                       struct trace_array *tr)
2246{
2247        struct trace_event_file *file;
2248
2249        file = kmem_cache_alloc(file_cachep, GFP_TRACE);
2250        if (!file)
2251                return NULL;
2252
2253        file->event_call = call;
2254        file->tr = tr;
2255        atomic_set(&file->sm_ref, 0);
2256        atomic_set(&file->tm_ref, 0);
2257        INIT_LIST_HEAD(&file->triggers);
2258        list_add(&file->list, &tr->events);
2259
2260        return file;
2261}
2262
2263/* Add an event to a trace directory */
2264static int
2265__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
2266{
2267        struct trace_event_file *file;
2268
2269        file = trace_create_new_event(call, tr);
2270        if (!file)
2271                return -ENOMEM;
2272
2273        return event_create_dir(tr->event_dir, file);
2274}
2275
2276/*
2277 * Just create a descriptor for early init. A descriptor is required
2278 * for enabling events at boot. We want to enable events before
2279 * the filesystem is initialized.
2280 */
2281static __init int
2282__trace_early_add_new_event(struct trace_event_call *call,
2283                            struct trace_array *tr)
2284{
2285        struct trace_event_file *file;
2286
2287        file = trace_create_new_event(call, tr);
2288        if (!file)
2289                return -ENOMEM;
2290
2291        return 0;
2292}
2293
2294struct ftrace_module_file_ops;
2295static void __add_event_to_tracers(struct trace_event_call *call);
2296
2297/* Add an additional event_call dynamically */
2298int trace_add_event_call(struct trace_event_call *call)
2299{
2300        int ret;
2301        lockdep_assert_held(&event_mutex);
2302
2303        mutex_lock(&trace_types_lock);
2304
2305        ret = __register_event(call, NULL);
2306        if (ret >= 0)
2307                __add_event_to_tracers(call);
2308
2309        mutex_unlock(&trace_types_lock);
2310        return ret;
2311}
2312
2313/*
2314 * Must be called under locking of trace_types_lock, event_mutex and
2315 * trace_event_sem.
2316 */
2317static void __trace_remove_event_call(struct trace_event_call *call)
2318{
2319        event_remove(call);
2320        trace_destroy_fields(call);
2321        free_event_filter(call->filter);
2322        call->filter = NULL;
2323}
2324
2325static int probe_remove_event_call(struct trace_event_call *call)
2326{
2327        struct trace_array *tr;
2328        struct trace_event_file *file;
2329
2330#ifdef CONFIG_PERF_EVENTS
2331        if (call->perf_refcount)
2332                return -EBUSY;
2333#endif
2334        do_for_each_event_file(tr, file) {
2335                if (file->event_call != call)
2336                        continue;
2337                /*
2338                 * We can't rely on ftrace_event_enable_disable(enable => 0)
2339                 * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress
2340                 * TRACE_REG_UNREGISTER.
2341                 */
2342                if (file->flags & EVENT_FILE_FL_ENABLED)
2343                        return -EBUSY;
2344                /*
2345                 * The do_for_each_event_file() is
2346                 * a double loop. After finding the call for this
2347                 * trace_array, we use break to jump to the next
2348                 * trace_array.
2349                 */
2350                break;
2351        } while_for_each_event_file();
2352
2353        __trace_remove_event_call(call);
2354
2355        return 0;
2356}
2357
2358/* Remove an event_call */
2359int trace_remove_event_call(struct trace_event_call *call)
2360{
2361        int ret;
2362
2363        lockdep_assert_held(&event_mutex);
2364
2365        mutex_lock(&trace_types_lock);
2366        down_write(&trace_event_sem);
2367        ret = probe_remove_event_call(call);
2368        up_write(&trace_event_sem);
2369        mutex_unlock(&trace_types_lock);
2370
2371        return ret;
2372}
2373
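/*
 * Iterate over an array of trace_event_call pointers bounded by two
 * addresses: either a module's mod->trace_events array or the built-in
 * _ftrace_events section between __start_ftrace_events and
 * __stop_ftrace_events.
 */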
2374#define for_each_event(event, start, end)                       \
2375        for (event = start;                                     \
2376             (unsigned long)event < (unsigned long)end;         \
2377             event++)
2378
2379#ifdef CONFIG_MODULES
2380
2381static void trace_module_add_events(struct module *mod)
2382{
2383        struct trace_event_call **call, **start, **end;
2384
2385        if (!mod->num_trace_events)
2386                return;
2387
2388        /* Don't add infrastructure for mods without tracepoints */
2389        if (trace_module_has_bad_taint(mod)) {
2390                pr_err("%s: module has bad taint, not creating trace events\n",
2391                       mod->name);
2392                return;
2393        }
2394
2395        start = mod->trace_events;
2396        end = mod->trace_events + mod->num_trace_events;
2397
2398        for_each_event(call, start, end) {
2399                __register_event(*call, mod);
2400                __add_event_to_tracers(*call);
2401        }
2402}
2403
2404static void trace_module_remove_events(struct module *mod)
2405{
2406        struct trace_event_call *call, *p;
2407
2408        down_write(&trace_event_sem);
2409        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2410                if (call->mod == mod)
2411                        __trace_remove_event_call(call);
2412        }
2413        up_write(&trace_event_sem);
2414
2415        /*
2416         * It is safest to reset the ring buffer if the module being unloaded
2417         * registered any events that were used. The only worry is if
2418         * a new module gets loaded, and takes on the same id as the events
2419         * of this module. When printing out the buffer, traced events left
2420         * over from this module may be passed to the new module events and
2421         * unexpected results may occur.
2422         */
2423        tracing_reset_all_online_cpus();
2424}
2425
2426static int trace_module_notify(struct notifier_block *self,
2427                               unsigned long val, void *data)
2428{
2429        struct module *mod = data;
2430
2431        mutex_lock(&event_mutex);
2432        mutex_lock(&trace_types_lock);
2433        switch (val) {
2434        case MODULE_STATE_COMING:
2435                trace_module_add_events(mod);
2436                break;
2437        case MODULE_STATE_GOING:
2438                trace_module_remove_events(mod);
2439                break;
2440        }
2441        mutex_unlock(&trace_types_lock);
2442        mutex_unlock(&event_mutex);
2443
2444        return 0;
2445}
2446
2447static struct notifier_block trace_module_nb = {
2448        .notifier_call = trace_module_notify,
2449        .priority = 1, /* higher than trace.c module notify */
2450};
2451#endif /* CONFIG_MODULES */
2452
2453/* Create a new event directory structure for a trace directory. */
2454static void
2455__trace_add_event_dirs(struct trace_array *tr)
2456{
2457        struct trace_event_call *call;
2458        int ret;
2459
2460        list_for_each_entry(call, &ftrace_events, list) {
2461                ret = __trace_add_new_event(call, tr);
2462                if (ret < 0)
2463                        pr_warn("Could not create directory for event %s\n",
2464                                trace_event_name(call));
2465        }
2466}
2467
2468/* Returns any file that matches the system and event */
2469struct trace_event_file *
2470__find_event_file(struct trace_array *tr, const char *system, const char *event)
2471{
2472        struct trace_event_file *file;
2473        struct trace_event_call *call;
2474        const char *name;
2475
2476        list_for_each_entry(file, &tr->events, list) {
2477
2478                call = file->event_call;
2479                name = trace_event_name(call);
2480
2481                if (!name || !call->class)
2482                        continue;
2483
2484                if (strcmp(event, name) == 0 &&
2485                    strcmp(system, call->class->system) == 0)
2486                        return file;
2487        }
2488        return NULL;
2489}
2490
2491/* Returns valid trace event files that match system and event */
2492struct trace_event_file *
2493find_event_file(struct trace_array *tr, const char *system, const char *event)
2494{
2495        struct trace_event_file *file;
2496
2497        file = __find_event_file(tr, system, event);
2498        if (!file || !file->event_call->class->reg ||
2499            file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
2500                return NULL;
2501
2502        return file;
2503}
2504
2505#ifdef CONFIG_DYNAMIC_FTRACE
2506
2507/* Avoid typos */
2508#define ENABLE_EVENT_STR        "enable_event"
2509#define DISABLE_EVENT_STR       "disable_event"
2510
2511struct event_probe_data {
2512        struct trace_event_file *file;
2513        unsigned long                   count;
2514        int                             ref;
2515        bool                            enable;
2516};
2517
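/*
 * Apply a probe's desired state to its event: soft-enable the event by
 * clearing the SOFT_DISABLED bit, or soft-disable it by setting the
 * bit. The event itself stays registered; only recording is toggled.
 */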
2518static void update_event_probe(struct event_probe_data *data)
2519{
2520        if (data->enable)
2521                clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2522        else
2523                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2524}
2525
2526static void
2527event_enable_probe(unsigned long ip, unsigned long parent_ip,
2528                   struct trace_array *tr, struct ftrace_probe_ops *ops,
2529                   void *data)
2530{
2531        struct ftrace_func_mapper *mapper = data;
2532        struct event_probe_data *edata;
2533        void **pdata;
2534
2535        pdata = ftrace_func_mapper_find_ip(mapper, ip);
2536        if (!pdata || !*pdata)
2537                return;
2538
2539        edata = *pdata;
2540        update_event_probe(edata);
2541}
2542
2543static void
2544event_enable_count_probe(unsigned long ip, unsigned long parent_ip,
2545                         struct trace_array *tr, struct ftrace_probe_ops *ops,
2546                         void *data)
2547{
2548        struct ftrace_func_mapper *mapper = data;
2549        struct event_probe_data *edata;
2550        void **pdata;
2551
2552        pdata = ftrace_func_mapper_find_ip(mapper, ip);
2553        if (!pdata || !*pdata)
2554                return;
2555
2556        edata = *pdata;
2557
2558        if (!edata->count)
2559                return;
2560
2561        /* Skip if the event is in a state we want to switch to */
2562        if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
2563                return;
2564
2565        if (edata->count != -1)
2566                (edata->count)--;
2567
2568        update_event_probe(edata);
2569}
2570
2571static int
2572event_enable_print(struct seq_file *m, unsigned long ip,
2573                   struct ftrace_probe_ops *ops, void *data)
2574{
2575        struct ftrace_func_mapper *mapper = data;
2576        struct event_probe_data *edata;
2577        void **pdata;
2578
2579        pdata = ftrace_func_mapper_find_ip(mapper, ip);
2580
2581        if (WARN_ON_ONCE(!pdata || !*pdata))
2582                return 0;
2583
2584        edata = *pdata;
2585
2586        seq_printf(m, "%ps:", (void *)ip);
2587
2588        seq_printf(m, "%s:%s:%s",
2589                   edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
2590                   edata->file->event_call->class->system,
2591                   trace_event_name(edata->file->event_call));
2592
2593        if (edata->count == -1)
2594                seq_puts(m, ":unlimited\n");
2595        else
2596                seq_printf(m, ":count=%ld\n", edata->count);
2597
2598        return 0;
2599}
2600
2601static int
2602event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
2603                  unsigned long ip, void *init_data, void **data)
2604{
2605        struct ftrace_func_mapper *mapper = *data;
2606        struct event_probe_data *edata = init_data;
2607        int ret;
2608
2609        if (!mapper) {
2610                mapper = allocate_ftrace_func_mapper();
2611                if (!mapper)
2612                        return -ENODEV;
2613                *data = mapper;
2614        }
2615
2616        ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
2617        if (ret < 0)
2618                return ret;
2619
2620        edata->ref++;
2621
2622        return 0;
2623}
2624
2625static int free_probe_data(void *data)
2626{
2627        struct event_probe_data *edata = data;
2628
2629        edata->ref--;
2630        if (!edata->ref) {
2631                /* Remove the SOFT_MODE flag */
2632                __ftrace_event_enable_disable(edata->file, 0, 1);
2633                module_put(edata->file->event_call->mod);
2634                kfree(edata);
2635        }
2636        return 0;
2637}
2638
2639static void
2640event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
2641                  unsigned long ip, void *data)
2642{
2643        struct ftrace_func_mapper *mapper = data;
2644        struct event_probe_data *edata;
2645
2646        if (!ip) {
2647                if (!mapper)
2648                        return;
2649                free_ftrace_func_mapper(mapper, free_probe_data);
2650                return;
2651        }
2652
2653        edata = ftrace_func_mapper_remove_ip(mapper, ip);
2654
2655        if (WARN_ON_ONCE(!edata))
2656                return;
2657
2658        if (WARN_ON_ONCE(edata->ref <= 0))
2659                return;
2660
2661        free_probe_data(edata);
2662}
2663
2664static struct ftrace_probe_ops event_enable_probe_ops = {
2665        .func                   = event_enable_probe,
2666        .print                  = event_enable_print,
2667        .init                   = event_enable_init,
2668        .free                   = event_enable_free,
2669};
2670
2671static struct ftrace_probe_ops event_enable_count_probe_ops = {
2672        .func                   = event_enable_count_probe,
2673        .print                  = event_enable_print,
2674        .init                   = event_enable_init,
2675        .free                   = event_enable_free,
2676};
2677
2678static struct ftrace_probe_ops event_disable_probe_ops = {
2679        .func                   = event_enable_probe,
2680        .print                  = event_enable_print,
2681        .init                   = event_enable_init,
2682        .free                   = event_enable_free,
2683};
2684
2685static struct ftrace_probe_ops event_disable_count_probe_ops = {
2686        .func                   = event_enable_count_probe,
2687        .print                  = event_enable_print,
2688        .init                   = event_enable_init,
2689        .free                   = event_enable_free,
2690};
2691
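/*
 * Callback for the enable_event/disable_event function probe commands,
 * which are written to set_ftrace_filter. An illustrative invocation:
 *
 *   # echo 'schedule:enable_event:sched:sched_switch:1' > \
 *       /sys/kernel/tracing/set_ftrace_filter
 *
 * enables sched:sched_switch the first time schedule() is hit; the
 * optional trailing count limits how many times the toggle fires
 * (leave it off for unlimited).
 */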
2692static int
2693event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
2694                  char *glob, char *cmd, char *param, int enabled)
2695{
2696        struct trace_event_file *file;
2697        struct ftrace_probe_ops *ops;
2698        struct event_probe_data *data;
2699        const char *system;
2700        const char *event;
2701        char *number;
2702        bool enable;
2703        int ret;
2704
2705        if (!tr)
2706                return -ENODEV;
2707
2708        /* hash funcs only work with set_ftrace_filter */
2709        if (!enabled || !param)
2710                return -EINVAL;
2711
2712        system = strsep(&param, ":");
2713        if (!param)
2714                return -EINVAL;
2715
2716        event = strsep(&param, ":");
2717
2718        mutex_lock(&event_mutex);
2719
2720        ret = -EINVAL;
2721        file = find_event_file(tr, system, event);
2722        if (!file)
2723                goto out;
2724
2725        enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
2726
2727        if (enable)
2728                ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
2729        else
2730                ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
2731
2732        if (glob[0] == '!') {
2733                ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
2734                goto out;
2735        }
2736
2737        ret = -ENOMEM;
2738
2739        data = kzalloc(sizeof(*data), GFP_KERNEL);
2740        if (!data)
2741                goto out;
2742
2743        data->enable = enable;
2744        data->count = -1;
2745        data->file = file;
2746
2747        if (!param)
2748                goto out_reg;
2749
2750        number = strsep(&param, ":");
2751
2752        ret = -EINVAL;
2753        if (!strlen(number))
2754                goto out_free;
2755
2756        /*
2757         * We use the callback data field (which is a pointer)
2758         * as our counter.
2759         */
2760        ret = kstrtoul(number, 0, &data->count);
2761        if (ret)
2762                goto out_free;
2763
2764 out_reg:
2765        /* Don't let event modules unload while probe registered */
2766        ret = try_module_get(file->event_call->mod);
2767        if (!ret) {
2768                ret = -EBUSY;
2769                goto out_free;
2770        }
2771
2772        ret = __ftrace_event_enable_disable(file, 1, 1);
2773        if (ret < 0)
2774                goto out_put;
2775
2776        ret = register_ftrace_function_probe(glob, tr, ops, data);
2777        /*
2778         * The above returns on success the # of functions enabled,
2779         * but if it didn't find any functions it returns zero.
2780         * Consider no functions a failure too.
2781         */
2782        if (!ret) {
2783                ret = -ENOENT;
2784                goto out_disable;
2785        } else if (ret < 0)
2786                goto out_disable;
2787        /* Just return zero, not the number of enabled functions */
2788        ret = 0;
2789 out:
2790        mutex_unlock(&event_mutex);
2791        return ret;
2792
2793 out_disable:
2794        __ftrace_event_enable_disable(file, 0, 1);
2795 out_put:
2796        module_put(file->event_call->mod);
2797 out_free:
2798        kfree(data);
2799        goto out;
2800}
2801
2802static struct ftrace_func_command event_enable_cmd = {
2803        .name                   = ENABLE_EVENT_STR,
2804        .func                   = event_enable_func,
2805};
2806
2807static struct ftrace_func_command event_disable_cmd = {
2808        .name                   = DISABLE_EVENT_STR,
2809        .func                   = event_enable_func,
2810};
2811
2812static __init int register_event_cmds(void)
2813{
2814        int ret;
2815
2816        ret = register_ftrace_command(&event_enable_cmd);
2817        if (WARN_ON(ret < 0))
2818                return ret;
2819        ret = register_ftrace_command(&event_disable_cmd);
2820        if (WARN_ON(ret < 0))
2821                unregister_ftrace_command(&event_enable_cmd);
2822        return ret;
2823}
2824#else
2825static inline int register_event_cmds(void) { return 0; }
2826#endif /* CONFIG_DYNAMIC_FTRACE */
2827
2828/*
2829 * The top level array has already had its trace_event_file
2830 * descriptors created in order to allow for early events to
2831 * be recorded. This function is called after the tracefs has been
2832 * initialized, and we now have to create the files associated
2833 * to the events.
2834 */
2835static __init void
2836__trace_early_add_event_dirs(struct trace_array *tr)
2837{
2838        struct trace_event_file *file;
2839        int ret;
2840
2842        list_for_each_entry(file, &tr->events, list) {
2843                ret = event_create_dir(tr->event_dir, file);
2844                if (ret < 0)
2845                        pr_warn("Could not create directory for event %s\n",
2846                                trace_event_name(file->event_call));
2847        }
2848}
2849
2850/*
2851 * For early boot up, the top trace array needs to have
2852 * a list of events that can be enabled. This must be done before
2853 * the filesystem is set up in order to allow events to be traced
2854 * early.
2855 */
2856static __init void
2857__trace_early_add_events(struct trace_array *tr)
2858{
2859        struct trace_event_call *call;
2860        int ret;
2861
2862        list_for_each_entry(call, &ftrace_events, list) {
2863                /* Early boot up should not have any modules loaded */
2864                if (WARN_ON_ONCE(call->mod))
2865                        continue;
2866
2867                ret = __trace_early_add_new_event(call, tr);
2868                if (ret < 0)
2869                        pr_warn("Could not create early event %s\n",
2870                                trace_event_name(call));
2871        }
2872}
2873
2874/* Remove the event directory structure for a trace directory. */
2875static void
2876__trace_remove_event_dirs(struct trace_array *tr)
2877{
2878        struct trace_event_file *file, *next;
2879
2880        list_for_each_entry_safe(file, next, &tr->events, list)
2881                remove_event_file_dir(file);
2882}
2883
2884static void __add_event_to_tracers(struct trace_event_call *call)
2885{
2886        struct trace_array *tr;
2887
2888        list_for_each_entry(tr, &ftrace_trace_arrays, list)
2889                __trace_add_new_event(call, tr);
2890}
2891
2892extern struct trace_event_call *__start_ftrace_events[];
2893extern struct trace_event_call *__stop_ftrace_events[];
2894
2895static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
2896
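/*
 * Handle the trace_event= boot parameter, which lists events to enable
 * before user space is up, e.g. (illustrative):
 *
 *   trace_event=sched:sched_switch,irq:irq_handler_entry
 *
 * Only the copy happens here; early_enable_events() parses the list
 * later in boot.
 */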
2897static __init int setup_trace_event(char *str)
2898{
2899        strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
2900        ring_buffer_expanded = true;
2901        tracing_selftest_disabled = true;
2902
2903        return 1;
2904}
2905__setup("trace_event=", setup_trace_event);
2906
2907/* Expects to have event_mutex held when called */
2908static int
2909create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
2910{
2911        struct dentry *d_events;
2912        struct dentry *entry;
2913
2914        entry = tracefs_create_file("set_event", 0644, parent,
2915                                    tr, &ftrace_set_event_fops);
2916        if (!entry) {
2917                pr_warn("Could not create tracefs 'set_event' entry\n");
2918                return -ENOMEM;
2919        }
2920
2921        d_events = tracefs_create_dir("events", parent);
2922        if (!d_events) {
2923                pr_warn("Could not create tracefs 'events' directory\n");
2924                return -ENOMEM;
2925        }
2926
2927        entry = trace_create_file("enable", 0644, d_events,
2928                                  tr, &ftrace_tr_enable_fops);
2929        if (!entry) {
2930                pr_warn("Could not create tracefs 'enable' entry\n");
2931                return -ENOMEM;
2932        }
2933
2934        /* These are not as crucial, just warn if they are not created */
2935
2936        entry = tracefs_create_file("set_event_pid", 0644, parent,
2937                                    tr, &ftrace_set_event_pid_fops);
2938        if (!entry)
2939                pr_warn("Could not create tracefs 'set_event_pid' entry\n");
2940
2941        /* ring buffer internal formats */
2942        entry = trace_create_file("header_page", 0444, d_events,
2943                                  ring_buffer_print_page_header,
2944                                  &ftrace_show_header_fops);
2945        if (!entry)
2946                pr_warn("Could not create tracefs 'header_page' entry\n");
2947
2948        entry = trace_create_file("header_event", 0444, d_events,
2949                                  ring_buffer_print_entry_header,
2950                                  &ftrace_show_header_fops);
2951        if (!entry)
2952                pr_warn("Could not create tracefs 'header_event' entry\n");
2953
2954        tr->event_dir = d_events;
2955
2956        return 0;
2957}
2958
2959/**
2960 * event_trace_add_tracer - add an instance of a trace_array to events
2961 * @parent: The parent dentry to place the files/directories for events in
2962 * @tr: The trace array associated with these events
2963 *
2964 * When a new instance is created, it needs to set up its events
2965 * directory, as well as other files associated with events. It also
2966 * creates the event hierarchy in the @parent/events directory.
2967 *
2968 * Returns 0 on success.
2969 *
2970 * Must be called with event_mutex held.
2971 */
2972int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
2973{
2974        int ret;
2975
2976        lockdep_assert_held(&event_mutex);
2977
2978        ret = create_event_toplevel_files(parent, tr);
2979        if (ret)
2980                goto out;
2981
2982        down_write(&trace_event_sem);
2983        __trace_add_event_dirs(tr);
2984        up_write(&trace_event_sem);
2985
2986 out:
2987        return ret;
2988}
2989
2990/*
2991 * The top trace array already had its file descriptors created.
2992 * Now the files themselves need to be created.
2993 */
2994static __init int
2995early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
2996{
2997        int ret;
2998
2999        mutex_lock(&event_mutex);
3000
3001        ret = create_event_toplevel_files(parent, tr);
3002        if (ret)
3003                goto out_unlock;
3004
3005        down_write(&trace_event_sem);
3006        __trace_early_add_event_dirs(tr);
3007        up_write(&trace_event_sem);
3008
3009 out_unlock:
3010        mutex_unlock(&event_mutex);
3011
3012        return ret;
3013}
3014
3015/* Must be called with event_mutex held */
3016int event_trace_del_tracer(struct trace_array *tr)
3017{
3018        lockdep_assert_held(&event_mutex);
3019
3020        /* Disable any event triggers and associated soft-disabled events */
3021        clear_event_triggers(tr);
3022
3023        /* Clear the pid list */
3024        __ftrace_clear_event_pids(tr);
3025
3026        /* Disable any running events */
3027        __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
3028
3029        /* Make sure no more events are being executed */
3030        tracepoint_synchronize_unregister();
3031
3032        down_write(&trace_event_sem);
3033        __trace_remove_event_dirs(tr);
3034        tracefs_remove_recursive(tr->event_dir);
3035        up_write(&trace_event_sem);
3036
3037        tr->event_dir = NULL;
3038
3039        return 0;
3040}
3041
3042static __init int event_trace_memsetup(void)
3043{
3044        field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
3045        file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
3046        return 0;
3047}
3048
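/*
 * Parse bootup_event_buf and enable each comma-separated event. This
 * runs twice: from event_trace_enable() during early init, and again
 * from event_trace_enable_again() (with @disable_first set) for events
 * like syscalls that cannot start on the first pass. The consumed
 * commas are restored so the buffer survives the second pass.
 */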
3049static __init void
3050early_enable_events(struct trace_array *tr, bool disable_first)
3051{
3052        char *buf = bootup_event_buf;
3053        char *token;
3054        int ret;
3055
3056        while (true) {
3057                token = strsep(&buf, ",");
3058
3059                if (!token)
3060                        break;
3061
3062                if (*token) {
3063                        /* Restarting syscalls requires that we stop them first */
3064                        if (disable_first)
3065                                ftrace_set_clr_event(tr, token, 0);
3066
3067                        ret = ftrace_set_clr_event(tr, token, 1);
3068                        if (ret)
3069                                pr_warn("Failed to enable trace event: %s\n", token);
3070                }
3071
3072                /* Put back the comma to allow this to be called again */
3073                if (buf)
3074                        *(buf - 1) = ',';
3075        }
3076}
3077
3078static __init int event_trace_enable(void)
3079{
3080        struct trace_array *tr = top_trace_array();
3081        struct trace_event_call **iter, *call;
3082        int ret;
3083
3084        if (!tr)
3085                return -ENODEV;
3086
3087        for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
3088
3089                call = *iter;
3090                ret = event_init(call);
3091                if (!ret)
3092                        list_add(&call->list, &ftrace_events);
3093        }
3094
3095        /*
3096         * We need the top trace array to have a working set of trace
3097         * points at early init, before the debug files and directories
3098         * are created. Create the file entries now, and attach them
3099         * to the actual file dentries later.
3100         */
3101        __trace_early_add_events(tr);
3102
3103        early_enable_events(tr, false);
3104
3105        trace_printk_start_comm();
3106
3107        register_event_cmds();
3108
3109        register_trigger_cmds();
3110
3111        return 0;
3112}
3113
3114/*
3115 * event_trace_enable() is called from trace_event_init() first to
3116 * initialize events and perhaps start any events that are on the
3117 * command line. Unfortunately, there are some events that will not
3118 * start this early, like the system call tracepoints that need
3119 * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
3120 * is called before pid 1 starts, so the flag is never set and the
3121 * syscall tracepoints are never reached, even though the events are
3122 * enabled (and record nothing). Hence this second, later pass.
3123 */
3124static __init int event_trace_enable_again(void)
3125{
3126        struct trace_array *tr;
3127
3128        tr = top_trace_array();
3129        if (!tr)
3130                return -ENODEV;
3131
3132        early_enable_events(tr, true);
3133
3134        return 0;
3135}
3136
3137early_initcall(event_trace_enable_again);
3138
3139__init int event_trace_init(void)
3140{
3141        struct trace_array *tr;
3142        struct dentry *d_tracer;
3143        struct dentry *entry;
3144        int ret;
3145
3146        tr = top_trace_array();
3147        if (!tr)
3148                return -ENODEV;
3149
3150        d_tracer = tracing_init_dentry();
3151        if (IS_ERR(d_tracer))
3152                return 0;
3153
3154        entry = tracefs_create_file("available_events", 0444, d_tracer,
3155                                    tr, &ftrace_avail_fops);
3156        if (!entry)
3157                pr_warn("Could not create tracefs 'available_events' entry\n");
3158
3159        if (trace_define_generic_fields())
3160                pr_warn("tracing: Failed to allocated generic fields");
3161
3162        if (trace_define_common_fields())
3163                pr_warn("tracing: Failed to allocate common fields");
3164
3165        ret = early_event_add_tracer(d_tracer, tr);
3166        if (ret)
3167                return ret;
3168
3169#ifdef CONFIG_MODULES
3170        ret = register_module_notifier(&trace_module_nb);
3171        if (ret)
3172                pr_warn("Failed to register trace events module notifier\n");
3173#endif
3174        return 0;
3175}
3176
3177void __init trace_event_init(void)
3178{
3179        event_trace_memsetup();
3180        init_ftrace_syscalls();
3181        event_trace_enable();
3182}
3183
3184#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
3185
3186static DEFINE_SPINLOCK(test_spinlock);
3187static DEFINE_SPINLOCK(test_spinlock_irq);
3188static DEFINE_MUTEX(test_mutex);
3189
3190static __init void test_work(struct work_struct *dummy)
3191{
3192        spin_lock(&test_spinlock);
3193        spin_lock_irq(&test_spinlock_irq);
3194        udelay(1);
3195        spin_unlock_irq(&test_spinlock_irq);
3196        spin_unlock(&test_spinlock);
3197
3198        mutex_lock(&test_mutex);
3199        msleep(1);
3200        mutex_unlock(&test_mutex);
3201}
3202
3203static __init int event_test_thread(void *unused)
3204{
3205        void *test_malloc;
3206
3207        test_malloc = kmalloc(1234, GFP_KERNEL);
3208        if (!test_malloc)
3209                pr_info("failed to kmalloc\n");
3210
3211        schedule_on_each_cpu(test_work);
3212
3213        kfree(test_malloc);
3214
3215        set_current_state(TASK_INTERRUPTIBLE);
3216        while (!kthread_should_stop()) {
3217                schedule();
3218                set_current_state(TASK_INTERRUPTIBLE);
3219        }
3220        __set_current_state(TASK_RUNNING);
3221
3222        return 0;
3223}
3224
3225/*
3226 * Do various things that may trigger events.
3227 */
3228static __init void event_test_stuff(void)
3229{
3230        struct task_struct *test_thread;
3231
3232        test_thread = kthread_run(event_test_thread, NULL, "test-events");
3233        msleep(1);
3234        kthread_stop(test_thread);
3235}
3236
3237/*
3238 * For every trace event defined, we will test each trace point separately,
3239 * and then by groups, and finally all trace points.
3240 */
3241static __init void event_trace_self_tests(void)
3242{
3243        struct trace_subsystem_dir *dir;
3244        struct trace_event_file *file;
3245        struct trace_event_call *call;
3246        struct event_subsystem *system;
3247        struct trace_array *tr;
3248        int ret;
3249
3250        tr = top_trace_array();
3251        if (!tr)
3252                return;
3253
3254        pr_info("Running tests on trace events:\n");
3255
3256        list_for_each_entry(file, &tr->events, list) {
3257
3258                call = file->event_call;
3259
3260                /* Only test those that have a probe */
3261                if (!call->class || !call->class->probe)
3262                        continue;
3263
3264/*
3265 * Testing syscall events here is pretty useless, but
3266 * we still do it if configured. But this is time consuming.
3267 * What we really need is a user thread to perform the
3268 * syscalls as we test.
3269 */
3270#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
3271                if (call->class->system &&
3272                    strcmp(call->class->system, "syscalls") == 0)
3273                        continue;
3274#endif
3275
3276                pr_info("Testing event %s: ", trace_event_name(call));
3277
3278                /*
3279                 * If an event is already enabled, someone is using
3280                 * it and the self test should not be on.
3281                 */
3282                if (file->flags & EVENT_FILE_FL_ENABLED) {
3283                        pr_warn("Enabled event during self test!\n");
3284                        WARN_ON_ONCE(1);
3285                        continue;
3286                }
3287
3288                ftrace_event_enable_disable(file, 1);
3289                event_test_stuff();
3290                ftrace_event_enable_disable(file, 0);
3291
3292                pr_cont("OK\n");
3293        }
3294
3295        /* Now test at the sub system level */
3296
3297        pr_info("Running tests on trace event systems:\n");
3298
3299        list_for_each_entry(dir, &tr->systems, list) {
3300
3301                system = dir->subsystem;
3302
3303                /* the ftrace system is special, skip it */
3304                if (strcmp(system->name, "ftrace") == 0)
3305                        continue;
3306
3307                pr_info("Testing event system %s: ", system->name);
3308
3309                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
3310                if (WARN_ON_ONCE(ret)) {
3311                        pr_warn("error enabling system %s\n",
3312                                system->name);
3313                        continue;
3314                }
3315
3316                event_test_stuff();
3317
3318                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
3319                if (WARN_ON_ONCE(ret)) {
3320                        pr_warn("error disabling system %s\n",
3321                                system->name);
3322                        continue;
3323                }
3324
3325                pr_cont("OK\n");
3326        }
3327
3328        /* Test with all events enabled */
3329
3330        pr_info("Running tests on all trace events:\n");
3331        pr_info("Testing all events: ");
3332
3333        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
3334        if (WARN_ON_ONCE(ret)) {
3335                pr_warn("error enabling all events\n");
3336                return;
3337        }
3338
3339        event_test_stuff();
3340
3341        /* Now disable all events again */
3342        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
3343        if (WARN_ON_ONCE(ret)) {
3344                pr_warn("error disabling all events\n");
3345                return;
3346        }
3347
3348        pr_cont("OK\n");
3349}
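
/*
 * The three passes above mirror what user space can do through the
 * tracefs "enable" files, e.g. with tracefs mounted at
 * /sys/kernel/tracing:
 *
 *   echo 1 > events/sched/sched_switch/enable    # a single event
 *   echo 1 > events/sched/enable                 # a whole system
 *   echo 1 > events/enable                       # all events
 *
 * In-kernel code can do the same through trace_set_clr_event(),
 * e.g. (illustrative only):
 *
 *   trace_set_clr_event("sched", "sched_switch", 1);
 */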
3350
3351#ifdef CONFIG_FUNCTION_TRACER
3352
3353static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
3354
3355static struct trace_event_file event_trace_file __initdata;
3356
3357static void __init
3358function_test_events_call(unsigned long ip, unsigned long parent_ip,
3359                          struct ftrace_ops *op, struct pt_regs *pt_regs)
3360{
3361        struct ring_buffer_event *event;
3362        struct ring_buffer *buffer;
3363        struct ftrace_entry *entry;
3364        unsigned long flags;
3365        long disabled;
3366        int cpu;
3367        int pc;
3368
3369        pc = preempt_count();
3370        preempt_disable_notrace();
3371        cpu = raw_smp_processor_id();
3372        disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
3373
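        /*
         * The per-CPU counter guards against recursion: anything called
         * below may itself be traced, so if this callback is re-entered
         * on the same CPU, the inner invocation sees a count greater
         * than one and backs out without touching the buffer.
         */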
3374        if (disabled != 1)
3375                goto out;
3376
3377        local_save_flags(flags);
3378
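        /*
         * Reserve space for a function entry; the saved irq flags and
         * preempt count are recorded in the event header.
         */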
3379        event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
3380                                                TRACE_FN, sizeof(*entry),
3381                                                flags, pc);
3382        if (!event)
3383                goto out;
3384        entry   = ring_buffer_event_data(event);
3385        entry->ip                       = ip;
3386        entry->parent_ip                = parent_ip;
3387
3388        event_trigger_unlock_commit(&event_trace_file, buffer, event,
3389                                    entry, flags, pc);
3390 out:
3391        atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
3392        preempt_enable_notrace();
3393}
3394
3395static struct ftrace_ops trace_ops __initdata =
3396{
3397        .func = function_test_events_call,
3398        .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3399};
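
/*
 * FTRACE_OPS_FL_RECURSION_SAFE tells the ftrace core that the callback
 * does its own recursion protection (the per-CPU counter above), so no
 * extra recursion checking needs to be wrapped around it.
 */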
3400
3401static __init void event_trace_self_test_with_function(void)
3402{
3403        int ret;
3404
3405        event_trace_file.tr = top_trace_array();
3406        if (WARN_ON(!event_trace_file.tr))
3407                return;
3408
3409        ret = register_ftrace_function(&trace_ops);
3410        if (WARN_ON(ret < 0)) {
3411                pr_info("Failed to enable function tracer for event tests\n");
3412                return;
3413        }
3414        pr_info("Running tests again, along with the function tracer\n");
3415        event_trace_self_tests();
3416        unregister_ftrace_function(&trace_ops);
3417}
3418#else
3419static __init void event_trace_self_test_with_function(void)
3420{
3421}
3422#endif
3423
3424static __init int event_trace_self_tests_init(void)
3425{
3426        if (!tracing_selftest_disabled) {
3427                event_trace_self_tests();
3428                event_trace_self_test_with_function();
3429        }
3430
3431        return 0;
3432}
3433
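/*
 * Run as a late initcall so that all built-in events have been
 * registered and tracing is fully initialized before the self tests
 * walk the event lists.
 */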
3434late_initcall(event_trace_self_tests_init);
3435
3436#endif
3437