linux/kernel/trace/trace_events.c
   1/*
   2 * event tracer
   3 *
   4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
   5 *
   6 *  - Added format output of fields of the trace point.
   7 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
   8 *
   9 */
  10
  11#define pr_fmt(fmt) fmt
  12
  13#include <linux/workqueue.h>
  14#include <linux/spinlock.h>
  15#include <linux/kthread.h>
  16#include <linux/tracefs.h>
  17#include <linux/uaccess.h>
  18#include <linux/module.h>
  19#include <linux/ctype.h>
  20#include <linux/sort.h>
  21#include <linux/slab.h>
  22#include <linux/delay.h>
  23
  24#include <trace/events/sched.h>
  25
  26#include <asm/setup.h>
  27
  28#include "trace_output.h"
  29
  30#undef TRACE_SYSTEM
  31#define TRACE_SYSTEM "TRACE_SYSTEM"
  32
  33DEFINE_MUTEX(event_mutex);
  34
  35LIST_HEAD(ftrace_events);
  36static LIST_HEAD(ftrace_generic_fields);
  37static LIST_HEAD(ftrace_common_fields);
  38
  39#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
  40
  41static struct kmem_cache *field_cachep;
  42static struct kmem_cache *file_cachep;
  43
  44static inline int system_refcount(struct event_subsystem *system)
  45{
  46        return system->ref_count;
  47}
  48
  49static int system_refcount_inc(struct event_subsystem *system)
  50{
  51        return system->ref_count++;
  52}
  53
  54static int system_refcount_dec(struct event_subsystem *system)
  55{
  56        return --system->ref_count;
  57}
  58
   59/* Double loops, do not use break, only gotos work */
  60#define do_for_each_event_file(tr, file)                        \
  61        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
  62                list_for_each_entry(file, &tr->events, list)
  63
  64#define do_for_each_event_file_safe(tr, file)                   \
  65        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
  66                struct trace_event_file *___n;                          \
  67                list_for_each_entry_safe(file, ___n, &tr->events, list)
  68
  69#define while_for_each_event_file()             \
  70        }
  71
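/*
 * Illustrative use of the macro pair above: the expansion is two nested
 * list_for_each_entry() loops, so break only exits the inner loop and
 * goto must be used to leave both:
 *
 *	do_for_each_event_file(tr, file) {
 *		if (file->flags & EVENT_FILE_FL_ENABLED)
 *			goto out;
 *	} while_for_each_event_file();
 * out:
 */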
  72static struct list_head *
  73trace_get_fields(struct trace_event_call *event_call)
  74{
  75        if (!event_call->class->get_fields)
  76                return &event_call->class->fields;
  77        return event_call->class->get_fields(event_call);
  78}
  79
  80static struct ftrace_event_field *
  81__find_event_field(struct list_head *head, char *name)
  82{
  83        struct ftrace_event_field *field;
  84
  85        list_for_each_entry(field, head, link) {
  86                if (!strcmp(field->name, name))
  87                        return field;
  88        }
  89
  90        return NULL;
  91}
  92
  93struct ftrace_event_field *
  94trace_find_event_field(struct trace_event_call *call, char *name)
  95{
  96        struct ftrace_event_field *field;
  97        struct list_head *head;
  98
  99        head = trace_get_fields(call);
 100        field = __find_event_field(head, name);
 101        if (field)
 102                return field;
 103
 104        field = __find_event_field(&ftrace_generic_fields, name);
 105        if (field)
 106                return field;
 107
 108        return __find_event_field(&ftrace_common_fields, name);
 109}
 110
 111static int __trace_define_field(struct list_head *head, const char *type,
 112                                const char *name, int offset, int size,
 113                                int is_signed, int filter_type)
 114{
 115        struct ftrace_event_field *field;
 116
 117        field = kmem_cache_alloc(field_cachep, GFP_TRACE);
 118        if (!field)
 119                return -ENOMEM;
 120
 121        field->name = name;
 122        field->type = type;
 123
 124        if (filter_type == FILTER_OTHER)
 125                field->filter_type = filter_assign_type(type);
 126        else
 127                field->filter_type = filter_type;
 128
 129        field->offset = offset;
 130        field->size = size;
 131        field->is_signed = is_signed;
 132
 133        list_add(&field->link, head);
 134
 135        return 0;
 136}
 137
 138int trace_define_field(struct trace_event_call *call, const char *type,
 139                       const char *name, int offset, int size, int is_signed,
 140                       int filter_type)
 141{
 142        struct list_head *head;
 143
 144        if (WARN_ON(!call->class))
 145                return 0;
 146
 147        head = trace_get_fields(call);
 148        return __trace_define_field(head, type, name, offset, size,
 149                                    is_signed, filter_type);
 150}
 151EXPORT_SYMBOL_GPL(trace_define_field);
 152
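/*
 * Sketch (illustrative, not taken from a real caller) of defining a field
 * through the exported helper above; "struct my_entry" and its "ip" member
 * are hypothetical:
 *
 *	ret = trace_define_field(call, "unsigned long", "ip",
 *				 offsetof(struct my_entry, ip),
 *				 sizeof(unsigned long), 0, FILTER_OTHER);
 */
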
 153#define __generic_field(type, item, filter_type)                        \
 154        ret = __trace_define_field(&ftrace_generic_fields, #type,       \
 155                                   #item, 0, 0, is_signed_type(type),   \
 156                                   filter_type);                        \
 157        if (ret)                                                        \
 158                return ret;
 159
 160#define __common_field(type, item)                                      \
 161        ret = __trace_define_field(&ftrace_common_fields, #type,        \
 162                                   "common_" #item,                     \
 163                                   offsetof(typeof(ent), item),         \
 164                                   sizeof(ent.item),                    \
 165                                   is_signed_type(type), FILTER_OTHER); \
 166        if (ret)                                                        \
 167                return ret;
 168
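/*
 * For illustration, __common_field(int, pid) used below expands roughly to:
 *
 *	ret = __trace_define_field(&ftrace_common_fields, "int",
 *				   "common_pid",
 *				   offsetof(typeof(ent), pid),
 *				   sizeof(ent.pid),
 *				   is_signed_type(int), FILTER_OTHER);
 *
 * and relies on the local 'struct trace_entry ent' in the caller.
 */
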
 169static int trace_define_generic_fields(void)
 170{
 171        int ret;
 172
 173        __generic_field(int, CPU, FILTER_CPU);
 174        __generic_field(int, cpu, FILTER_CPU);
 175        __generic_field(char *, COMM, FILTER_COMM);
 176        __generic_field(char *, comm, FILTER_COMM);
 177
 178        return ret;
 179}
 180
 181static int trace_define_common_fields(void)
 182{
 183        int ret;
 184        struct trace_entry ent;
 185
 186        __common_field(unsigned short, type);
 187        __common_field(unsigned char, flags);
 188        __common_field(unsigned char, preempt_count);
 189        __common_field(int, pid);
 190
 191        return ret;
 192}
 193
 194static void trace_destroy_fields(struct trace_event_call *call)
 195{
 196        struct ftrace_event_field *field, *next;
 197        struct list_head *head;
 198
 199        head = trace_get_fields(call);
 200        list_for_each_entry_safe(field, next, head, link) {
 201                list_del(&field->link);
 202                kmem_cache_free(field_cachep, field);
 203        }
 204}
 205
 206/*
 207 * run-time version of trace_event_get_offsets_<call>() that returns the last
 208 * accessible offset of trace fields excluding __dynamic_array bytes
 209 */
 210int trace_event_get_offsets(struct trace_event_call *call)
 211{
 212        struct ftrace_event_field *tail;
 213        struct list_head *head;
 214
 215        head = trace_get_fields(call);
 216        /*
 217         * head->next points to the last field with the largest offset,
 218         * since it was added last by trace_define_field()
 219         */
 220        tail = list_first_entry(head, struct ftrace_event_field, link);
 221        return tail->offset + tail->size;
 222}
 223
 224int trace_event_raw_init(struct trace_event_call *call)
 225{
 226        int id;
 227
 228        id = register_trace_event(&call->event);
 229        if (!id)
 230                return -ENODEV;
 231
 232        return 0;
 233}
 234EXPORT_SYMBOL_GPL(trace_event_raw_init);
 235
 236bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
 237{
 238        struct trace_array *tr = trace_file->tr;
 239        struct trace_array_cpu *data;
 240        struct trace_pid_list *pid_list;
 241
 242        pid_list = rcu_dereference_sched(tr->filtered_pids);
 243        if (!pid_list)
 244                return false;
 245
 246        data = this_cpu_ptr(tr->trace_buffer.data);
 247
 248        return data->ignore_pid;
 249}
 250EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
 251
 252void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 253                                 struct trace_event_file *trace_file,
 254                                 unsigned long len)
 255{
 256        struct trace_event_call *event_call = trace_file->event_call;
 257
 258        if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
 259            trace_event_ignore_this_pid(trace_file))
 260                return NULL;
 261
 262        local_save_flags(fbuffer->flags);
 263        fbuffer->pc = preempt_count();
 264        /*
 265         * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
 266         * preemption (adding one to the preempt_count). Since we are
 267         * interested in the preempt_count at the time the tracepoint was
 268         * hit, we need to subtract one to offset the increment.
 269         */
 270        if (IS_ENABLED(CONFIG_PREEMPT))
 271                fbuffer->pc--;
 272        fbuffer->trace_file = trace_file;
 273
 274        fbuffer->event =
 275                trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
 276                                                event_call->event.type, len,
 277                                                fbuffer->flags, fbuffer->pc);
 278        if (!fbuffer->event)
 279                return NULL;
 280
 281        fbuffer->entry = ring_buffer_event_data(fbuffer->event);
 282        return fbuffer->entry;
 283}
 284EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
 285
 286static DEFINE_SPINLOCK(tracepoint_iter_lock);
 287
 288static void output_printk(struct trace_event_buffer *fbuffer)
 289{
 290        struct trace_event_call *event_call;
 291        struct trace_event *event;
 292        unsigned long flags;
 293        struct trace_iterator *iter = tracepoint_print_iter;
 294
 295        if (!iter)
 296                return;
 297
 298        event_call = fbuffer->trace_file->event_call;
 299        if (!event_call || !event_call->event.funcs ||
 300            !event_call->event.funcs->trace)
 301                return;
 302
 303        event = &fbuffer->trace_file->event_call->event;
 304
 305        spin_lock_irqsave(&tracepoint_iter_lock, flags);
 306        trace_seq_init(&iter->seq);
 307        iter->ent = fbuffer->entry;
 308        event_call->event.funcs->trace(iter, 0, event);
 309        trace_seq_putc(&iter->seq, 0);
 310        printk("%s", iter->seq.buffer);
 311
 312        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
 313}
 314
 315void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
 316{
 317        if (tracepoint_printk)
 318                output_printk(fbuffer);
 319
 320        event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
 321                                    fbuffer->event, fbuffer->entry,
 322                                    fbuffer->flags, fbuffer->pc);
 323}
 324EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
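
/*
 * Sketch of how a generated tracepoint probe pairs the reserve/commit
 * helpers above ('entry->foo' is an illustrative field):
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->foo = foo;
 *	trace_event_buffer_commit(&fbuffer);
 */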
 325
 326int trace_event_reg(struct trace_event_call *call,
 327                    enum trace_reg type, void *data)
 328{
 329        struct trace_event_file *file = data;
 330
 331        WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
 332        switch (type) {
 333        case TRACE_REG_REGISTER:
 334                return tracepoint_probe_register(call->tp,
 335                                                 call->class->probe,
 336                                                 file);
 337        case TRACE_REG_UNREGISTER:
 338                tracepoint_probe_unregister(call->tp,
 339                                            call->class->probe,
 340                                            file);
 341                return 0;
 342
 343#ifdef CONFIG_PERF_EVENTS
 344        case TRACE_REG_PERF_REGISTER:
 345                return tracepoint_probe_register(call->tp,
 346                                                 call->class->perf_probe,
 347                                                 call);
 348        case TRACE_REG_PERF_UNREGISTER:
 349                tracepoint_probe_unregister(call->tp,
 350                                            call->class->perf_probe,
 351                                            call);
 352                return 0;
 353        case TRACE_REG_PERF_OPEN:
 354        case TRACE_REG_PERF_CLOSE:
 355        case TRACE_REG_PERF_ADD:
 356        case TRACE_REG_PERF_DEL:
 357                return 0;
 358#endif
 359        }
 360        return 0;
 361}
 362EXPORT_SYMBOL_GPL(trace_event_reg);
 363
 364void trace_event_enable_cmd_record(bool enable)
 365{
 366        struct trace_event_file *file;
 367        struct trace_array *tr;
 368
 369        mutex_lock(&event_mutex);
 370        do_for_each_event_file(tr, file) {
 371
 372                if (!(file->flags & EVENT_FILE_FL_ENABLED))
 373                        continue;
 374
 375                if (enable) {
 376                        tracing_start_cmdline_record();
 377                        set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 378                } else {
 379                        tracing_stop_cmdline_record();
 380                        clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 381                }
 382        } while_for_each_event_file();
 383        mutex_unlock(&event_mutex);
 384}
 385
 386static int __ftrace_event_enable_disable(struct trace_event_file *file,
 387                                         int enable, int soft_disable)
 388{
 389        struct trace_event_call *call = file->event_call;
 390        struct trace_array *tr = file->tr;
 391        unsigned long file_flags = file->flags;
 392        int ret = 0;
 393        int disable;
 394
 395        switch (enable) {
 396        case 0:
 397                /*
 398                 * When soft_disable is set and enable is cleared, the sm_ref
 399                 * reference counter is decremented. If it reaches 0, we want
 400                 * to clear the SOFT_DISABLED flag but leave the event in the
 401                 * state that it was. That is, if the event was enabled and
 402                 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
 403                 * is set we do not want the event to be enabled before we
 404                 * clear the bit.
 405                 *
 406                 * When soft_disable is not set but the SOFT_MODE flag is,
 407                 * we do nothing. Do not disable the tracepoint, otherwise
  408                 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
 409                 */
 410                if (soft_disable) {
 411                        if (atomic_dec_return(&file->sm_ref) > 0)
 412                                break;
 413                        disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
 414                        clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
 415                } else
 416                        disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
 417
 418                if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
 419                        clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 420                        if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
 421                                tracing_stop_cmdline_record();
 422                                clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 423                        }
 424                        call->class->reg(call, TRACE_REG_UNREGISTER, file);
 425                }
  426                /* If in SOFT_MODE, just set the SOFT_DISABLED_BIT, else clear it */
 427                if (file->flags & EVENT_FILE_FL_SOFT_MODE)
 428                        set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 429                else
 430                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 431                break;
 432        case 1:
 433                /*
 434                 * When soft_disable is set and enable is set, we want to
 435                 * register the tracepoint for the event, but leave the event
 436                 * as is. That means, if the event was already enabled, we do
 437                 * nothing (but set SOFT_MODE). If the event is disabled, we
 438                 * set SOFT_DISABLED before enabling the event tracepoint, so
 439                 * it still seems to be disabled.
 440                 */
 441                if (!soft_disable)
 442                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 443                else {
 444                        if (atomic_inc_return(&file->sm_ref) > 1)
 445                                break;
 446                        set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
 447                }
 448
 449                if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
 450
 451                        /* Keep the event disabled, when going to SOFT_MODE. */
 452                        if (soft_disable)
 453                                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 454
 455                        if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
 456                                tracing_start_cmdline_record();
 457                                set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 458                        }
 459                        ret = call->class->reg(call, TRACE_REG_REGISTER, file);
 460                        if (ret) {
 461                                tracing_stop_cmdline_record();
 462                                pr_info("event trace: Could not enable event "
 463                                        "%s\n", trace_event_name(call));
 464                                break;
 465                        }
 466                        set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 467
 468                        /* WAS_ENABLED gets set but never cleared. */
 469                        call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
 470                }
 471                break;
 472        }
 473
 474        /* Enable or disable use of trace_buffered_event */
 475        if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
 476            (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
 477                if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
 478                        trace_buffered_event_enable();
 479                else
 480                        trace_buffered_event_disable();
 481        }
 482
 483        return ret;
 484}
 485
 486int trace_event_enable_disable(struct trace_event_file *file,
 487                               int enable, int soft_disable)
 488{
 489        return __ftrace_event_enable_disable(file, enable, soft_disable);
 490}
 491
 492static int ftrace_event_enable_disable(struct trace_event_file *file,
 493                                       int enable)
 494{
 495        return __ftrace_event_enable_disable(file, enable, 0);
 496}
 497
 498static void ftrace_clear_events(struct trace_array *tr)
 499{
 500        struct trace_event_file *file;
 501
 502        mutex_lock(&event_mutex);
 503        list_for_each_entry(file, &tr->events, list) {
 504                ftrace_event_enable_disable(file, 0);
 505        }
 506        mutex_unlock(&event_mutex);
 507}
 508
 509static void
 510event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
 511{
 512        struct trace_pid_list *pid_list;
 513        struct trace_array *tr = data;
 514
 515        pid_list = rcu_dereference_sched(tr->filtered_pids);
 516        trace_filter_add_remove_task(pid_list, NULL, task);
 517}
 518
 519static void
 520event_filter_pid_sched_process_fork(void *data,
 521                                    struct task_struct *self,
 522                                    struct task_struct *task)
 523{
 524        struct trace_pid_list *pid_list;
 525        struct trace_array *tr = data;
 526
 527        pid_list = rcu_dereference_sched(tr->filtered_pids);
 528        trace_filter_add_remove_task(pid_list, self, task);
 529}
 530
 531void trace_event_follow_fork(struct trace_array *tr, bool enable)
 532{
 533        if (enable) {
 534                register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
 535                                                       tr, INT_MIN);
 536                register_trace_prio_sched_process_exit(event_filter_pid_sched_process_exit,
 537                                                       tr, INT_MAX);
 538        } else {
 539                unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
 540                                                    tr);
 541                unregister_trace_sched_process_exit(event_filter_pid_sched_process_exit,
 542                                                    tr);
 543        }
 544}
 545
 546static void
 547event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
 548                    struct task_struct *prev, struct task_struct *next)
 549{
 550        struct trace_array *tr = data;
 551        struct trace_pid_list *pid_list;
 552
 553        pid_list = rcu_dereference_sched(tr->filtered_pids);
 554
 555        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 556                       trace_ignore_this_task(pid_list, prev) &&
 557                       trace_ignore_this_task(pid_list, next));
 558}
 559
 560static void
 561event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
 562                    struct task_struct *prev, struct task_struct *next)
 563{
 564        struct trace_array *tr = data;
 565        struct trace_pid_list *pid_list;
 566
 567        pid_list = rcu_dereference_sched(tr->filtered_pids);
 568
 569        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 570                       trace_ignore_this_task(pid_list, next));
 571}
 572
 573static void
 574event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
 575{
 576        struct trace_array *tr = data;
 577        struct trace_pid_list *pid_list;
 578
 579        /* Nothing to do if we are already tracing */
 580        if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
 581                return;
 582
 583        pid_list = rcu_dereference_sched(tr->filtered_pids);
 584
 585        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 586                       trace_ignore_this_task(pid_list, task));
 587}
 588
 589static void
 590event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
 591{
 592        struct trace_array *tr = data;
 593        struct trace_pid_list *pid_list;
 594
 595        /* Nothing to do if we are not tracing */
 596        if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
 597                return;
 598
 599        pid_list = rcu_dereference_sched(tr->filtered_pids);
 600
 601        /* Set tracing if current is enabled */
 602        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 603                       trace_ignore_this_task(pid_list, current));
 604}
 605
 606static void __ftrace_clear_event_pids(struct trace_array *tr)
 607{
 608        struct trace_pid_list *pid_list;
 609        struct trace_event_file *file;
 610        int cpu;
 611
 612        pid_list = rcu_dereference_protected(tr->filtered_pids,
 613                                             lockdep_is_held(&event_mutex));
 614        if (!pid_list)
 615                return;
 616
 617        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
 618        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
 619
 620        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
 621        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
 622
 623        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
 624        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
 625
 626        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
 627        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
 628
 629        list_for_each_entry(file, &tr->events, list) {
 630                clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
 631        }
 632
 633        for_each_possible_cpu(cpu)
 634                per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
 635
 636        rcu_assign_pointer(tr->filtered_pids, NULL);
 637
 638        /* Wait till all users are no longer using pid filtering */
 639        synchronize_sched();
 640
 641        trace_free_pid_list(pid_list);
 642}
 643
 644static void ftrace_clear_event_pids(struct trace_array *tr)
 645{
 646        mutex_lock(&event_mutex);
 647        __ftrace_clear_event_pids(tr);
 648        mutex_unlock(&event_mutex);
 649}
 650
 651static void __put_system(struct event_subsystem *system)
 652{
 653        struct event_filter *filter = system->filter;
 654
 655        WARN_ON_ONCE(system_refcount(system) == 0);
 656        if (system_refcount_dec(system))
 657                return;
 658
 659        list_del(&system->list);
 660
 661        if (filter) {
 662                kfree(filter->filter_string);
 663                kfree(filter);
 664        }
 665        kfree_const(system->name);
 666        kfree(system);
 667}
 668
 669static void __get_system(struct event_subsystem *system)
 670{
 671        WARN_ON_ONCE(system_refcount(system) == 0);
 672        system_refcount_inc(system);
 673}
 674
 675static void __get_system_dir(struct trace_subsystem_dir *dir)
 676{
 677        WARN_ON_ONCE(dir->ref_count == 0);
 678        dir->ref_count++;
 679        __get_system(dir->subsystem);
 680}
 681
 682static void __put_system_dir(struct trace_subsystem_dir *dir)
 683{
 684        WARN_ON_ONCE(dir->ref_count == 0);
 685        /* If the subsystem is about to be freed, the dir must be too */
 686        WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
 687
 688        __put_system(dir->subsystem);
 689        if (!--dir->ref_count)
 690                kfree(dir);
 691}
 692
 693static void put_system(struct trace_subsystem_dir *dir)
 694{
 695        mutex_lock(&event_mutex);
 696        __put_system_dir(dir);
 697        mutex_unlock(&event_mutex);
 698}
 699
 700static void remove_subsystem(struct trace_subsystem_dir *dir)
 701{
 702        if (!dir)
 703                return;
 704
 705        if (!--dir->nr_events) {
 706                tracefs_remove_recursive(dir->entry);
 707                list_del(&dir->list);
 708                __put_system_dir(dir);
 709        }
 710}
 711
 712static void remove_event_file_dir(struct trace_event_file *file)
 713{
 714        struct dentry *dir = file->dir;
 715        struct dentry *child;
 716
 717        if (dir) {
 718                spin_lock(&dir->d_lock);        /* probably unneeded */
 719                list_for_each_entry(child, &dir->d_subdirs, d_child) {
 720                        if (d_really_is_positive(child))        /* probably unneeded */
 721                                d_inode(child)->i_private = NULL;
 722                }
 723                spin_unlock(&dir->d_lock);
 724
 725                tracefs_remove_recursive(dir);
 726        }
 727
 728        list_del(&file->list);
 729        remove_subsystem(file->system);
 730        free_event_filter(file->filter);
 731        kmem_cache_free(file_cachep, file);
 732}
 733
 734/*
  735 * __ftrace_set_clr_event(tr, NULL, NULL, NULL, set) will set/unset all events.
 736 */
 737static int
 738__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
 739                              const char *sub, const char *event, int set)
 740{
 741        struct trace_event_file *file;
 742        struct trace_event_call *call;
 743        const char *name;
 744        int ret = -EINVAL;
 745
 746        list_for_each_entry(file, &tr->events, list) {
 747
 748                call = file->event_call;
 749                name = trace_event_name(call);
 750
 751                if (!name || !call->class || !call->class->reg)
 752                        continue;
 753
 754                if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
 755                        continue;
 756
 757                if (match &&
 758                    strcmp(match, name) != 0 &&
 759                    strcmp(match, call->class->system) != 0)
 760                        continue;
 761
 762                if (sub && strcmp(sub, call->class->system) != 0)
 763                        continue;
 764
 765                if (event && strcmp(event, name) != 0)
 766                        continue;
 767
 768                ftrace_event_enable_disable(file, set);
 769
 770                ret = 0;
 771        }
 772
 773        return ret;
 774}
 775
 776static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
 777                                  const char *sub, const char *event, int set)
 778{
 779        int ret;
 780
 781        mutex_lock(&event_mutex);
 782        ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
 783        mutex_unlock(&event_mutex);
 784
 785        return ret;
 786}
 787
 788static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 789{
 790        char *event = NULL, *sub = NULL, *match;
 791        int ret;
 792
 793        /*
 794         * The buf format can be <subsystem>:<event-name>
 795         *  *:<event-name> means any event by that name.
 796         *  :<event-name> is the same.
 797         *
 798         *  <subsystem>:* means all events in that subsystem
 799         *  <subsystem>: means the same.
 800         *
 801         *  <name> (no ':') means all events in a subsystem with
 802         *  the name <name> or any event that matches <name>
 803         */
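
        /*
         * For example (illustrative): "sched:sched_switch" parses below to
         * sub = "sched" and event = "sched_switch"; "sched:" selects the
         * whole sched subsystem; a bare "sched_switch" sets only match.
         */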
 804
 805        match = strsep(&buf, ":");
 806        if (buf) {
 807                sub = match;
 808                event = buf;
 809                match = NULL;
 810
 811                if (!strlen(sub) || strcmp(sub, "*") == 0)
 812                        sub = NULL;
 813                if (!strlen(event) || strcmp(event, "*") == 0)
 814                        event = NULL;
 815        }
 816
 817        ret = __ftrace_set_clr_event(tr, match, sub, event, set);
 818
 819        /* Put back the colon to allow this to be called again */
 820        if (buf)
 821                *(buf - 1) = ':';
 822
 823        return ret;
 824}
 825
 826/**
 827 * trace_set_clr_event - enable or disable an event
 828 * @system: system name to match (NULL for any system)
  829 * @event: event name to match (NULL for all events within the system)
 830 * @set: 1 to enable, 0 to disable
 831 *
 832 * This is a way for other parts of the kernel to enable or disable
 833 * event recording.
 834 *
 835 * Returns 0 on success, -EINVAL if the parameters do not match any
 836 * registered events.
 837 */
 838int trace_set_clr_event(const char *system, const char *event, int set)
 839{
 840        struct trace_array *tr = top_trace_array();
 841
 842        if (!tr)
 843                return -ENODEV;
 844
 845        return __ftrace_set_clr_event(tr, NULL, system, event, set);
 846}
 847EXPORT_SYMBOL_GPL(trace_set_clr_event);
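
/*
 * Example (illustrative) of in-kernel usage of the helper above, which
 * enables sched:sched_switch in the top-level trace array:
 *
 *	ret = trace_set_clr_event("sched", "sched_switch", 1);
 */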
 848
 849/* 128 should be much more than enough */
 850#define EVENT_BUF_SIZE          127
 851
 852static ssize_t
 853ftrace_event_write(struct file *file, const char __user *ubuf,
 854                   size_t cnt, loff_t *ppos)
 855{
 856        struct trace_parser parser;
 857        struct seq_file *m = file->private_data;
 858        struct trace_array *tr = m->private;
 859        ssize_t read, ret;
 860
 861        if (!cnt)
 862                return 0;
 863
 864        ret = tracing_update_buffers();
 865        if (ret < 0)
 866                return ret;
 867
 868        if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
 869                return -ENOMEM;
 870
 871        read = trace_get_user(&parser, ubuf, cnt, ppos);
 872
 873        if (read >= 0 && trace_parser_loaded((&parser))) {
 874                int set = 1;
 875
 876                if (*parser.buffer == '!')
 877                        set = 0;
 878
 879                parser.buffer[parser.idx] = 0;
 880
 881                ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
 882                if (ret)
 883                        goto out_put;
 884        }
 885
 886        ret = read;
 887
 888 out_put:
 889        trace_parser_put(&parser);
 890
 891        return ret;
 892}
 893
 894static void *
 895t_next(struct seq_file *m, void *v, loff_t *pos)
 896{
 897        struct trace_event_file *file = v;
 898        struct trace_event_call *call;
 899        struct trace_array *tr = m->private;
 900
 901        (*pos)++;
 902
 903        list_for_each_entry_continue(file, &tr->events, list) {
 904                call = file->event_call;
 905                /*
  906                 * The ftrace subsystem is for showing formats only.
  907                 * Its events cannot be enabled or disabled via the event files.
 908                 */
 909                if (call->class && call->class->reg &&
 910                    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
 911                        return file;
 912        }
 913
 914        return NULL;
 915}
 916
 917static void *t_start(struct seq_file *m, loff_t *pos)
 918{
 919        struct trace_event_file *file;
 920        struct trace_array *tr = m->private;
 921        loff_t l;
 922
 923        mutex_lock(&event_mutex);
 924
 925        file = list_entry(&tr->events, struct trace_event_file, list);
 926        for (l = 0; l <= *pos; ) {
 927                file = t_next(m, file, &l);
 928                if (!file)
 929                        break;
 930        }
 931        return file;
 932}
 933
 934static void *
 935s_next(struct seq_file *m, void *v, loff_t *pos)
 936{
 937        struct trace_event_file *file = v;
 938        struct trace_array *tr = m->private;
 939
 940        (*pos)++;
 941
 942        list_for_each_entry_continue(file, &tr->events, list) {
 943                if (file->flags & EVENT_FILE_FL_ENABLED)
 944                        return file;
 945        }
 946
 947        return NULL;
 948}
 949
 950static void *s_start(struct seq_file *m, loff_t *pos)
 951{
 952        struct trace_event_file *file;
 953        struct trace_array *tr = m->private;
 954        loff_t l;
 955
 956        mutex_lock(&event_mutex);
 957
 958        file = list_entry(&tr->events, struct trace_event_file, list);
 959        for (l = 0; l <= *pos; ) {
 960                file = s_next(m, file, &l);
 961                if (!file)
 962                        break;
 963        }
 964        return file;
 965}
 966
 967static int t_show(struct seq_file *m, void *v)
 968{
 969        struct trace_event_file *file = v;
 970        struct trace_event_call *call = file->event_call;
 971
 972        if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
 973                seq_printf(m, "%s:", call->class->system);
 974        seq_printf(m, "%s\n", trace_event_name(call));
 975
 976        return 0;
 977}
 978
 979static void t_stop(struct seq_file *m, void *p)
 980{
 981        mutex_unlock(&event_mutex);
 982}
 983
 984static void *
 985p_next(struct seq_file *m, void *v, loff_t *pos)
 986{
 987        struct trace_array *tr = m->private;
 988        struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
 989
 990        return trace_pid_next(pid_list, v, pos);
 991}
 992
 993static void *p_start(struct seq_file *m, loff_t *pos)
 994        __acquires(RCU)
 995{
 996        struct trace_pid_list *pid_list;
 997        struct trace_array *tr = m->private;
 998
 999        /*
 1000         * Grab the mutex so that calls to p_next() see the same
 1001         * tr->filtered_pids that p_start() saw.
 1002         * If we had just passed tr->filtered_pids around, RCU alone would
 1003         * have been enough, but doing that makes things more complex.
1004         */
1005        mutex_lock(&event_mutex);
1006        rcu_read_lock_sched();
1007
1008        pid_list = rcu_dereference_sched(tr->filtered_pids);
1009
1010        if (!pid_list)
1011                return NULL;
1012
1013        return trace_pid_start(pid_list, pos);
1014}
1015
1016static void p_stop(struct seq_file *m, void *p)
1017        __releases(RCU)
1018{
1019        rcu_read_unlock_sched();
1020        mutex_unlock(&event_mutex);
1021}
1022
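/*
 * A read of the per-event "enable" file below yields "0" or "1", with a
 * '*' appended while the event is in soft mode or soft-disabled (for
 * example "0*\n"), matching the buf built in event_enable_read().
 */
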
1023static ssize_t
1024event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1025                  loff_t *ppos)
1026{
1027        struct trace_event_file *file;
1028        unsigned long flags;
1029        char buf[4] = "0";
1030
1031        mutex_lock(&event_mutex);
1032        file = event_file_data(filp);
1033        if (likely(file))
1034                flags = file->flags;
1035        mutex_unlock(&event_mutex);
1036
1037        if (!file)
1038                return -ENODEV;
1039
1040        if (flags & EVENT_FILE_FL_ENABLED &&
1041            !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1042                strcpy(buf, "1");
1043
1044        if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1045            flags & EVENT_FILE_FL_SOFT_MODE)
1046                strcat(buf, "*");
1047
1048        strcat(buf, "\n");
1049
1050        return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1051}
1052
1053static ssize_t
1054event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1055                   loff_t *ppos)
1056{
1057        struct trace_event_file *file;
1058        unsigned long val;
1059        int ret;
1060
1061        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1062        if (ret)
1063                return ret;
1064
1065        ret = tracing_update_buffers();
1066        if (ret < 0)
1067                return ret;
1068
1069        switch (val) {
1070        case 0:
1071        case 1:
1072                ret = -ENODEV;
1073                mutex_lock(&event_mutex);
1074                file = event_file_data(filp);
1075                if (likely(file))
1076                        ret = ftrace_event_enable_disable(file, val);
1077                mutex_unlock(&event_mutex);
1078                break;
1079
1080        default:
1081                return -EINVAL;
1082        }
1083
1084        *ppos += cnt;
1085
1086        return ret ? ret : cnt;
1087}
1088
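/*
 * The subsystem-level "enable" file reports '0' if every matching event
 * is disabled, '1' if every one is enabled, and 'X' for a mixture; the
 * 'set' bitmask below indexes set_to_char accordingly.
 */
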
1089static ssize_t
1090system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1091                   loff_t *ppos)
1092{
1093        const char set_to_char[4] = { '?', '0', '1', 'X' };
1094        struct trace_subsystem_dir *dir = filp->private_data;
1095        struct event_subsystem *system = dir->subsystem;
1096        struct trace_event_call *call;
1097        struct trace_event_file *file;
1098        struct trace_array *tr = dir->tr;
1099        char buf[2];
1100        int set = 0;
1101        int ret;
1102
1103        mutex_lock(&event_mutex);
1104        list_for_each_entry(file, &tr->events, list) {
1105                call = file->event_call;
1106                if (!trace_event_name(call) || !call->class || !call->class->reg)
1107                        continue;
1108
1109                if (system && strcmp(call->class->system, system->name) != 0)
1110                        continue;
1111
1112                /*
1113                 * We need to find out if all the events are set
 1114                 * or if all events are cleared, or if we have
1115                 * a mixture.
1116                 */
1117                set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1118
1119                /*
1120                 * If we have a mixture, no need to look further.
1121                 */
1122                if (set == 3)
1123                        break;
1124        }
1125        mutex_unlock(&event_mutex);
1126
1127        buf[0] = set_to_char[set];
1128        buf[1] = '\n';
1129
1130        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1131
1132        return ret;
1133}
1134
1135static ssize_t
1136system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1137                    loff_t *ppos)
1138{
1139        struct trace_subsystem_dir *dir = filp->private_data;
1140        struct event_subsystem *system = dir->subsystem;
1141        const char *name = NULL;
1142        unsigned long val;
1143        ssize_t ret;
1144
1145        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1146        if (ret)
1147                return ret;
1148
1149        ret = tracing_update_buffers();
1150        if (ret < 0)
1151                return ret;
1152
1153        if (val != 0 && val != 1)
1154                return -EINVAL;
1155
1156        /*
1157         * Opening of "enable" adds a ref count to system,
1158         * so the name is safe to use.
1159         */
1160        if (system)
1161                name = system->name;
1162
1163        ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1164        if (ret)
1165                goto out;
1166
1167        ret = cnt;
1168
1169out:
1170        *ppos += cnt;
1171
1172        return ret;
1173}
1174
1175enum {
1176        FORMAT_HEADER           = 1,
1177        FORMAT_FIELD_SEPERATOR  = 2,
1178        FORMAT_PRINTFMT         = 3,
1179};
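
/*
 * The "format" file is emitted in this order: the FORMAT_HEADER block
 * (name, ID, "format:"), the common fields, a blank separator line, the
 * event's own fields, and finally the print fmt line.  f_next() walks
 * the field lists via ->prev because trace_define_field() prepends new
 * fields with list_add().
 */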
1180
1181static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1182{
1183        struct trace_event_call *call = event_file_data(m->private);
1184        struct list_head *common_head = &ftrace_common_fields;
1185        struct list_head *head = trace_get_fields(call);
1186        struct list_head *node = v;
1187
1188        (*pos)++;
1189
1190        switch ((unsigned long)v) {
1191        case FORMAT_HEADER:
1192                node = common_head;
1193                break;
1194
1195        case FORMAT_FIELD_SEPERATOR:
1196                node = head;
1197                break;
1198
1199        case FORMAT_PRINTFMT:
1200                /* all done */
1201                return NULL;
1202        }
1203
1204        node = node->prev;
1205        if (node == common_head)
1206                return (void *)FORMAT_FIELD_SEPERATOR;
1207        else if (node == head)
1208                return (void *)FORMAT_PRINTFMT;
1209        else
1210                return node;
1211}
1212
1213static int f_show(struct seq_file *m, void *v)
1214{
1215        struct trace_event_call *call = event_file_data(m->private);
1216        struct ftrace_event_field *field;
1217        const char *array_descriptor;
1218
1219        switch ((unsigned long)v) {
1220        case FORMAT_HEADER:
1221                seq_printf(m, "name: %s\n", trace_event_name(call));
1222                seq_printf(m, "ID: %d\n", call->event.type);
1223                seq_puts(m, "format:\n");
1224                return 0;
1225
1226        case FORMAT_FIELD_SEPERATOR:
1227                seq_putc(m, '\n');
1228                return 0;
1229
1230        case FORMAT_PRINTFMT:
1231                seq_printf(m, "\nprint fmt: %s\n",
1232                           call->print_fmt);
1233                return 0;
1234        }
1235
1236        field = list_entry(v, struct ftrace_event_field, link);
1237        /*
 1238         * Smartly shows the array type (except dynamic arrays).
1239         * Normal:
1240         *      field:TYPE VAR
1241         * If TYPE := TYPE[LEN], it is shown:
1242         *      field:TYPE VAR[LEN]
1243         */
1244        array_descriptor = strchr(field->type, '[');
1245
1246        if (!strncmp(field->type, "__data_loc", 10))
1247                array_descriptor = NULL;
1248
1249        if (!array_descriptor)
1250                seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1251                           field->type, field->name, field->offset,
1252                           field->size, !!field->is_signed);
1253        else
1254                seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1255                           (int)(array_descriptor - field->type),
1256                           field->type, field->name,
1257                           array_descriptor, field->offset,
1258                           field->size, !!field->is_signed);
1259
1260        return 0;
1261}
1262
1263static void *f_start(struct seq_file *m, loff_t *pos)
1264{
1265        void *p = (void *)FORMAT_HEADER;
1266        loff_t l = 0;
1267
1268        /* ->stop() is called even if ->start() fails */
1269        mutex_lock(&event_mutex);
1270        if (!event_file_data(m->private))
1271                return ERR_PTR(-ENODEV);
1272
1273        while (l < *pos && p)
1274                p = f_next(m, p, &l);
1275
1276        return p;
1277}
1278
1279static void f_stop(struct seq_file *m, void *p)
1280{
1281        mutex_unlock(&event_mutex);
1282}
1283
1284static const struct seq_operations trace_format_seq_ops = {
1285        .start          = f_start,
1286        .next           = f_next,
1287        .stop           = f_stop,
1288        .show           = f_show,
1289};
1290
1291static int trace_format_open(struct inode *inode, struct file *file)
1292{
1293        struct seq_file *m;
1294        int ret;
1295
1296        ret = seq_open(file, &trace_format_seq_ops);
1297        if (ret < 0)
1298                return ret;
1299
1300        m = file->private_data;
1301        m->private = file;
1302
1303        return 0;
1304}
1305
1306static ssize_t
1307event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1308{
1309        int id = (long)event_file_data(filp);
1310        char buf[32];
1311        int len;
1312
1313        if (*ppos)
1314                return 0;
1315
1316        if (unlikely(!id))
1317                return -ENODEV;
1318
1319        len = sprintf(buf, "%d\n", id);
1320
1321        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1322}
1323
1324static ssize_t
1325event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1326                  loff_t *ppos)
1327{
1328        struct trace_event_file *file;
1329        struct trace_seq *s;
1330        int r = -ENODEV;
1331
1332        if (*ppos)
1333                return 0;
1334
1335        s = kmalloc(sizeof(*s), GFP_KERNEL);
1336
1337        if (!s)
1338                return -ENOMEM;
1339
1340        trace_seq_init(s);
1341
1342        mutex_lock(&event_mutex);
1343        file = event_file_data(filp);
1344        if (file)
1345                print_event_filter(file, s);
1346        mutex_unlock(&event_mutex);
1347
1348        if (file)
1349                r = simple_read_from_buffer(ubuf, cnt, ppos,
1350                                            s->buffer, trace_seq_used(s));
1351
1352        kfree(s);
1353
1354        return r;
1355}
1356
1357static ssize_t
1358event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1359                   loff_t *ppos)
1360{
1361        struct trace_event_file *file;
1362        char *buf;
1363        int err = -ENODEV;
1364
1365        if (cnt >= PAGE_SIZE)
1366                return -EINVAL;
1367
1368        buf = memdup_user_nul(ubuf, cnt);
1369        if (IS_ERR(buf))
1370                return PTR_ERR(buf);
1371
1372        mutex_lock(&event_mutex);
1373        file = event_file_data(filp);
1374        if (file)
1375                err = apply_event_filter(file, buf);
1376        mutex_unlock(&event_mutex);
1377
1378        kfree(buf);
1379        if (err < 0)
1380                return err;
1381
1382        *ppos += cnt;
1383
1384        return cnt;
1385}
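
/*
 * Example (illustrative) of writing a filter to a per-event file handled
 * by event_filter_write() above:
 *
 *	# echo 'common_pid != 0' > events/sched/sched_switch/filter
 */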
1386
1387static LIST_HEAD(event_subsystems);
1388
1389static int subsystem_open(struct inode *inode, struct file *filp)
1390{
1391        struct event_subsystem *system = NULL;
1392        struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
1393        struct trace_array *tr;
1394        int ret;
1395
1396        if (tracing_is_disabled())
1397                return -ENODEV;
1398
1399        /* Make sure the system still exists */
1400        mutex_lock(&trace_types_lock);
1401        mutex_lock(&event_mutex);
1402        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1403                list_for_each_entry(dir, &tr->systems, list) {
1404                        if (dir == inode->i_private) {
1405                                /* Don't open systems with no events */
1406                                if (dir->nr_events) {
1407                                        __get_system_dir(dir);
1408                                        system = dir->subsystem;
1409                                }
1410                                goto exit_loop;
1411                        }
1412                }
1413        }
1414 exit_loop:
1415        mutex_unlock(&event_mutex);
1416        mutex_unlock(&trace_types_lock);
1417
1418        if (!system)
1419                return -ENODEV;
1420
1421        /* Some versions of gcc think dir can be uninitialized here */
1422        WARN_ON(!dir);
1423
1424        /* Still need to increment the ref count of the system */
1425        if (trace_array_get(tr) < 0) {
1426                put_system(dir);
1427                return -ENODEV;
1428        }
1429
1430        ret = tracing_open_generic(inode, filp);
1431        if (ret < 0) {
1432                trace_array_put(tr);
1433                put_system(dir);
1434        }
1435
1436        return ret;
1437}
1438
1439static int system_tr_open(struct inode *inode, struct file *filp)
1440{
1441        struct trace_subsystem_dir *dir;
1442        struct trace_array *tr = inode->i_private;
1443        int ret;
1444
1445        if (tracing_is_disabled())
1446                return -ENODEV;
1447
1448        if (trace_array_get(tr) < 0)
1449                return -ENODEV;
1450
1451        /* Make a temporary dir that has no system but points to tr */
1452        dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1453        if (!dir) {
1454                trace_array_put(tr);
1455                return -ENOMEM;
1456        }
1457
1458        dir->tr = tr;
1459
1460        ret = tracing_open_generic(inode, filp);
1461        if (ret < 0) {
1462                trace_array_put(tr);
1463                kfree(dir);
1464                return ret;
1465        }
1466
1467        filp->private_data = dir;
1468
1469        return 0;
1470}
1471
1472static int subsystem_release(struct inode *inode, struct file *file)
1473{
1474        struct trace_subsystem_dir *dir = file->private_data;
1475
1476        trace_array_put(dir->tr);
1477
1478        /*
1479         * If dir->subsystem is NULL, then this is a temporary
1480         * descriptor that was made for a trace_array to enable
1481         * all subsystems.
1482         */
1483        if (dir->subsystem)
1484                put_system(dir);
1485        else
1486                kfree(dir);
1487
1488        return 0;
1489}
1490
1491static ssize_t
1492subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1493                      loff_t *ppos)
1494{
1495        struct trace_subsystem_dir *dir = filp->private_data;
1496        struct event_subsystem *system = dir->subsystem;
1497        struct trace_seq *s;
1498        int r;
1499
1500        if (*ppos)
1501                return 0;
1502
1503        s = kmalloc(sizeof(*s), GFP_KERNEL);
1504        if (!s)
1505                return -ENOMEM;
1506
1507        trace_seq_init(s);
1508
1509        print_subsystem_event_filter(system, s);
1510        r = simple_read_from_buffer(ubuf, cnt, ppos,
1511                                    s->buffer, trace_seq_used(s));
1512
1513        kfree(s);
1514
1515        return r;
1516}
1517
1518static ssize_t
1519subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1520                       loff_t *ppos)
1521{
1522        struct trace_subsystem_dir *dir = filp->private_data;
1523        char *buf;
1524        int err;
1525
1526        if (cnt >= PAGE_SIZE)
1527                return -EINVAL;
1528
1529        buf = memdup_user_nul(ubuf, cnt);
1530        if (IS_ERR(buf))
1531                return PTR_ERR(buf);
1532
1533        err = apply_subsystem_event_filter(dir, buf);
1534        kfree(buf);
1535        if (err < 0)
1536                return err;
1537
1538        *ppos += cnt;
1539
1540        return cnt;
1541}
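
/*
 * Example (illustrative): a filter written at the subsystem level is
 * applied to every event in that subsystem, e.g.
 *
 *	# echo 'common_preempt_count > 0' > events/irq/filter
 */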
1542
1543static ssize_t
1544show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1545{
1546        int (*func)(struct trace_seq *s) = filp->private_data;
1547        struct trace_seq *s;
1548        int r;
1549
1550        if (*ppos)
1551                return 0;
1552
1553        s = kmalloc(sizeof(*s), GFP_KERNEL);
1554        if (!s)
1555                return -ENOMEM;
1556
1557        trace_seq_init(s);
1558
1559        func(s);
1560        r = simple_read_from_buffer(ubuf, cnt, ppos,
1561                                    s->buffer, trace_seq_used(s));
1562
1563        kfree(s);
1564
1565        return r;
1566}
1567
1568static void ignore_task_cpu(void *data)
1569{
1570        struct trace_array *tr = data;
1571        struct trace_pid_list *pid_list;
1572
1573        /*
1574         * This function is called by on_each_cpu() while the
1575         * event_mutex is held.
1576         */
1577        pid_list = rcu_dereference_protected(tr->filtered_pids,
1578                                             mutex_is_locked(&event_mutex));
1579
1580        this_cpu_write(tr->trace_buffer.data->ignore_pid,
1581                       trace_ignore_this_task(pid_list, current));
1582}
1583
1584static ssize_t
1585ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
1586                       size_t cnt, loff_t *ppos)
1587{
1588        struct seq_file *m = filp->private_data;
1589        struct trace_array *tr = m->private;
1590        struct trace_pid_list *filtered_pids = NULL;
1591        struct trace_pid_list *pid_list;
1592        struct trace_event_file *file;
1593        ssize_t ret;
1594
1595        if (!cnt)
1596                return 0;
1597
1598        ret = tracing_update_buffers();
1599        if (ret < 0)
1600                return ret;
1601
1602        mutex_lock(&event_mutex);
1603
1604        filtered_pids = rcu_dereference_protected(tr->filtered_pids,
1605                                             lockdep_is_held(&event_mutex));
1606
1607        ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
1608        if (ret < 0)
1609                goto out;
1610
1611        rcu_assign_pointer(tr->filtered_pids, pid_list);
1612
1613        list_for_each_entry(file, &tr->events, list) {
1614                set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1615        }
1616
1617        if (filtered_pids) {
1618                synchronize_sched();
1619                trace_free_pid_list(filtered_pids);
1620        } else if (pid_list) {
1621                /*
1622                 * Register a probe that is called before all other probes
1623                 * to set ignore_pid if next or prev do not match.
 1624                 * Register a probe that is called after all other probes
1625                 * to only keep ignore_pid set if next pid matches.
1626                 */
1627                register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
1628                                                 tr, INT_MAX);
1629                register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
1630                                                 tr, 0);
1631
1632                register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
1633                                                 tr, INT_MAX);
1634                register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
1635                                                 tr, 0);
1636
1637                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
1638                                                     tr, INT_MAX);
1639                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
1640                                                     tr, 0);
1641
1642                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
1643                                                 tr, INT_MAX);
1644                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
1645                                                 tr, 0);
1646        }
1647
1648        /*
1649         * Ignoring of pids is done at task switch. But we have to
1650         * check for those tasks that are currently running.
1651         * Always do this in case a pid was appended or removed.
1652         */
1653        on_each_cpu(ignore_task_cpu, tr, 1);
1654
1655 out:
1656        mutex_unlock(&event_mutex);
1657
1658        if (ret > 0)
1659                *ppos += ret;
1660
1661        return ret;
1662}
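
/*
 * Sketch of how the handler above is exercised from user space.  The
 * paths assume tracefs is mounted at /sys/kernel/tracing (it may also be
 * reachable via /sys/kernel/debug/tracing):
 *
 *	# echo 123 456 > /sys/kernel/tracing/set_event_pid
 *	# echo 789 >> /sys/kernel/tracing/set_event_pid
 *
 * The first write installs a new pid list (any previous one is freed
 * after a grace period); the second appends to it.  The sched_switch and
 * sched_wakeup probes registered above keep the per-cpu ignore_pid flag
 * in sync so that only the listed pids are traced.
 */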
1663
1664static int ftrace_event_avail_open(struct inode *inode, struct file *file);
1665static int ftrace_event_set_open(struct inode *inode, struct file *file);
1666static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
1667static int ftrace_event_release(struct inode *inode, struct file *file);
1668
1669static const struct seq_operations show_event_seq_ops = {
1670        .start = t_start,
1671        .next = t_next,
1672        .show = t_show,
1673        .stop = t_stop,
1674};
1675
1676static const struct seq_operations show_set_event_seq_ops = {
1677        .start = s_start,
1678        .next = s_next,
1679        .show = t_show,
1680        .stop = t_stop,
1681};
1682
1683static const struct seq_operations show_set_pid_seq_ops = {
1684        .start = p_start,
1685        .next = p_next,
1686        .show = trace_pid_show,
1687        .stop = p_stop,
1688};
1689
1690static const struct file_operations ftrace_avail_fops = {
1691        .open = ftrace_event_avail_open,
1692        .read = seq_read,
1693        .llseek = seq_lseek,
1694        .release = seq_release,
1695};
1696
1697static const struct file_operations ftrace_set_event_fops = {
1698        .open = ftrace_event_set_open,
1699        .read = seq_read,
1700        .write = ftrace_event_write,
1701        .llseek = seq_lseek,
1702        .release = ftrace_event_release,
1703};
1704
1705static const struct file_operations ftrace_set_event_pid_fops = {
1706        .open = ftrace_event_set_pid_open,
1707        .read = seq_read,
1708        .write = ftrace_event_pid_write,
1709        .llseek = seq_lseek,
1710        .release = ftrace_event_release,
1711};
1712
1713static const struct file_operations ftrace_enable_fops = {
1714        .open = tracing_open_generic,
1715        .read = event_enable_read,
1716        .write = event_enable_write,
1717        .llseek = default_llseek,
1718};
1719
1720static const struct file_operations ftrace_event_format_fops = {
1721        .open = trace_format_open,
1722        .read = seq_read,
1723        .llseek = seq_lseek,
1724        .release = seq_release,
1725};
1726
1727static const struct file_operations ftrace_event_id_fops = {
1728        .read = event_id_read,
1729        .llseek = default_llseek,
1730};
1731
1732static const struct file_operations ftrace_event_filter_fops = {
1733        .open = tracing_open_generic,
1734        .read = event_filter_read,
1735        .write = event_filter_write,
1736        .llseek = default_llseek,
1737};
1738
1739static const struct file_operations ftrace_subsystem_filter_fops = {
1740        .open = subsystem_open,
1741        .read = subsystem_filter_read,
1742        .write = subsystem_filter_write,
1743        .llseek = default_llseek,
1744        .release = subsystem_release,
1745};
1746
1747static const struct file_operations ftrace_system_enable_fops = {
1748        .open = subsystem_open,
1749        .read = system_enable_read,
1750        .write = system_enable_write,
1751        .llseek = default_llseek,
1752        .release = subsystem_release,
1753};
1754
1755static const struct file_operations ftrace_tr_enable_fops = {
1756        .open = system_tr_open,
1757        .read = system_enable_read,
1758        .write = system_enable_write,
1759        .llseek = default_llseek,
1760        .release = subsystem_release,
1761};
1762
1763static const struct file_operations ftrace_show_header_fops = {
1764        .open = tracing_open_generic,
1765        .read = show_header,
1766        .llseek = default_llseek,
1767};
1768
1769static int
1770ftrace_event_open(struct inode *inode, struct file *file,
1771                  const struct seq_operations *seq_ops)
1772{
1773        struct seq_file *m;
1774        int ret;
1775
1776        ret = seq_open(file, seq_ops);
1777        if (ret < 0)
1778                return ret;
1779        m = file->private_data;
1780        /* copy tr over to seq ops */
1781        m->private = inode->i_private;
1782
1783        return ret;
1784}
1785
1786static int ftrace_event_release(struct inode *inode, struct file *file)
1787{
1788        struct trace_array *tr = inode->i_private;
1789
1790        trace_array_put(tr);
1791
1792        return seq_release(inode, file);
1793}
1794
1795static int
1796ftrace_event_avail_open(struct inode *inode, struct file *file)
1797{
1798        const struct seq_operations *seq_ops = &show_event_seq_ops;
1799
1800        return ftrace_event_open(inode, file, seq_ops);
1801}
1802
1803static int
1804ftrace_event_set_open(struct inode *inode, struct file *file)
1805{
1806        const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1807        struct trace_array *tr = inode->i_private;
1808        int ret;
1809
1810        if (trace_array_get(tr) < 0)
1811                return -ENODEV;
1812
1813        if ((file->f_mode & FMODE_WRITE) &&
1814            (file->f_flags & O_TRUNC))
1815                ftrace_clear_events(tr);
1816
1817        ret = ftrace_event_open(inode, file, seq_ops);
1818        if (ret < 0)
1819                trace_array_put(tr);
1820        return ret;
1821}
1822
1823static int
1824ftrace_event_set_pid_open(struct inode *inode, struct file *file)
1825{
1826        const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
1827        struct trace_array *tr = inode->i_private;
1828        int ret;
1829
1830        if (trace_array_get(tr) < 0)
1831                return -ENODEV;
1832
1833        if ((file->f_mode & FMODE_WRITE) &&
1834            (file->f_flags & O_TRUNC))
1835                ftrace_clear_event_pids(tr);
1836
1837        ret = ftrace_event_open(inode, file, seq_ops);
1838        if (ret < 0)
1839                trace_array_put(tr);
1840        return ret;
1841}
1842
1843static struct event_subsystem *
1844create_new_subsystem(const char *name)
1845{
1846        struct event_subsystem *system;
1847
1848        /* need to create new entry */
1849        system = kmalloc(sizeof(*system), GFP_KERNEL);
1850        if (!system)
1851                return NULL;
1852
1853        system->ref_count = 1;
1854
1855        /* Only allocate if dynamic (kprobes and modules) */
1856        system->name = kstrdup_const(name, GFP_KERNEL);
1857        if (!system->name)
1858                goto out_free;
1859
1860        system->filter = NULL;
1861
1862        system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
1863        if (!system->filter)
1864                goto out_free;
1865
1866        list_add(&system->list, &event_subsystems);
1867
1868        return system;
1869
1870 out_free:
1871        kfree_const(system->name);
1872        kfree(system);
1873        return NULL;
1874}
1875
1876static struct dentry *
1877event_subsystem_dir(struct trace_array *tr, const char *name,
1878                    struct trace_event_file *file, struct dentry *parent)
1879{
1880        struct trace_subsystem_dir *dir;
1881        struct event_subsystem *system;
1882        struct dentry *entry;
1883
1884        /* First see if we did not already create this dir */
1885        list_for_each_entry(dir, &tr->systems, list) {
1886                system = dir->subsystem;
1887                if (strcmp(system->name, name) == 0) {
1888                        dir->nr_events++;
1889                        file->system = dir;
1890                        return dir->entry;
1891                }
1892        }
1893
1894        /* Now see if the system itself exists. */
1895        list_for_each_entry(system, &event_subsystems, list) {
1896                if (strcmp(system->name, name) == 0)
1897                        break;
1898        }
1899        /* Reset system variable when not found */
1900        if (&system->list == &event_subsystems)
1901                system = NULL;
1902
1903        dir = kmalloc(sizeof(*dir), GFP_KERNEL);
1904        if (!dir)
1905                goto out_fail;
1906
1907        if (!system) {
1908                system = create_new_subsystem(name);
1909                if (!system)
1910                        goto out_free;
1911        } else
1912                __get_system(system);
1913
1914        dir->entry = tracefs_create_dir(name, parent);
1915        if (!dir->entry) {
1916                pr_warn("Failed to create system directory %s\n", name);
1917                __put_system(system);
1918                goto out_free;
1919        }
1920
1921        dir->tr = tr;
1922        dir->ref_count = 1;
1923        dir->nr_events = 1;
1924        dir->subsystem = system;
1925        file->system = dir;
1926
1927        entry = tracefs_create_file("filter", 0644, dir->entry, dir,
1928                                    &ftrace_subsystem_filter_fops);
1929        if (!entry) {
1930                kfree(system->filter);
1931                system->filter = NULL;
1932                pr_warn("Could not create tracefs '%s/filter' entry\n", name);
1933        }
1934
1935        trace_create_file("enable", 0644, dir->entry, dir,
1936                          &ftrace_system_enable_fops);
1937
1938        list_add(&dir->list, &tr->systems);
1939
1940        return dir->entry;
1941
1942 out_free:
1943        kfree(dir);
1944 out_fail:
1945        /* Only print this message if the failure was a memory allocation */
1946        if (!dir || !system)
1947                pr_warn("No memory to create event subsystem %s\n", name);
1948        return NULL;
1949}
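
/*
 * The directory created above gives each event subsystem its own node
 * under the instance's "events" directory, roughly:
 *
 *	events/<system>/enable	- enable/disable every event in the system
 *	events/<system>/filter	- filter applied to all events in the system
 *
 * The per-event subdirectories themselves are added by event_create_dir()
 * below.
 */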
1950
1951static int
1952event_create_dir(struct dentry *parent, struct trace_event_file *file)
1953{
1954        struct trace_event_call *call = file->event_call;
1955        struct trace_array *tr = file->tr;
1956        struct list_head *head;
1957        struct dentry *d_events;
1958        const char *name;
1959        int ret;
1960
1961        /*
1962         * If the trace point header did not define TRACE_SYSTEM
1963         * then the system would be called "TRACE_SYSTEM".
1964         */
1965        if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
1966                d_events = event_subsystem_dir(tr, call->class->system, file, parent);
1967                if (!d_events)
1968                        return -ENOMEM;
1969        } else
1970                d_events = parent;
1971
1972        name = trace_event_name(call);
1973        file->dir = tracefs_create_dir(name, d_events);
1974        if (!file->dir) {
1975                pr_warn("Could not create tracefs '%s' directory\n", name);
1976                return -1;
1977        }
1978
1979        if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
1980                trace_create_file("enable", 0644, file->dir, file,
1981                                  &ftrace_enable_fops);
1982
1983#ifdef CONFIG_PERF_EVENTS
1984        if (call->event.type && call->class->reg)
1985                trace_create_file("id", 0444, file->dir,
1986                                  (void *)(long)call->event.type,
1987                                  &ftrace_event_id_fops);
1988#endif
1989
1990        /*
1991         * Other events may have the same class. Only update
1992         * the fields if they are not already defined.
1993         */
1994        head = trace_get_fields(call);
1995        if (list_empty(head)) {
1996                ret = call->class->define_fields(call);
1997                if (ret < 0) {
1998                        pr_warn("Could not initialize trace point events/%s\n",
1999                                name);
2000                        return -1;
2001                }
2002        }
2003        trace_create_file("filter", 0644, file->dir, file,
2004                          &ftrace_event_filter_fops);
2005
2006        /*
2007         * Only event directories that can be enabled should have
2008         * triggers.
2009         */
2010        if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
2011                trace_create_file("trigger", 0644, file->dir, file,
2012                                  &event_trigger_fops);
2013
2014#ifdef CONFIG_HIST_TRIGGERS
2015        trace_create_file("hist", 0444, file->dir, file,
2016                          &event_hist_fops);
2017#endif
2018        trace_create_file("format", 0444, file->dir, call,
2019                          &ftrace_event_format_fops);
2020
2021        return 0;
2022}
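
/*
 * Depending on configuration and the event's flags, the per-event
 * directory created above ends up containing:
 *
 *	events/<system>/<event>/enable	- enable/disable this event
 *	events/<system>/<event>/id	- event id (CONFIG_PERF_EVENTS)
 *	events/<system>/<event>/filter	- filter expression for this event
 *	events/<system>/<event>/trigger	- event triggers
 *	events/<system>/<event>/hist	- histograms (CONFIG_HIST_TRIGGERS)
 *	events/<system>/<event>/format	- field layout and print format
 */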
2023
2024static void remove_event_from_tracers(struct trace_event_call *call)
2025{
2026        struct trace_event_file *file;
2027        struct trace_array *tr;
2028
2029        do_for_each_event_file_safe(tr, file) {
2030                if (file->event_call != call)
2031                        continue;
2032
2033                remove_event_file_dir(file);
2034                /*
2035                 * The do_for_each_event_file_safe() is
2036                 * a double loop. After finding the call for this
2037                 * trace_array, we use break to jump to the next
2038                 * trace_array.
2039                 */
2040                break;
2041        } while_for_each_event_file();
2042}
2043
2044static void event_remove(struct trace_event_call *call)
2045{
2046        struct trace_array *tr;
2047        struct trace_event_file *file;
2048
2049        do_for_each_event_file(tr, file) {
2050                if (file->event_call != call)
2051                        continue;
2052                ftrace_event_enable_disable(file, 0);
2053                /*
2054                 * The do_for_each_event_file() is
2055                 * a double loop. After finding the call for this
2056                 * trace_array, we use break to jump to the next
2057                 * trace_array.
2058                 */
2059                break;
2060        } while_for_each_event_file();
2061
2062        if (call->event.funcs)
2063                __unregister_trace_event(&call->event);
2064        remove_event_from_tracers(call);
2065        list_del(&call->list);
2066}
2067
2068static int event_init(struct trace_event_call *call)
2069{
2070        int ret = 0;
2071        const char *name;
2072
2073        name = trace_event_name(call);
2074        if (WARN_ON(!name))
2075                return -EINVAL;
2076
2077        if (call->class->raw_init) {
2078                ret = call->class->raw_init(call);
2079                if (ret < 0 && ret != -ENOSYS)
2080                        pr_warn("Could not initialize trace events/%s\n", name);
2081        }
2082
2083        return ret;
2084}
2085
2086static int
2087__register_event(struct trace_event_call *call, struct module *mod)
2088{
2089        int ret;
2090
2091        ret = event_init(call);
2092        if (ret < 0)
2093                return ret;
2094
2095        list_add(&call->list, &ftrace_events);
2096        call->mod = mod;
2097
2098        return 0;
2099}
2100
2101static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
2102{
2103        int rlen;
2104        int elen;
2105
2106        /* Find the length of the enum value as a string */
2107        elen = snprintf(ptr, 0, "%ld", map->enum_value);
2108        /* Make sure there's enough room to replace the string with the value */
2109        if (len < elen)
2110                return NULL;
2111
2112        snprintf(ptr, elen + 1, "%ld", map->enum_value);
2113
2114        /* Length of the string that follows the enum name */
2115        rlen = strlen(ptr + len);
2116        memmove(ptr + elen, ptr + len, rlen);
2117        /* Make sure we end the new string */
2118        ptr[elen + rlen] = 0;
2119
2120        return ptr + elen;
2121}
2122
2123static void update_event_printk(struct trace_event_call *call,
2124                                struct trace_enum_map *map)
2125{
2126        char *ptr;
2127        int quote = 0;
2128        int len = strlen(map->enum_string);
2129
2130        for (ptr = call->print_fmt; *ptr; ptr++) {
2131                if (*ptr == '\\') {
2132                        ptr++;
2133                        /* paranoid */
2134                        if (!*ptr)
2135                                break;
2136                        continue;
2137                }
2138                if (*ptr == '"') {
2139                        quote ^= 1;
2140                        continue;
2141                }
2142                if (quote)
2143                        continue;
2144                if (isdigit(*ptr)) {
2145                        /* skip numbers */
2146                        do {
2147                                ptr++;
2148                                /* Check for alpha chars like ULL */
2149                        } while (isalnum(*ptr));
2150                        if (!*ptr)
2151                                break;
2152                        /*
2153                         * A number must have some kind of delimiter after
2154                         * it, and we can ignore that too.
2155                         */
2156                        continue;
2157                }
2158                if (isalpha(*ptr) || *ptr == '_') {
2159                        if (strncmp(map->enum_string, ptr, len) == 0 &&
2160                            !isalnum(ptr[len]) && ptr[len] != '_') {
2161                                ptr = enum_replace(ptr, map, len);
2162                                /* Hmm, enum string smaller than value */
2163                                if (WARN_ON_ONCE(!ptr))
2164                                        return;
2165                                /*
2166                                 * No need to decrement here, as enum_replace()
2167                                 * returns the pointer to the character past
2168                                 * the enum, and two enums cannot be placed
2169                                 * back to back without something in between.
2170                                 * We can skip that something in between.
2171                                 */
2172                                continue;
2173                        }
2174                skip_more:
2175                        do {
2176                                ptr++;
2177                        } while (isalnum(*ptr) || *ptr == '_');
2178                        if (!*ptr)
2179                                break;
2180                        /*
2181                         * If what comes after this variable is a '.' or
2182                         * '->' then we can continue to ignore that string.
2183                         */
2184                        if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2185                                ptr += *ptr == '.' ? 1 : 2;
2186                                if (!*ptr)
2187                                        break;
2188                                goto skip_more;
2189                        }
2190                        /*
2191                         * Once again, we can skip the delimiter that came
2192                         * after the string.
2193                         */
2194                        continue;
2195                }
2196        }
2197}
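
/*
 * A worked example of the rewrite performed above (the names are made up
 * purely for illustration): if TRACE_DEFINE_ENUM() maps FOO_BAR to 2 and
 * an event's print_fmt contains
 *
 *	__print_symbolic(REC->state, { FOO_BAR, "bar" })
 *
 * then the identifier FOO_BAR is overwritten in place with "2".  Quoted
 * strings, numeric literals and member accesses ("." and "->") are
 * skipped, so only bare identifiers that match the enum string exactly
 * are replaced.
 */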
2198
2199void trace_event_enum_update(struct trace_enum_map **map, int len)
2200{
2201        struct trace_event_call *call, *p;
2202        const char *last_system = NULL;
2203        int last_i;
2204        int i;
2205
2206        down_write(&trace_event_sem);
2207        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2208                /* events are usually grouped together with systems */
2209                if (!last_system || call->class->system != last_system) {
2210                        last_i = 0;
2211                        last_system = call->class->system;
2212                }
2213
2214                for (i = last_i; i < len; i++) {
2215                        if (call->class->system == map[i]->system) {
2216                                /* Save the first system if need be */
2217                                if (!last_i)
2218                                        last_i = i;
2219                                update_event_printk(call, map[i]);
2220                        }
2221                }
2222        }
2223        up_write(&trace_event_sem);
2224}
2225
2226static struct trace_event_file *
2227trace_create_new_event(struct trace_event_call *call,
2228                       struct trace_array *tr)
2229{
2230        struct trace_event_file *file;
2231
2232        file = kmem_cache_alloc(file_cachep, GFP_TRACE);
2233        if (!file)
2234                return NULL;
2235
2236        file->event_call = call;
2237        file->tr = tr;
2238        atomic_set(&file->sm_ref, 0);
2239        atomic_set(&file->tm_ref, 0);
2240        INIT_LIST_HEAD(&file->triggers);
2241        list_add(&file->list, &tr->events);
2242
2243        return file;
2244}
2245
2246/* Add an event to a trace directory */
2247static int
2248__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
2249{
2250        struct trace_event_file *file;
2251
2252        file = trace_create_new_event(call, tr);
2253        if (!file)
2254                return -ENOMEM;
2255
2256        return event_create_dir(tr->event_dir, file);
2257}
2258
2259/*
2260 * Just create a descriptor for early init. A descriptor is required
2261 * for enabling events at boot. We want to enable events before
2262 * the filesystem is initialized.
2263 */
2264static __init int
2265__trace_early_add_new_event(struct trace_event_call *call,
2266                            struct trace_array *tr)
2267{
2268        struct trace_event_file *file;
2269
2270        file = trace_create_new_event(call, tr);
2271        if (!file)
2272                return -ENOMEM;
2273
2274        return 0;
2275}
2276
2277struct ftrace_module_file_ops;
2278static void __add_event_to_tracers(struct trace_event_call *call);
2279
2280/* Add an additional event_call dynamically */
2281int trace_add_event_call(struct trace_event_call *call)
2282{
2283        int ret;
2284        mutex_lock(&trace_types_lock);
2285        mutex_lock(&event_mutex);
2286
2287        ret = __register_event(call, NULL);
2288        if (ret >= 0)
2289                __add_event_to_tracers(call);
2290
2291        mutex_unlock(&event_mutex);
2292        mutex_unlock(&trace_types_lock);
2293        return ret;
2294}
2295
2296/*
2297 * Must be called with trace_types_lock, event_mutex and
2298 * trace_event_sem held.
2299 */
2300static void __trace_remove_event_call(struct trace_event_call *call)
2301{
2302        event_remove(call);
2303        trace_destroy_fields(call);
2304        free_event_filter(call->filter);
2305        call->filter = NULL;
2306}
2307
2308static int probe_remove_event_call(struct trace_event_call *call)
2309{
2310        struct trace_array *tr;
2311        struct trace_event_file *file;
2312
2313#ifdef CONFIG_PERF_EVENTS
2314        if (call->perf_refcount)
2315                return -EBUSY;
2316#endif
2317        do_for_each_event_file(tr, file) {
2318                if (file->event_call != call)
2319                        continue;
2320                /*
2321                 * We can't rely on the ftrace_event_enable_disable(enable => 0)
2322                 * we are about to do; EVENT_FILE_FL_SOFT_MODE can suppress
2323                 * TRACE_REG_UNREGISTER.
2324                 */
2325                if (file->flags & EVENT_FILE_FL_ENABLED)
2326                        return -EBUSY;
2327                /*
2328                 * The do_for_each_event_file() is
2329                 * a double loop. After finding the call for this
2330                 * trace_array, we use break to jump to the next
2331                 * trace_array.
2332                 */
2333                break;
2334        } while_for_each_event_file();
2335
2336        __trace_remove_event_call(call);
2337
2338        return 0;
2339}
2340
2341/* Remove an event_call */
2342int trace_remove_event_call(struct trace_event_call *call)
2343{
2344        int ret;
2345
2346        mutex_lock(&trace_types_lock);
2347        mutex_lock(&event_mutex);
2348        down_write(&trace_event_sem);
2349        ret = probe_remove_event_call(call);
2350        up_write(&trace_event_sem);
2351        mutex_unlock(&event_mutex);
2352        mutex_unlock(&trace_types_lock);
2353
2354        return ret;
2355}
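
/*
 * trace_add_event_call() and trace_remove_event_call() are the pair used
 * by dynamically created events (kprobe and uprobe events, for example)
 * to attach and detach a trace_event_call at run time.  Removal returns
 * -EBUSY while the event is still enabled or has perf users, so callers
 * must check the return value before freeing the call.
 */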
2356
2357#define for_each_event(event, start, end)                       \
2358        for (event = start;                                     \
2359             (unsigned long)event < (unsigned long)end;         \
2360             event++)
2361
2362#ifdef CONFIG_MODULES
2363
2364static void trace_module_add_events(struct module *mod)
2365{
2366        struct trace_event_call **call, **start, **end;
2367
2368        if (!mod->num_trace_events)
2369                return;
2370
2371        /* Don't add infrastructure for mods without tracepoints */
2372        if (trace_module_has_bad_taint(mod)) {
2373                pr_err("%s: module has bad taint, not creating trace events\n",
2374                       mod->name);
2375                return;
2376        }
2377
2378        start = mod->trace_events;
2379        end = mod->trace_events + mod->num_trace_events;
2380
2381        for_each_event(call, start, end) {
2382                __register_event(*call, mod);
2383                __add_event_to_tracers(*call);
2384        }
2385}
2386
2387static void trace_module_remove_events(struct module *mod)
2388{
2389        struct trace_event_call *call, *p;
2390        bool clear_trace = false;
2391
2392        down_write(&trace_event_sem);
2393        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2394                if (call->mod == mod) {
2395                        if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
2396                                clear_trace = true;
2397                        __trace_remove_event_call(call);
2398                }
2399        }
2400        up_write(&trace_event_sem);
2401
2402        /*
2403         * It is safest to reset the ring buffer if the module being unloaded
2404         * registered any events that were used. The only worry is if
2405         * a new module gets loaded, and takes on the same id as the events
2406         * of this module. When printing out the buffer, traced events left
2407         * over from this module may be passed to the new module events and
2408         * unexpected results may occur.
2409         */
2410        if (clear_trace)
2411                tracing_reset_all_online_cpus();
2412}
2413
2414static int trace_module_notify(struct notifier_block *self,
2415                               unsigned long val, void *data)
2416{
2417        struct module *mod = data;
2418
2419        mutex_lock(&trace_types_lock);
2420        mutex_lock(&event_mutex);
2421        switch (val) {
2422        case MODULE_STATE_COMING:
2423                trace_module_add_events(mod);
2424                break;
2425        case MODULE_STATE_GOING:
2426                trace_module_remove_events(mod);
2427                break;
2428        }
2429        mutex_unlock(&event_mutex);
2430        mutex_unlock(&trace_types_lock);
2431
2432        return 0;
2433}
2434
2435static struct notifier_block trace_module_nb = {
2436        .notifier_call = trace_module_notify,
2437        .priority = 1, /* higher than trace.c module notify */
2438};
2439#endif /* CONFIG_MODULES */
2440
2441/* Create a new event directory structure for a trace directory. */
2442static void
2443__trace_add_event_dirs(struct trace_array *tr)
2444{
2445        struct trace_event_call *call;
2446        int ret;
2447
2448        list_for_each_entry(call, &ftrace_events, list) {
2449                ret = __trace_add_new_event(call, tr);
2450                if (ret < 0)
2451                        pr_warn("Could not create directory for event %s\n",
2452                                trace_event_name(call));
2453        }
2454}
2455
2456struct trace_event_file *
2457find_event_file(struct trace_array *tr, const char *system,  const char *event)
2458{
2459        struct trace_event_file *file;
2460        struct trace_event_call *call;
2461        const char *name;
2462
2463        list_for_each_entry(file, &tr->events, list) {
2464
2465                call = file->event_call;
2466                name = trace_event_name(call);
2467
2468                if (!name || !call->class || !call->class->reg)
2469                        continue;
2470
2471                if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
2472                        continue;
2473
2474                if (strcmp(event, name) == 0 &&
2475                    strcmp(system, call->class->system) == 0)
2476                        return file;
2477        }
2478        return NULL;
2479}
2480
2481#ifdef CONFIG_DYNAMIC_FTRACE
2482
2483/* Avoid typos */
2484#define ENABLE_EVENT_STR        "enable_event"
2485#define DISABLE_EVENT_STR       "disable_event"
2486
2487struct event_probe_data {
2488        struct trace_event_file *file;
2489        unsigned long                   count;
2490        int                             ref;
2491        bool                            enable;
2492};
2493
2494static void
2495event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2496{
2497        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2498        struct event_probe_data *data = *pdata;
2499
2500        if (!data)
2501                return;
2502
2503        if (data->enable)
2504                clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2505        else
2506                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2507}
2508
2509static void
2510event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2511{
2512        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2513        struct event_probe_data *data = *pdata;
2514
2515        if (!data)
2516                return;
2517
2518        if (!data->count)
2519                return;
2520
2521        /* Skip if the event is in a state we want to switch to */
2522        if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
2523                return;
2524
2525        if (data->count != -1)
2526                (data->count)--;
2527
2528        event_enable_probe(ip, parent_ip, _data);
2529}
2530
2531static int
2532event_enable_print(struct seq_file *m, unsigned long ip,
2533                      struct ftrace_probe_ops *ops, void *_data)
2534{
2535        struct event_probe_data *data = _data;
2536
2537        seq_printf(m, "%ps:", (void *)ip);
2538
2539        seq_printf(m, "%s:%s:%s",
2540                   data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
2541                   data->file->event_call->class->system,
2542                   trace_event_name(data->file->event_call));
2543
2544        if (data->count == -1)
2545                seq_puts(m, ":unlimited\n");
2546        else
2547                seq_printf(m, ":count=%ld\n", data->count);
2548
2549        return 0;
2550}
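
/*
 * When listed through set_ftrace_filter, the output built above looks
 * like (function and event names are just an example):
 *
 *	schedule:enable_event:sched:sched_switch:unlimited
 *	schedule:disable_event:sched:sched_switch:count=5
 */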
2551
2552static int
2553event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
2554                  void **_data)
2555{
2556        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2557        struct event_probe_data *data = *pdata;
2558
2559        data->ref++;
2560        return 0;
2561}
2562
2563static void
2564event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
2565                  void **_data)
2566{
2567        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2568        struct event_probe_data *data = *pdata;
2569
2570        if (WARN_ON_ONCE(data->ref <= 0))
2571                return;
2572
2573        data->ref--;
2574        if (!data->ref) {
2575                /* Remove the SOFT_MODE flag */
2576                __ftrace_event_enable_disable(data->file, 0, 1);
2577                module_put(data->file->event_call->mod);
2578                kfree(data);
2579        }
2580        *pdata = NULL;
2581}
2582
2583static struct ftrace_probe_ops event_enable_probe_ops = {
2584        .func                   = event_enable_probe,
2585        .print                  = event_enable_print,
2586        .init                   = event_enable_init,
2587        .free                   = event_enable_free,
2588};
2589
2590static struct ftrace_probe_ops event_enable_count_probe_ops = {
2591        .func                   = event_enable_count_probe,
2592        .print                  = event_enable_print,
2593        .init                   = event_enable_init,
2594        .free                   = event_enable_free,
2595};
2596
2597static struct ftrace_probe_ops event_disable_probe_ops = {
2598        .func                   = event_enable_probe,
2599        .print                  = event_enable_print,
2600        .init                   = event_enable_init,
2601        .free                   = event_enable_free,
2602};
2603
2604static struct ftrace_probe_ops event_disable_count_probe_ops = {
2605        .func                   = event_enable_count_probe,
2606        .print                  = event_enable_print,
2607        .init                   = event_enable_init,
2608        .free                   = event_enable_free,
2609};
2610
2611static int
2612event_enable_func(struct ftrace_hash *hash,
2613                  char *glob, char *cmd, char *param, int enabled)
2614{
2615        struct trace_array *tr = top_trace_array();
2616        struct trace_event_file *file;
2617        struct ftrace_probe_ops *ops;
2618        struct event_probe_data *data;
2619        const char *system;
2620        const char *event;
2621        char *number;
2622        bool enable;
2623        int ret;
2624
2625        if (!tr)
2626                return -ENODEV;
2627
2628        /* hash funcs only work with set_ftrace_filter */
2629        if (!enabled || !param)
2630                return -EINVAL;
2631
2632        system = strsep(&param, ":");
2633        if (!param)
2634                return -EINVAL;
2635
2636        event = strsep(&param, ":");
2637
2638        mutex_lock(&event_mutex);
2639
2640        ret = -EINVAL;
2641        file = find_event_file(tr, system, event);
2642        if (!file)
2643                goto out;
2644
2645        enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
2646
2647        if (enable)
2648                ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
2649        else
2650                ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
2651
2652        if (glob[0] == '!') {
2653                unregister_ftrace_function_probe_func(glob+1, ops);
2654                ret = 0;
2655                goto out;
2656        }
2657
2658        ret = -ENOMEM;
2659        data = kzalloc(sizeof(*data), GFP_KERNEL);
2660        if (!data)
2661                goto out;
2662
2663        data->enable = enable;
2664        data->count = -1;
2665        data->file = file;
2666
2667        if (!param)
2668                goto out_reg;
2669
2670        number = strsep(&param, ":");
2671
2672        ret = -EINVAL;
2673        if (!strlen(number))
2674                goto out_free;
2675
2676        /*
2677         * We use the callback data field (which is a pointer)
2678         * as our counter.
2679         */
2680        ret = kstrtoul(number, 0, &data->count);
2681        if (ret)
2682                goto out_free;
2683
2684 out_reg:
2685        /* Don't let event modules unload while probe registered */
2686        ret = try_module_get(file->event_call->mod);
2687        if (!ret) {
2688                ret = -EBUSY;
2689                goto out_free;
2690        }
2691
2692        ret = __ftrace_event_enable_disable(file, 1, 1);
2693        if (ret < 0)
2694                goto out_put;
2695        ret = register_ftrace_function_probe(glob, ops, data);
2696        /*
2697         * On success, the above returns the number of functions enabled,
2698         * but if it didn't find any functions it returns zero.
2699         * Consider finding no functions a failure too.
2700         */
2701        if (!ret) {
2702                ret = -ENOENT;
2703                goto out_disable;
2704        } else if (ret < 0)
2705                goto out_disable;
2706        /* Just return zero, not the number of enabled functions */
2707        ret = 0;
2708 out:
2709        mutex_unlock(&event_mutex);
2710        return ret;
2711
2712 out_disable:
2713        __ftrace_event_enable_disable(file, 0, 1);
2714 out_put:
2715        module_put(file->event_call->mod);
2716 out_free:
2717        kfree(data);
2718        goto out;
2719}
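
/*
 * Example use of the commands registered below, assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *	# echo 'schedule:enable_event:sched:sched_switch' > \
 *		/sys/kernel/tracing/set_ftrace_filter
 *	# echo 'schedule:disable_event:sched:sched_switch:5' > \
 *		/sys/kernel/tracing/set_ftrace_filter
 *
 * The first enables sched_switch every time schedule() is hit; the
 * second disables it, but only for the first 5 hits.  Prefixing the
 * function pattern with '!' removes a previously installed probe.
 */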
2720
2721static struct ftrace_func_command event_enable_cmd = {
2722        .name                   = ENABLE_EVENT_STR,
2723        .func                   = event_enable_func,
2724};
2725
2726static struct ftrace_func_command event_disable_cmd = {
2727        .name                   = DISABLE_EVENT_STR,
2728        .func                   = event_enable_func,
2729};
2730
2731static __init int register_event_cmds(void)
2732{
2733        int ret;
2734
2735        ret = register_ftrace_command(&event_enable_cmd);
2736        if (WARN_ON(ret < 0))
2737                return ret;
2738        ret = register_ftrace_command(&event_disable_cmd);
2739        if (WARN_ON(ret < 0))
2740                unregister_ftrace_command(&event_enable_cmd);
2741        return ret;
2742}
2743#else
2744static inline int register_event_cmds(void) { return 0; }
2745#endif /* CONFIG_DYNAMIC_FTRACE */
2746
2747/*
2748 * The top level array has already had its trace_event_file
2749 * descriptors created in order to allow for early events to
2750 * be recorded. This function is called after tracefs has been
2751 * initialized, and we now have to create the files associated
2752 * with the events.
2753 */
2754static __init void
2755__trace_early_add_event_dirs(struct trace_array *tr)
2756{
2757        struct trace_event_file *file;
2758        int ret;
2759
2760
2761        list_for_each_entry(file, &tr->events, list) {
2762                ret = event_create_dir(tr->event_dir, file);
2763                if (ret < 0)
2764                        pr_warn("Could not create directory for event %s\n",
2765                                trace_event_name(file->event_call));
2766        }
2767}
2768
2769/*
2770 * For early boot up, the top trace array needs to have
2771 * a list of events that can be enabled. This must be done before
2772 * the filesystem is set up in order to allow events to be traced
2773 * early.
2774 */
2775static __init void
2776__trace_early_add_events(struct trace_array *tr)
2777{
2778        struct trace_event_call *call;
2779        int ret;
2780
2781        list_for_each_entry(call, &ftrace_events, list) {
2782                /* Early boot up should not have any modules loaded */
2783                if (WARN_ON_ONCE(call->mod))
2784                        continue;
2785
2786                ret = __trace_early_add_new_event(call, tr);
2787                if (ret < 0)
2788                        pr_warn("Could not create early event %s\n",
2789                                trace_event_name(call));
2790        }
2791}
2792
2793/* Remove the event directory structure for a trace directory. */
2794static void
2795__trace_remove_event_dirs(struct trace_array *tr)
2796{
2797        struct trace_event_file *file, *next;
2798
2799        list_for_each_entry_safe(file, next, &tr->events, list)
2800                remove_event_file_dir(file);
2801}
2802
2803static void __add_event_to_tracers(struct trace_event_call *call)
2804{
2805        struct trace_array *tr;
2806
2807        list_for_each_entry(tr, &ftrace_trace_arrays, list)
2808                __trace_add_new_event(call, tr);
2809}
2810
2811extern struct trace_event_call *__start_ftrace_events[];
2812extern struct trace_event_call *__stop_ftrace_events[];
2813
2814static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
2815
2816static __init int setup_trace_event(char *str)
2817{
2818        strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
2819        ring_buffer_expanded = true;
2820        tracing_selftest_disabled = true;
2821
2822        return 1;
2823}
2824__setup("trace_event=", setup_trace_event);
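
/*
 * Example kernel command line usage of the parameter parsed above:
 *
 *	trace_event=sched:sched_switch,irq:irq_handler_entry
 *
 * The comma separated list is stashed in bootup_event_buf and enabled by
 * early_enable_events() once the event call sites have been initialized.
 */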
2825
2826/* Expects to have event_mutex held when called */
2827static int
2828create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
2829{
2830        struct dentry *d_events;
2831        struct dentry *entry;
2832
2833        entry = tracefs_create_file("set_event", 0644, parent,
2834                                    tr, &ftrace_set_event_fops);
2835        if (!entry) {
2836                pr_warn("Could not create tracefs 'set_event' entry\n");
2837                return -ENOMEM;
2838        }
2839
2840        d_events = tracefs_create_dir("events", parent);
2841        if (!d_events) {
2842                pr_warn("Could not create tracefs 'events' directory\n");
2843                return -ENOMEM;
2844        }
2845
2846        entry = tracefs_create_file("set_event_pid", 0644, parent,
2847                                    tr, &ftrace_set_event_pid_fops);
2848
2849        /* ring buffer internal formats */
2850        trace_create_file("header_page", 0444, d_events,
2851                          ring_buffer_print_page_header,
2852                          &ftrace_show_header_fops);
2853
2854        trace_create_file("header_event", 0444, d_events,
2855                          ring_buffer_print_entry_header,
2856                          &ftrace_show_header_fops);
2857
2858        trace_create_file("enable", 0644, d_events,
2859                          tr, &ftrace_tr_enable_fops);
2860
2861        tr->event_dir = d_events;
2862
2863        return 0;
2864}
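
/*
 * After the above, the top of the instance's directory carries the
 * generic event control files:
 *
 *	set_event	- enable/disable events by name
 *	set_event_pid	- limit events to a list of pids
 *	events/		- per subsystem/event directories
 *	events/enable	- enable/disable all events of the instance
 *	events/header_page, events/header_event
 *			- ring buffer internal formats
 */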
2865
2866/**
2867 * event_trace_add_tracer - add an instance of a trace_array to events
2868 * @parent: The parent dentry to place the files/directories for events in
2869 * @tr: The trace array associated with these events
2870 *
2871 * When a new instance is created, it needs to set up its events
2872 * directory, as well as other files associated with events. It also
2873 * creates the event hierarchy in the @parent/events directory.
2874 *
2875 * Returns 0 on success.
2876 */
2877int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
2878{
2879        int ret;
2880
2881        mutex_lock(&event_mutex);
2882
2883        ret = create_event_toplevel_files(parent, tr);
2884        if (ret)
2885                goto out_unlock;
2886
2887        down_write(&trace_event_sem);
2888        __trace_add_event_dirs(tr);
2889        up_write(&trace_event_sem);
2890
2891 out_unlock:
2892        mutex_unlock(&event_mutex);
2893
2894        return ret;
2895}
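
/*
 * From user space this path (and event_trace_del_tracer() below) is
 * exercised by creating and removing trace instances, e.g.:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# rmdir /sys/kernel/tracing/instances/foo
 *
 * (paths assume tracefs is mounted at /sys/kernel/tracing).
 */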
2896
2897/*
2898 * The top trace array already had its trace_event_file descriptors
2899 * created. Now the tracefs files themselves need to be created.
2900 */
2901static __init int
2902early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
2903{
2904        int ret;
2905
2906        mutex_lock(&event_mutex);
2907
2908        ret = create_event_toplevel_files(parent, tr);
2909        if (ret)
2910                goto out_unlock;
2911
2912        down_write(&trace_event_sem);
2913        __trace_early_add_event_dirs(tr);
2914        up_write(&trace_event_sem);
2915
2916 out_unlock:
2917        mutex_unlock(&event_mutex);
2918
2919        return ret;
2920}
2921
2922int event_trace_del_tracer(struct trace_array *tr)
2923{
2924        mutex_lock(&event_mutex);
2925
2926        /* Disable any event triggers and associated soft-disabled events */
2927        clear_event_triggers(tr);
2928
2929        /* Clear the pid list */
2930        __ftrace_clear_event_pids(tr);
2931
2932        /* Disable any running events */
2933        __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
2934
2935        /* Access to events is done within rcu_read_lock_sched() */
2936        synchronize_sched();
2937
2938        down_write(&trace_event_sem);
2939        __trace_remove_event_dirs(tr);
2940        tracefs_remove_recursive(tr->event_dir);
2941        up_write(&trace_event_sem);
2942
2943        tr->event_dir = NULL;
2944
2945        mutex_unlock(&event_mutex);
2946
2947        return 0;
2948}
2949
2950static __init int event_trace_memsetup(void)
2951{
2952        field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
2953        file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
2954        return 0;
2955}
2956
2957static __init void
2958early_enable_events(struct trace_array *tr, bool disable_first)
2959{
2960        char *buf = bootup_event_buf;
2961        char *token;
2962        int ret;
2963
2964        while (true) {
2965                token = strsep(&buf, ",");
2966
2967                if (!token)
2968                        break;
2969
2970                if (*token) {
2971                        /* Restarting syscalls requires that we stop them first */
2972                        if (disable_first)
2973                                ftrace_set_clr_event(tr, token, 0);
2974
2975                        ret = ftrace_set_clr_event(tr, token, 1);
2976                        if (ret)
2977                                pr_warn("Failed to enable trace event: %s\n", token);
2978                }
2979
2980                /* Put back the comma to allow this to be called again */
2981                if (buf)
2982                        *(buf - 1) = ',';
2983        }
2984}
2985
2986static __init int event_trace_enable(void)
2987{
2988        struct trace_array *tr = top_trace_array();
2989        struct trace_event_call **iter, *call;
2990        int ret;
2991
2992        if (!tr)
2993                return -ENODEV;
2994
2995        for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
2996
2997                call = *iter;
2998                ret = event_init(call);
2999                if (!ret)
3000                        list_add(&call->list, &ftrace_events);
3001        }
3002
3003        /*
3004         * We need the top trace array to have a working set of trace
3005         * points at early init, before the debug files and directories
3006         * are created. Create the file entries now, and attach them
3007         * to the actual file dentries later.
3008         */
3009        __trace_early_add_events(tr);
3010
3011        early_enable_events(tr, false);
3012
3013        trace_printk_start_comm();
3014
3015        register_event_cmds();
3016
3017        register_trigger_cmds();
3018
3019        return 0;
3020}
3021
3022/*
3023 * event_trace_enable() is called from trace_event_init() first to
3024 * initialize events and perhaps start any events that are on the
3025 * command line. Unfortunately, there are some events that will not
3026 * start this early, like the system call tracepoints that need
3027 * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
3028 * is called before pid 1 starts, so the flag is never set, the syscall
3029 * tracepoints are never reached, and those events are enabled but do
3030 * nothing; event_trace_enable_again() below re-runs the enabling later.
3031 */
3032static __init int event_trace_enable_again(void)
3033{
3034        struct trace_array *tr;
3035
3036        tr = top_trace_array();
3037        if (!tr)
3038                return -ENODEV;
3039
3040        early_enable_events(tr, true);
3041
3042        return 0;
3043}
3044
3045early_initcall(event_trace_enable_again);
3046
3047static __init int event_trace_init(void)
3048{
3049        struct trace_array *tr;
3050        struct dentry *d_tracer;
3051        struct dentry *entry;
3052        int ret;
3053
3054        tr = top_trace_array();
3055        if (!tr)
3056                return -ENODEV;
3057
3058        d_tracer = tracing_init_dentry();
3059        if (IS_ERR(d_tracer))
3060                return 0;
3061
3062        entry = tracefs_create_file("available_events", 0444, d_tracer,
3063                                    tr, &ftrace_avail_fops);
3064        if (!entry)
3065                pr_warn("Could not create tracefs 'available_events' entry\n");
3066
3067        if (trace_define_generic_fields())
3068                pr_warn("tracing: Failed to allocate generic fields");
3069
3070        if (trace_define_common_fields())
3071                pr_warn("tracing: Failed to allocate common fields");
3072
3073        ret = early_event_add_tracer(d_tracer, tr);
3074        if (ret)
3075                return ret;
3076
3077#ifdef CONFIG_MODULES
3078        ret = register_module_notifier(&trace_module_nb);
3079        if (ret)
3080                pr_warn("Failed to register trace events module notifier\n");
3081#endif
3082        return 0;
3083}
3084
3085void __init trace_event_init(void)
3086{
3087        event_trace_memsetup();
3088        init_ftrace_syscalls();
3089        event_trace_enable();
3090}
3091
3092fs_initcall(event_trace_init);
3093
3094#ifdef CONFIG_FTRACE_STARTUP_TEST
3095
3096static DEFINE_SPINLOCK(test_spinlock);
3097static DEFINE_SPINLOCK(test_spinlock_irq);
3098static DEFINE_MUTEX(test_mutex);
3099
3100static __init void test_work(struct work_struct *dummy)
3101{
3102        spin_lock(&test_spinlock);
3103        spin_lock_irq(&test_spinlock_irq);
3104        udelay(1);
3105        spin_unlock_irq(&test_spinlock_irq);
3106        spin_unlock(&test_spinlock);
3107
3108        mutex_lock(&test_mutex);
3109        msleep(1);
3110        mutex_unlock(&test_mutex);
3111}
3112
3113static __init int event_test_thread(void *unused)
3114{
3115        void *test_malloc;
3116
3117        test_malloc = kmalloc(1234, GFP_KERNEL);
3118        if (!test_malloc)
3119                pr_info("failed to kmalloc\n");
3120
3121        schedule_on_each_cpu(test_work);
3122
3123        kfree(test_malloc);
3124
3125        set_current_state(TASK_INTERRUPTIBLE);
3126        while (!kthread_should_stop()) {
3127                schedule();
3128                set_current_state(TASK_INTERRUPTIBLE);
3129        }
3130        __set_current_state(TASK_RUNNING);
3131
3132        return 0;
3133}
3134
3135/*
3136 * Do various things that may trigger events.
3137 */
3138static __init void event_test_stuff(void)
3139{
3140        struct task_struct *test_thread;
3141
3142        test_thread = kthread_run(event_test_thread, NULL, "test-events");
3143        msleep(1);
3144        kthread_stop(test_thread);
3145}
3146
3147/*
3148 * For every trace event defined, we will test each trace point separately,
3149 * and then by groups, and finally all trace points.
3150 */
3151static __init void event_trace_self_tests(void)
3152{
3153        struct trace_subsystem_dir *dir;
3154        struct trace_event_file *file;
3155        struct trace_event_call *call;
3156        struct event_subsystem *system;
3157        struct trace_array *tr;
3158        int ret;
3159
3160        tr = top_trace_array();
3161        if (!tr)
3162                return;
3163
3164        pr_info("Running tests on trace events:\n");
3165
3166        list_for_each_entry(file, &tr->events, list) {
3167
3168                call = file->event_call;
3169
3170                /* Only test those that have a probe */
3171                if (!call->class || !call->class->probe)
3172                        continue;
3173
3174/*
3175 * Testing syscall events here is pretty useless, but
3176 * we still do it if configured, even though it is time consuming.
3177 * What we really need is a user thread to perform the
3178 * syscalls as we test.
3179 */
3180#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
3181                if (call->class->system &&
3182                    strcmp(call->class->system, "syscalls") == 0)
3183                        continue;
3184#endif
3185
3186                pr_info("Testing event %s: ", trace_event_name(call));
3187
3188                /*
3189                 * If an event is already enabled, someone is using
3190                 * it and the self test should not be on.
3191                 */
3192                if (file->flags & EVENT_FILE_FL_ENABLED) {
3193                        pr_warn("Enabled event during self test!\n");
3194                        WARN_ON_ONCE(1);
3195                        continue;
3196                }
3197
3198                ftrace_event_enable_disable(file, 1);
3199                event_test_stuff();
3200                ftrace_event_enable_disable(file, 0);
3201
3202                pr_cont("OK\n");
3203        }
3204
3205        /* Now test at the sub system level */
3206
3207        pr_info("Running tests on trace event systems:\n");
3208
3209        list_for_each_entry(dir, &tr->systems, list) {
3210
3211                system = dir->subsystem;
3212
3213                /* the ftrace system is special, skip it */
3214                if (strcmp(system->name, "ftrace") == 0)
3215                        continue;
3216
3217                pr_info("Testing event system %s: ", system->name);
3218
3219                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
3220                if (WARN_ON_ONCE(ret)) {
3221                        pr_warn("error enabling system %s\n",
3222                                system->name);
3223                        continue;
3224                }
3225
3226                event_test_stuff();
3227
3228                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
3229                if (WARN_ON_ONCE(ret)) {
3230                        pr_warn("error disabling system %s\n",
3231                                system->name);
3232                        continue;
3233                }
3234
3235                pr_cont("OK\n");
3236        }
3237
3238        /* Test with all events enabled */
3239
3240        pr_info("Running tests on all trace events:\n");
3241        pr_info("Testing all events: ");
3242
3243        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
3244        if (WARN_ON_ONCE(ret)) {
3245                pr_warn("error enabling all events\n");
3246                return;
3247        }
3248
3249        event_test_stuff();
3250
3251        /* reset sysname */
3252        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
3253        if (WARN_ON_ONCE(ret)) {
3254                pr_warn("error disabling all events\n");
3255                return;
3256        }
3257
3258        pr_cont("OK\n");
3259}
3260
3261#ifdef CONFIG_FUNCTION_TRACER
3262
3263static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
3264
3265static struct trace_event_file event_trace_file __initdata;
3266
3267static void __init
3268function_test_events_call(unsigned long ip, unsigned long parent_ip,
3269                          struct ftrace_ops *op, struct pt_regs *pt_regs)
3270{
3271        struct ring_buffer_event *event;
3272        struct ring_buffer *buffer;
3273        struct ftrace_entry *entry;
3274        unsigned long flags;
3275        long disabled;
3276        int cpu;
3277        int pc;
3278
3279        pc = preempt_count();
3280        preempt_disable_notrace();
3281        cpu = raw_smp_processor_id();
3282        disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
3283
3284        if (disabled != 1)
3285                goto out;
3286
3287        local_save_flags(flags);
3288
3289        event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
3290                                                TRACE_FN, sizeof(*entry),
3291                                                flags, pc);
3292        if (!event)
3293                goto out;
3294        entry   = ring_buffer_event_data(event);
3295        entry->ip                       = ip;
3296        entry->parent_ip                = parent_ip;
3297
3298        event_trigger_unlock_commit(&event_trace_file, buffer, event,
3299                                    entry, flags, pc);
3300 out:
3301        atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
3302        preempt_enable_notrace();
3303}
3304
3305static struct ftrace_ops trace_ops __initdata  =
3306{
3307        .func = function_test_events_call,
3308        .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3309};
3310
3311static __init void event_trace_self_test_with_function(void)
3312{
3313        int ret;
3314
3315        event_trace_file.tr = top_trace_array();
3316        if (WARN_ON(!event_trace_file.tr))
3317                return;
3318
3319        ret = register_ftrace_function(&trace_ops);
3320        if (WARN_ON(ret < 0)) {
3321                pr_info("Failed to enable function tracer for event tests\n");
3322                return;
3323        }
3324        pr_info("Running tests again, along with the function tracer\n");
3325        event_trace_self_tests();
3326        unregister_ftrace_function(&trace_ops);
3327}
3328#else
3329static __init void event_trace_self_test_with_function(void)
3330{
3331}
3332#endif
3333
3334static __init int event_trace_self_tests_init(void)
3335{
3336        if (!tracing_selftest_disabled) {
3337                event_trace_self_tests();
3338                event_trace_self_test_with_function();
3339        }
3340
3341        return 0;
3342}
3343
3344late_initcall(event_trace_self_tests_init);
3345
3346#endif
3347