linux/kernel/trace/trace_events.c
   1/*
   2 * event tracer
   3 *
   4 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
   5 *
   6 *  - Added format output of fields of the trace point.
   7 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
   8 *
   9 */
  10
  11#define pr_fmt(fmt) fmt
  12
  13#include <linux/workqueue.h>
  14#include <linux/spinlock.h>
  15#include <linux/kthread.h>
  16#include <linux/tracefs.h>
  17#include <linux/uaccess.h>
  18#include <linux/bsearch.h>
  19#include <linux/module.h>
  20#include <linux/ctype.h>
  21#include <linux/sort.h>
  22#include <linux/slab.h>
  23#include <linux/delay.h>
  24
  25#include <trace/events/sched.h>
  26
  27#include <asm/setup.h>
  28
  29#include "trace_output.h"
  30
  31#undef TRACE_SYSTEM
  32#define TRACE_SYSTEM "TRACE_SYSTEM"
  33
  34DEFINE_MUTEX(event_mutex);
  35
  36LIST_HEAD(ftrace_events);
  37static LIST_HEAD(ftrace_generic_fields);
  38static LIST_HEAD(ftrace_common_fields);
  39
  40#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
  41
  42static struct kmem_cache *field_cachep;
  43static struct kmem_cache *file_cachep;
  44
  45static inline int system_refcount(struct event_subsystem *system)
  46{
  47        return system->ref_count;
  48}
  49
  50static int system_refcount_inc(struct event_subsystem *system)
  51{
  52        return system->ref_count++;
  53}
  54
  55static int system_refcount_dec(struct event_subsystem *system)
  56{
  57        return --system->ref_count;
  58}
  59
   60/* Double loops, do not use break, only gotos work */
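     /*
      * Illustrative use of the iteration helpers defined below (this mirrors
      * how trace_event_enable_cmd_record() later in this file walks every
      * event file of every trace instance):
      *
      *    mutex_lock(&event_mutex);
      *    do_for_each_event_file(tr, file) {
      *            if (!(file->flags & EVENT_FILE_FL_ENABLED))
      *                    continue;
      *            ...
      *    } while_for_each_event_file();
      *    mutex_unlock(&event_mutex);
      */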
  61#define do_for_each_event_file(tr, file)                        \
  62        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
  63                list_for_each_entry(file, &tr->events, list)
  64
  65#define do_for_each_event_file_safe(tr, file)                   \
  66        list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
  67                struct trace_event_file *___n;                          \
  68                list_for_each_entry_safe(file, ___n, &tr->events, list)
  69
  70#define while_for_each_event_file()             \
  71        }
  72
  73static struct list_head *
  74trace_get_fields(struct trace_event_call *event_call)
  75{
  76        if (!event_call->class->get_fields)
  77                return &event_call->class->fields;
  78        return event_call->class->get_fields(event_call);
  79}
  80
  81static struct ftrace_event_field *
  82__find_event_field(struct list_head *head, char *name)
  83{
  84        struct ftrace_event_field *field;
  85
  86        list_for_each_entry(field, head, link) {
  87                if (!strcmp(field->name, name))
  88                        return field;
  89        }
  90
  91        return NULL;
  92}
  93
  94struct ftrace_event_field *
  95trace_find_event_field(struct trace_event_call *call, char *name)
  96{
  97        struct ftrace_event_field *field;
  98        struct list_head *head;
  99
 100        head = trace_get_fields(call);
 101        field = __find_event_field(head, name);
 102        if (field)
 103                return field;
 104
 105        field = __find_event_field(&ftrace_generic_fields, name);
 106        if (field)
 107                return field;
 108
 109        return __find_event_field(&ftrace_common_fields, name);
 110}
 111
 112static int __trace_define_field(struct list_head *head, const char *type,
 113                                const char *name, int offset, int size,
 114                                int is_signed, int filter_type)
 115{
 116        struct ftrace_event_field *field;
 117
 118        field = kmem_cache_alloc(field_cachep, GFP_TRACE);
 119        if (!field)
 120                return -ENOMEM;
 121
 122        field->name = name;
 123        field->type = type;
 124
 125        if (filter_type == FILTER_OTHER)
 126                field->filter_type = filter_assign_type(type);
 127        else
 128                field->filter_type = filter_type;
 129
 130        field->offset = offset;
 131        field->size = size;
 132        field->is_signed = is_signed;
 133
 134        list_add(&field->link, head);
 135
 136        return 0;
 137}
 138
 139int trace_define_field(struct trace_event_call *call, const char *type,
 140                       const char *name, int offset, int size, int is_signed,
 141                       int filter_type)
 142{
 143        struct list_head *head;
 144
 145        if (WARN_ON(!call->class))
 146                return 0;
 147
 148        head = trace_get_fields(call);
 149        return __trace_define_field(head, type, name, offset, size,
 150                                    is_signed, filter_type);
 151}
 152EXPORT_SYMBOL_GPL(trace_define_field);
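     /*
      * Illustrative (hypothetical) caller of trace_define_field(): an event
      * that records a single "ip" field of type unsigned long could describe
      * it to the filtering code with something like:
      *
      *    trace_define_field(call, "unsigned long", "ip",
      *                       offsetof(struct my_entry, ip),
      *                       sizeof(unsigned long), 0, FILTER_OTHER);
      *
      * "struct my_entry" is a made-up example type, not something defined in
      * this file.
      */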
 153
 154#define __generic_field(type, item, filter_type)                        \
 155        ret = __trace_define_field(&ftrace_generic_fields, #type,       \
 156                                   #item, 0, 0, is_signed_type(type),   \
 157                                   filter_type);                        \
 158        if (ret)                                                        \
 159                return ret;
 160
 161#define __common_field(type, item)                                      \
 162        ret = __trace_define_field(&ftrace_common_fields, #type,        \
 163                                   "common_" #item,                     \
 164                                   offsetof(typeof(ent), item),         \
 165                                   sizeof(ent.item),                    \
 166                                   is_signed_type(type), FILTER_OTHER); \
 167        if (ret)                                                        \
 168                return ret;
 169
 170static int trace_define_generic_fields(void)
 171{
 172        int ret;
 173
 174        __generic_field(int, CPU, FILTER_CPU);
 175        __generic_field(int, cpu, FILTER_CPU);
 176        __generic_field(char *, COMM, FILTER_COMM);
 177        __generic_field(char *, comm, FILTER_COMM);
 178
 179        return ret;
 180}
 181
 182static int trace_define_common_fields(void)
 183{
 184        int ret;
 185        struct trace_entry ent;
 186
 187        __common_field(unsigned short, type);
 188        __common_field(unsigned char, flags);
 189        __common_field(unsigned char, preempt_count);
 190        __common_field(int, pid);
 191
 192        return ret;
 193}
 194
 195static void trace_destroy_fields(struct trace_event_call *call)
 196{
 197        struct ftrace_event_field *field, *next;
 198        struct list_head *head;
 199
 200        head = trace_get_fields(call);
 201        list_for_each_entry_safe(field, next, head, link) {
 202                list_del(&field->link);
 203                kmem_cache_free(field_cachep, field);
 204        }
 205}
 206
 207int trace_event_raw_init(struct trace_event_call *call)
 208{
 209        int id;
 210
 211        id = register_trace_event(&call->event);
 212        if (!id)
 213                return -ENODEV;
 214
 215        return 0;
 216}
 217EXPORT_SYMBOL_GPL(trace_event_raw_init);
 218
 219bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
 220{
 221        struct trace_array *tr = trace_file->tr;
 222        struct trace_array_cpu *data;
 223        struct trace_pid_list *pid_list;
 224
 225        pid_list = rcu_dereference_sched(tr->filtered_pids);
 226        if (!pid_list)
 227                return false;
 228
 229        data = this_cpu_ptr(tr->trace_buffer.data);
 230
 231        return data->ignore_pid;
 232}
 233EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
 234
 235void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 236                                 struct trace_event_file *trace_file,
 237                                 unsigned long len)
 238{
 239        struct trace_event_call *event_call = trace_file->event_call;
 240
 241        if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
 242            trace_event_ignore_this_pid(trace_file))
 243                return NULL;
 244
 245        local_save_flags(fbuffer->flags);
 246        fbuffer->pc = preempt_count();
 247        fbuffer->trace_file = trace_file;
 248
 249        fbuffer->event =
 250                trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
 251                                                event_call->event.type, len,
 252                                                fbuffer->flags, fbuffer->pc);
 253        if (!fbuffer->event)
 254                return NULL;
 255
 256        fbuffer->entry = ring_buffer_event_data(fbuffer->event);
 257        return fbuffer->entry;
 258}
 259EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
 260
 261static DEFINE_SPINLOCK(tracepoint_iter_lock);
 262
 263static void output_printk(struct trace_event_buffer *fbuffer)
 264{
 265        struct trace_event_call *event_call;
 266        struct trace_event *event;
 267        unsigned long flags;
 268        struct trace_iterator *iter = tracepoint_print_iter;
 269
 270        if (!iter)
 271                return;
 272
 273        event_call = fbuffer->trace_file->event_call;
 274        if (!event_call || !event_call->event.funcs ||
 275            !event_call->event.funcs->trace)
 276                return;
 277
 278        event = &fbuffer->trace_file->event_call->event;
 279
 280        spin_lock_irqsave(&tracepoint_iter_lock, flags);
 281        trace_seq_init(&iter->seq);
 282        iter->ent = fbuffer->entry;
 283        event_call->event.funcs->trace(iter, 0, event);
 284        trace_seq_putc(&iter->seq, 0);
 285        printk("%s", iter->seq.buffer);
 286
 287        spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
 288}
 289
 290void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
 291{
 292        if (tracepoint_printk)
 293                output_printk(fbuffer);
 294
 295        event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
 296                                    fbuffer->event, fbuffer->entry,
 297                                    fbuffer->flags, fbuffer->pc);
 298}
 299EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
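     /*
      * trace_event_buffer_reserve() and trace_event_buffer_commit() are used
      * as a pair by the event probes.  A rough sketch of the usual pattern
      * (illustrative only; "struct my_entry" is a placeholder for the
      * event's real entry type):
      *
      *    struct trace_event_buffer fbuffer;
      *    struct my_entry *entry;
      *
      *    entry = trace_event_buffer_reserve(&fbuffer, trace_file,
      *                                       sizeof(*entry));
      *    if (!entry)
      *            return;
      *    entry->ip = ip;
      *    trace_event_buffer_commit(&fbuffer);
      */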
 300
 301int trace_event_reg(struct trace_event_call *call,
 302                    enum trace_reg type, void *data)
 303{
 304        struct trace_event_file *file = data;
 305
 306        WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
 307        switch (type) {
 308        case TRACE_REG_REGISTER:
 309                return tracepoint_probe_register(call->tp,
 310                                                 call->class->probe,
 311                                                 file);
 312        case TRACE_REG_UNREGISTER:
 313                tracepoint_probe_unregister(call->tp,
 314                                            call->class->probe,
 315                                            file);
 316                return 0;
 317
 318#ifdef CONFIG_PERF_EVENTS
 319        case TRACE_REG_PERF_REGISTER:
 320                return tracepoint_probe_register(call->tp,
 321                                                 call->class->perf_probe,
 322                                                 call);
 323        case TRACE_REG_PERF_UNREGISTER:
 324                tracepoint_probe_unregister(call->tp,
 325                                            call->class->perf_probe,
 326                                            call);
 327                return 0;
 328        case TRACE_REG_PERF_OPEN:
 329        case TRACE_REG_PERF_CLOSE:
 330        case TRACE_REG_PERF_ADD:
 331        case TRACE_REG_PERF_DEL:
 332                return 0;
 333#endif
 334        }
 335        return 0;
 336}
 337EXPORT_SYMBOL_GPL(trace_event_reg);
 338
 339void trace_event_enable_cmd_record(bool enable)
 340{
 341        struct trace_event_file *file;
 342        struct trace_array *tr;
 343
 344        mutex_lock(&event_mutex);
 345        do_for_each_event_file(tr, file) {
 346
 347                if (!(file->flags & EVENT_FILE_FL_ENABLED))
 348                        continue;
 349
 350                if (enable) {
 351                        tracing_start_cmdline_record();
 352                        set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 353                } else {
 354                        tracing_stop_cmdline_record();
 355                        clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 356                }
 357        } while_for_each_event_file();
 358        mutex_unlock(&event_mutex);
 359}
 360
 361static int __ftrace_event_enable_disable(struct trace_event_file *file,
 362                                         int enable, int soft_disable)
 363{
 364        struct trace_event_call *call = file->event_call;
 365        struct trace_array *tr = file->tr;
 366        int ret = 0;
 367        int disable;
 368
 369        switch (enable) {
 370        case 0:
 371                /*
 372                 * When soft_disable is set and enable is cleared, the sm_ref
 373                 * reference counter is decremented. If it reaches 0, we want
 374                 * to clear the SOFT_DISABLED flag but leave the event in the
 375                 * state that it was. That is, if the event was enabled and
 376                 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
 377                 * is set we do not want the event to be enabled before we
 378                 * clear the bit.
 379                 *
 380                 * When soft_disable is not set but the SOFT_MODE flag is,
 381                 * we do nothing. Do not disable the tracepoint, otherwise
  382                 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
 383                 */
 384                if (soft_disable) {
 385                        if (atomic_dec_return(&file->sm_ref) > 0)
 386                                break;
 387                        disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
 388                        clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
 389                } else
 390                        disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
 391
 392                if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
 393                        clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 394                        if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
 395                                tracing_stop_cmdline_record();
 396                                clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 397                        }
 398                        call->class->reg(call, TRACE_REG_UNREGISTER, file);
 399                }
  400                /* If in SOFT_MODE, just set the SOFT_DISABLED_BIT, else clear it */
 401                if (file->flags & EVENT_FILE_FL_SOFT_MODE)
 402                        set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 403                else
 404                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 405                break;
 406        case 1:
 407                /*
 408                 * When soft_disable is set and enable is set, we want to
 409                 * register the tracepoint for the event, but leave the event
 410                 * as is. That means, if the event was already enabled, we do
 411                 * nothing (but set SOFT_MODE). If the event is disabled, we
 412                 * set SOFT_DISABLED before enabling the event tracepoint, so
 413                 * it still seems to be disabled.
 414                 */
 415                if (!soft_disable)
 416                        clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 417                else {
 418                        if (atomic_inc_return(&file->sm_ref) > 1)
 419                                break;
 420                        set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
 421                }
 422
 423                if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
 424
  425                        /* Keep the event disabled when going to SOFT_MODE. */
 426                        if (soft_disable)
 427                                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 428
 429                        if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
 430                                tracing_start_cmdline_record();
 431                                set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
 432                        }
 433                        ret = call->class->reg(call, TRACE_REG_REGISTER, file);
 434                        if (ret) {
 435                                tracing_stop_cmdline_record();
 436                                pr_info("event trace: Could not enable event "
 437                                        "%s\n", trace_event_name(call));
 438                                break;
 439                        }
 440                        set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 441
 442                        /* WAS_ENABLED gets set but never cleared. */
 443                        call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
 444                }
 445                break;
 446        }
 447
 448        return ret;
 449}
 450
 451int trace_event_enable_disable(struct trace_event_file *file,
 452                               int enable, int soft_disable)
 453{
 454        return __ftrace_event_enable_disable(file, enable, soft_disable);
 455}
 456
 457static int ftrace_event_enable_disable(struct trace_event_file *file,
 458                                       int enable)
 459{
 460        return __ftrace_event_enable_disable(file, enable, 0);
 461}
 462
 463static void ftrace_clear_events(struct trace_array *tr)
 464{
 465        struct trace_event_file *file;
 466
 467        mutex_lock(&event_mutex);
 468        list_for_each_entry(file, &tr->events, list) {
 469                ftrace_event_enable_disable(file, 0);
 470        }
 471        mutex_unlock(&event_mutex);
 472}
 473
 474static int cmp_pid(const void *key, const void *elt)
 475{
 476        const pid_t *search_pid = key;
 477        const pid_t *pid = elt;
 478
 479        if (*search_pid == *pid)
 480                return 0;
 481        if (*search_pid < *pid)
 482                return -1;
 483        return 1;
 484}
 485
 486static bool
 487check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task)
 488{
 489        pid_t search_pid;
 490        pid_t *pid;
 491
 492        /*
 493         * Return false, because if filtered_pids does not exist,
 494         * all pids are good to trace.
 495         */
 496        if (!filtered_pids)
 497                return false;
 498
 499        search_pid = task->pid;
 500
 501        pid = bsearch(&search_pid, filtered_pids->pids,
 502                      filtered_pids->nr_pids, sizeof(pid_t),
 503                      cmp_pid);
 504        if (!pid)
 505                return true;
 506
 507        return false;
 508}
 509
 510static void
 511event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
 512                    struct task_struct *prev, struct task_struct *next)
 513{
 514        struct trace_array *tr = data;
 515        struct trace_pid_list *pid_list;
 516
 517        pid_list = rcu_dereference_sched(tr->filtered_pids);
 518
 519        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 520                       check_ignore_pid(pid_list, prev) &&
 521                       check_ignore_pid(pid_list, next));
 522}
 523
 524static void
 525event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
 526                    struct task_struct *prev, struct task_struct *next)
 527{
 528        struct trace_array *tr = data;
 529        struct trace_pid_list *pid_list;
 530
 531        pid_list = rcu_dereference_sched(tr->filtered_pids);
 532
 533        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 534                       check_ignore_pid(pid_list, next));
 535}
 536
 537static void
 538event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
 539{
 540        struct trace_array *tr = data;
 541        struct trace_pid_list *pid_list;
 542
 543        /* Nothing to do if we are already tracing */
 544        if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
 545                return;
 546
 547        pid_list = rcu_dereference_sched(tr->filtered_pids);
 548
 549        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 550                       check_ignore_pid(pid_list, task));
 551}
 552
 553static void
 554event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
 555{
 556        struct trace_array *tr = data;
 557        struct trace_pid_list *pid_list;
 558
 559        /* Nothing to do if we are not tracing */
 560        if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
 561                return;
 562
 563        pid_list = rcu_dereference_sched(tr->filtered_pids);
 564
 565        /* Set tracing if current is enabled */
 566        this_cpu_write(tr->trace_buffer.data->ignore_pid,
 567                       check_ignore_pid(pid_list, current));
 568}
 569
 570static void __ftrace_clear_event_pids(struct trace_array *tr)
 571{
 572        struct trace_pid_list *pid_list;
 573        struct trace_event_file *file;
 574        int cpu;
 575
 576        pid_list = rcu_dereference_protected(tr->filtered_pids,
 577                                             lockdep_is_held(&event_mutex));
 578        if (!pid_list)
 579                return;
 580
 581        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
 582        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
 583
 584        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
 585        unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
 586
 587        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
 588        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
 589
 590        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
 591        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
 592
 593        list_for_each_entry(file, &tr->events, list) {
 594                clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
 595        }
 596
 597        for_each_possible_cpu(cpu)
 598                per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
 599
 600        rcu_assign_pointer(tr->filtered_pids, NULL);
 601
 602        /* Wait till all users are no longer using pid filtering */
 603        synchronize_sched();
 604
 605        free_pages((unsigned long)pid_list->pids, pid_list->order);
 606        kfree(pid_list);
 607}
 608
 609static void ftrace_clear_event_pids(struct trace_array *tr)
 610{
 611        mutex_lock(&event_mutex);
 612        __ftrace_clear_event_pids(tr);
 613        mutex_unlock(&event_mutex);
 614}
 615
 616static void __put_system(struct event_subsystem *system)
 617{
 618        struct event_filter *filter = system->filter;
 619
 620        WARN_ON_ONCE(system_refcount(system) == 0);
 621        if (system_refcount_dec(system))
 622                return;
 623
 624        list_del(&system->list);
 625
 626        if (filter) {
 627                kfree(filter->filter_string);
 628                kfree(filter);
 629        }
 630        kfree_const(system->name);
 631        kfree(system);
 632}
 633
 634static void __get_system(struct event_subsystem *system)
 635{
 636        WARN_ON_ONCE(system_refcount(system) == 0);
 637        system_refcount_inc(system);
 638}
 639
 640static void __get_system_dir(struct trace_subsystem_dir *dir)
 641{
 642        WARN_ON_ONCE(dir->ref_count == 0);
 643        dir->ref_count++;
 644        __get_system(dir->subsystem);
 645}
 646
 647static void __put_system_dir(struct trace_subsystem_dir *dir)
 648{
 649        WARN_ON_ONCE(dir->ref_count == 0);
 650        /* If the subsystem is about to be freed, the dir must be too */
 651        WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
 652
 653        __put_system(dir->subsystem);
 654        if (!--dir->ref_count)
 655                kfree(dir);
 656}
 657
 658static void put_system(struct trace_subsystem_dir *dir)
 659{
 660        mutex_lock(&event_mutex);
 661        __put_system_dir(dir);
 662        mutex_unlock(&event_mutex);
 663}
 664
 665static void remove_subsystem(struct trace_subsystem_dir *dir)
 666{
 667        if (!dir)
 668                return;
 669
 670        if (!--dir->nr_events) {
 671                tracefs_remove_recursive(dir->entry);
 672                list_del(&dir->list);
 673                __put_system_dir(dir);
 674        }
 675}
 676
 677static void remove_event_file_dir(struct trace_event_file *file)
 678{
 679        struct dentry *dir = file->dir;
 680        struct dentry *child;
 681
 682        if (dir) {
 683                spin_lock(&dir->d_lock);        /* probably unneeded */
 684                list_for_each_entry(child, &dir->d_subdirs, d_child) {
 685                        if (d_really_is_positive(child))        /* probably unneeded */
 686                                d_inode(child)->i_private = NULL;
 687                }
 688                spin_unlock(&dir->d_lock);
 689
 690                tracefs_remove_recursive(dir);
 691        }
 692
 693        list_del(&file->list);
 694        remove_subsystem(file->system);
 695        free_event_filter(file->filter);
 696        kmem_cache_free(file_cachep, file);
 697}
 698
 699/*
 700 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
 701 */
 702static int
 703__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
 704                              const char *sub, const char *event, int set)
 705{
 706        struct trace_event_file *file;
 707        struct trace_event_call *call;
 708        const char *name;
 709        int ret = -EINVAL;
 710
 711        list_for_each_entry(file, &tr->events, list) {
 712
 713                call = file->event_call;
 714                name = trace_event_name(call);
 715
 716                if (!name || !call->class || !call->class->reg)
 717                        continue;
 718
 719                if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
 720                        continue;
 721
 722                if (match &&
 723                    strcmp(match, name) != 0 &&
 724                    strcmp(match, call->class->system) != 0)
 725                        continue;
 726
 727                if (sub && strcmp(sub, call->class->system) != 0)
 728                        continue;
 729
 730                if (event && strcmp(event, name) != 0)
 731                        continue;
 732
 733                ftrace_event_enable_disable(file, set);
 734
 735                ret = 0;
 736        }
 737
 738        return ret;
 739}
 740
 741static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
 742                                  const char *sub, const char *event, int set)
 743{
 744        int ret;
 745
 746        mutex_lock(&event_mutex);
 747        ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
 748        mutex_unlock(&event_mutex);
 749
 750        return ret;
 751}
 752
 753static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 754{
 755        char *event = NULL, *sub = NULL, *match;
 756        int ret;
 757
 758        /*
 759         * The buf format can be <subsystem>:<event-name>
 760         *  *:<event-name> means any event by that name.
 761         *  :<event-name> is the same.
 762         *
 763         *  <subsystem>:* means all events in that subsystem
 764         *  <subsystem>: means the same.
 765         *
 766         *  <name> (no ':') means all events in a subsystem with
 767         *  the name <name> or any event that matches <name>
 768         */
 769
 770        match = strsep(&buf, ":");
 771        if (buf) {
 772                sub = match;
 773                event = buf;
 774                match = NULL;
 775
 776                if (!strlen(sub) || strcmp(sub, "*") == 0)
 777                        sub = NULL;
 778                if (!strlen(event) || strcmp(event, "*") == 0)
 779                        event = NULL;
 780        }
 781
 782        ret = __ftrace_set_clr_event(tr, match, sub, event, set);
 783
 784        /* Put back the colon to allow this to be called again */
 785        if (buf)
 786                *(buf - 1) = ':';
 787
 788        return ret;
 789}
 790
 791/**
 792 * trace_set_clr_event - enable or disable an event
 793 * @system: system name to match (NULL for any system)
 794 * @event: event name to match (NULL for all events, within system)
 795 * @set: 1 to enable, 0 to disable
 796 *
 797 * This is a way for other parts of the kernel to enable or disable
 798 * event recording.
 799 *
 800 * Returns 0 on success, -EINVAL if the parameters do not match any
 801 * registered events.
 802 */
 803int trace_set_clr_event(const char *system, const char *event, int set)
 804{
 805        struct trace_array *tr = top_trace_array();
 806
 807        if (!tr)
 808                return -ENODEV;
 809
 810        return __ftrace_set_clr_event(tr, NULL, system, event, set);
 811}
 812EXPORT_SYMBOL_GPL(trace_set_clr_event);
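     /*
      * For example (illustrative), a kernel caller could enable every event
      * in the "sched" subsystem with:
      *
      *    ret = trace_set_clr_event("sched", NULL, 1);
      *
      * or disable a single event with:
      *
      *    ret = trace_set_clr_event("sched", "sched_switch", 0);
      */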
 813
 814/* 128 should be much more than enough */
 815#define EVENT_BUF_SIZE          127
 816
 817static ssize_t
 818ftrace_event_write(struct file *file, const char __user *ubuf,
 819                   size_t cnt, loff_t *ppos)
 820{
 821        struct trace_parser parser;
 822        struct seq_file *m = file->private_data;
 823        struct trace_array *tr = m->private;
 824        ssize_t read, ret;
 825
 826        if (!cnt)
 827                return 0;
 828
 829        ret = tracing_update_buffers();
 830        if (ret < 0)
 831                return ret;
 832
 833        if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
 834                return -ENOMEM;
 835
 836        read = trace_get_user(&parser, ubuf, cnt, ppos);
 837
 838        if (read >= 0 && trace_parser_loaded((&parser))) {
 839                int set = 1;
 840
 841                if (*parser.buffer == '!')
 842                        set = 0;
 843
 844                parser.buffer[parser.idx] = 0;
 845
 846                ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
 847                if (ret)
 848                        goto out_put;
 849        }
 850
 851        ret = read;
 852
 853 out_put:
 854        trace_parser_put(&parser);
 855
 856        return ret;
 857}
 858
 859static void *
 860t_next(struct seq_file *m, void *v, loff_t *pos)
 861{
 862        struct trace_event_file *file = v;
 863        struct trace_event_call *call;
 864        struct trace_array *tr = m->private;
 865
 866        (*pos)++;
 867
 868        list_for_each_entry_continue(file, &tr->events, list) {
 869                call = file->event_call;
 870                /*
 871                 * The ftrace subsystem is for showing formats only.
 872                 * They can not be enabled or disabled via the event files.
 873                 */
 874                if (call->class && call->class->reg &&
 875                    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
 876                        return file;
 877        }
 878
 879        return NULL;
 880}
 881
 882static void *t_start(struct seq_file *m, loff_t *pos)
 883{
 884        struct trace_event_file *file;
 885        struct trace_array *tr = m->private;
 886        loff_t l;
 887
 888        mutex_lock(&event_mutex);
 889
 890        file = list_entry(&tr->events, struct trace_event_file, list);
 891        for (l = 0; l <= *pos; ) {
 892                file = t_next(m, file, &l);
 893                if (!file)
 894                        break;
 895        }
 896        return file;
 897}
 898
 899static void *
 900s_next(struct seq_file *m, void *v, loff_t *pos)
 901{
 902        struct trace_event_file *file = v;
 903        struct trace_array *tr = m->private;
 904
 905        (*pos)++;
 906
 907        list_for_each_entry_continue(file, &tr->events, list) {
 908                if (file->flags & EVENT_FILE_FL_ENABLED)
 909                        return file;
 910        }
 911
 912        return NULL;
 913}
 914
 915static void *s_start(struct seq_file *m, loff_t *pos)
 916{
 917        struct trace_event_file *file;
 918        struct trace_array *tr = m->private;
 919        loff_t l;
 920
 921        mutex_lock(&event_mutex);
 922
 923        file = list_entry(&tr->events, struct trace_event_file, list);
 924        for (l = 0; l <= *pos; ) {
 925                file = s_next(m, file, &l);
 926                if (!file)
 927                        break;
 928        }
 929        return file;
 930}
 931
 932static int t_show(struct seq_file *m, void *v)
 933{
 934        struct trace_event_file *file = v;
 935        struct trace_event_call *call = file->event_call;
 936
 937        if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
 938                seq_printf(m, "%s:", call->class->system);
 939        seq_printf(m, "%s\n", trace_event_name(call));
 940
 941        return 0;
 942}
 943
 944static void t_stop(struct seq_file *m, void *p)
 945{
 946        mutex_unlock(&event_mutex);
 947}
 948
 949static void *p_start(struct seq_file *m, loff_t *pos)
 950        __acquires(RCU)
 951{
 952        struct trace_pid_list *pid_list;
 953        struct trace_array *tr = m->private;
 954
 955        /*
 956         * Grab the mutex, to keep calls to p_next() having the same
 957         * tr->filtered_pids as p_start() has.
 958         * If we just passed the tr->filtered_pids around, then RCU would
 959         * have been enough, but doing that makes things more complex.
 960         */
 961        mutex_lock(&event_mutex);
 962        rcu_read_lock_sched();
 963
 964        pid_list = rcu_dereference_sched(tr->filtered_pids);
 965
 966        if (!pid_list || *pos >= pid_list->nr_pids)
 967                return NULL;
 968
 969        return (void *)&pid_list->pids[*pos];
 970}
 971
 972static void p_stop(struct seq_file *m, void *p)
 973        __releases(RCU)
 974{
 975        rcu_read_unlock_sched();
 976        mutex_unlock(&event_mutex);
 977}
 978
 979static void *
 980p_next(struct seq_file *m, void *v, loff_t *pos)
 981{
 982        struct trace_array *tr = m->private;
 983        struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
 984
 985        (*pos)++;
 986
 987        if (*pos >= pid_list->nr_pids)
 988                return NULL;
 989
 990        return (void *)&pid_list->pids[*pos];
 991}
 992
 993static int p_show(struct seq_file *m, void *v)
 994{
 995        pid_t *pid = v;
 996
 997        seq_printf(m, "%d\n", *pid);
 998        return 0;
 999}
1000
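     /*
      * Per-event "enable" file.  Reading it yields "0" or "1" for the hard
      * enabled state, with a trailing '*' when the event is in soft mode or
      * soft-disabled (for instance while something such as a trigger holds it).
      */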
1001static ssize_t
1002event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1003                  loff_t *ppos)
1004{
1005        struct trace_event_file *file;
1006        unsigned long flags;
1007        char buf[4] = "0";
1008
1009        mutex_lock(&event_mutex);
1010        file = event_file_data(filp);
1011        if (likely(file))
1012                flags = file->flags;
1013        mutex_unlock(&event_mutex);
1014
1015        if (!file)
1016                return -ENODEV;
1017
1018        if (flags & EVENT_FILE_FL_ENABLED &&
1019            !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1020                strcpy(buf, "1");
1021
1022        if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1023            flags & EVENT_FILE_FL_SOFT_MODE)
1024                strcat(buf, "*");
1025
1026        strcat(buf, "\n");
1027
1028        return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1029}
1030
1031static ssize_t
1032event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1033                   loff_t *ppos)
1034{
1035        struct trace_event_file *file;
1036        unsigned long val;
1037        int ret;
1038
1039        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1040        if (ret)
1041                return ret;
1042
1043        ret = tracing_update_buffers();
1044        if (ret < 0)
1045                return ret;
1046
1047        switch (val) {
1048        case 0:
1049        case 1:
1050                ret = -ENODEV;
1051                mutex_lock(&event_mutex);
1052                file = event_file_data(filp);
1053                if (likely(file))
1054                        ret = ftrace_event_enable_disable(file, val);
1055                mutex_unlock(&event_mutex);
1056                break;
1057
1058        default:
1059                return -EINVAL;
1060        }
1061
1062        *ppos += cnt;
1063
1064        return ret ? ret : cnt;
1065}
1066
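     /*
      * Subsystem-level "enable" file.  Reading it reports "0" when every
      * event in the subsystem is disabled, "1" when every event is enabled,
      * and "X" for a mixture (see set_to_char[] below).
      */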
1067static ssize_t
1068system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1069                   loff_t *ppos)
1070{
1071        const char set_to_char[4] = { '?', '0', '1', 'X' };
1072        struct trace_subsystem_dir *dir = filp->private_data;
1073        struct event_subsystem *system = dir->subsystem;
1074        struct trace_event_call *call;
1075        struct trace_event_file *file;
1076        struct trace_array *tr = dir->tr;
1077        char buf[2];
1078        int set = 0;
1079        int ret;
1080
1081        mutex_lock(&event_mutex);
1082        list_for_each_entry(file, &tr->events, list) {
1083                call = file->event_call;
1084                if (!trace_event_name(call) || !call->class || !call->class->reg)
1085                        continue;
1086
1087                if (system && strcmp(call->class->system, system->name) != 0)
1088                        continue;
1089
1090                /*
1091                 * We need to find out if all the events are set
 1092                 * or if all events are cleared, or if we have
1093                 * a mixture.
1094                 */
1095                set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1096
1097                /*
1098                 * If we have a mixture, no need to look further.
1099                 */
1100                if (set == 3)
1101                        break;
1102        }
1103        mutex_unlock(&event_mutex);
1104
1105        buf[0] = set_to_char[set];
1106        buf[1] = '\n';
1107
1108        ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1109
1110        return ret;
1111}
1112
1113static ssize_t
1114system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1115                    loff_t *ppos)
1116{
1117        struct trace_subsystem_dir *dir = filp->private_data;
1118        struct event_subsystem *system = dir->subsystem;
1119        const char *name = NULL;
1120        unsigned long val;
1121        ssize_t ret;
1122
1123        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1124        if (ret)
1125                return ret;
1126
1127        ret = tracing_update_buffers();
1128        if (ret < 0)
1129                return ret;
1130
1131        if (val != 0 && val != 1)
1132                return -EINVAL;
1133
1134        /*
1135         * Opening of "enable" adds a ref count to system,
1136         * so the name is safe to use.
1137         */
1138        if (system)
1139                name = system->name;
1140
1141        ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1142        if (ret)
1143                goto out;
1144
1145        ret = cnt;
1146
1147out:
1148        *ppos += cnt;
1149
1150        return ret;
1151}
1152
1153enum {
1154        FORMAT_HEADER           = 1,
1155        FORMAT_FIELD_SEPERATOR  = 2,
1156        FORMAT_PRINTFMT         = 3,
1157};
1158
1159static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1160{
1161        struct trace_event_call *call = event_file_data(m->private);
1162        struct list_head *common_head = &ftrace_common_fields;
1163        struct list_head *head = trace_get_fields(call);
1164        struct list_head *node = v;
1165
1166        (*pos)++;
1167
1168        switch ((unsigned long)v) {
1169        case FORMAT_HEADER:
1170                node = common_head;
1171                break;
1172
1173        case FORMAT_FIELD_SEPERATOR:
1174                node = head;
1175                break;
1176
1177        case FORMAT_PRINTFMT:
1178                /* all done */
1179                return NULL;
1180        }
1181
1182        node = node->prev;
1183        if (node == common_head)
1184                return (void *)FORMAT_FIELD_SEPERATOR;
1185        else if (node == head)
1186                return (void *)FORMAT_PRINTFMT;
1187        else
1188                return node;
1189}
1190
1191static int f_show(struct seq_file *m, void *v)
1192{
1193        struct trace_event_call *call = event_file_data(m->private);
1194        struct ftrace_event_field *field;
1195        const char *array_descriptor;
1196
1197        switch ((unsigned long)v) {
1198        case FORMAT_HEADER:
1199                seq_printf(m, "name: %s\n", trace_event_name(call));
1200                seq_printf(m, "ID: %d\n", call->event.type);
1201                seq_puts(m, "format:\n");
1202                return 0;
1203
1204        case FORMAT_FIELD_SEPERATOR:
1205                seq_putc(m, '\n');
1206                return 0;
1207
1208        case FORMAT_PRINTFMT:
1209                seq_printf(m, "\nprint fmt: %s\n",
1210                           call->print_fmt);
1211                return 0;
1212        }
1213
1214        field = list_entry(v, struct ftrace_event_field, link);
1215        /*
 1216         * Smartly shows the array type (except dynamic arrays).
1217         * Normal:
1218         *      field:TYPE VAR
1219         * If TYPE := TYPE[LEN], it is shown:
1220         *      field:TYPE VAR[LEN]
1221         */
1222        array_descriptor = strchr(field->type, '[');
1223
1224        if (!strncmp(field->type, "__data_loc", 10))
1225                array_descriptor = NULL;
1226
1227        if (!array_descriptor)
1228                seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1229                           field->type, field->name, field->offset,
1230                           field->size, !!field->is_signed);
1231        else
1232                seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1233                           (int)(array_descriptor - field->type),
1234                           field->type, field->name,
1235                           array_descriptor, field->offset,
1236                           field->size, !!field->is_signed);
1237
1238        return 0;
1239}
1240
1241static void *f_start(struct seq_file *m, loff_t *pos)
1242{
1243        void *p = (void *)FORMAT_HEADER;
1244        loff_t l = 0;
1245
1246        /* ->stop() is called even if ->start() fails */
1247        mutex_lock(&event_mutex);
1248        if (!event_file_data(m->private))
1249                return ERR_PTR(-ENODEV);
1250
1251        while (l < *pos && p)
1252                p = f_next(m, p, &l);
1253
1254        return p;
1255}
1256
1257static void f_stop(struct seq_file *m, void *p)
1258{
1259        mutex_unlock(&event_mutex);
1260}
1261
1262static const struct seq_operations trace_format_seq_ops = {
1263        .start          = f_start,
1264        .next           = f_next,
1265        .stop           = f_stop,
1266        .show           = f_show,
1267};
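     /*
      * The seq_file operations above back each event's "format" file in
      * tracefs.  The output has roughly this shape (illustrative; the real
      * values come from the event's fields and print format):
      *
      *    name: <event name>
      *    ID: <event type id>
      *    format:
      *            field:unsigned short common_type;  offset:0;  size:2;  signed:0;
      *            ... the remaining common fields, a blank line, then the
      *            event's own fields ...
      *
      *    print fmt: <the event's print_fmt string>
      */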
1268
1269static int trace_format_open(struct inode *inode, struct file *file)
1270{
1271        struct seq_file *m;
1272        int ret;
1273
1274        ret = seq_open(file, &trace_format_seq_ops);
1275        if (ret < 0)
1276                return ret;
1277
1278        m = file->private_data;
1279        m->private = file;
1280
1281        return 0;
1282}
1283
1284static ssize_t
1285event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1286{
1287        int id = (long)event_file_data(filp);
1288        char buf[32];
1289        int len;
1290
1291        if (*ppos)
1292                return 0;
1293
1294        if (unlikely(!id))
1295                return -ENODEV;
1296
1297        len = sprintf(buf, "%d\n", id);
1298
1299        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1300}
1301
1302static ssize_t
1303event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1304                  loff_t *ppos)
1305{
1306        struct trace_event_file *file;
1307        struct trace_seq *s;
1308        int r = -ENODEV;
1309
1310        if (*ppos)
1311                return 0;
1312
1313        s = kmalloc(sizeof(*s), GFP_KERNEL);
1314
1315        if (!s)
1316                return -ENOMEM;
1317
1318        trace_seq_init(s);
1319
1320        mutex_lock(&event_mutex);
1321        file = event_file_data(filp);
1322        if (file)
1323                print_event_filter(file, s);
1324        mutex_unlock(&event_mutex);
1325
1326        if (file)
1327                r = simple_read_from_buffer(ubuf, cnt, ppos,
1328                                            s->buffer, trace_seq_used(s));
1329
1330        kfree(s);
1331
1332        return r;
1333}
1334
1335static ssize_t
1336event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1337                   loff_t *ppos)
1338{
1339        struct trace_event_file *file;
1340        char *buf;
1341        int err = -ENODEV;
1342
1343        if (cnt >= PAGE_SIZE)
1344                return -EINVAL;
1345
1346        buf = memdup_user_nul(ubuf, cnt);
1347        if (IS_ERR(buf))
1348                return PTR_ERR(buf);
1349
1350        mutex_lock(&event_mutex);
1351        file = event_file_data(filp);
1352        if (file)
1353                err = apply_event_filter(file, buf);
1354        mutex_unlock(&event_mutex);
1355
1356        kfree(buf);
1357        if (err < 0)
1358                return err;
1359
1360        *ppos += cnt;
1361
1362        return cnt;
1363}
1364
1365static LIST_HEAD(event_subsystems);
1366
1367static int subsystem_open(struct inode *inode, struct file *filp)
1368{
1369        struct event_subsystem *system = NULL;
1370        struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
1371        struct trace_array *tr;
1372        int ret;
1373
1374        if (tracing_is_disabled())
1375                return -ENODEV;
1376
1377        /* Make sure the system still exists */
1378        mutex_lock(&trace_types_lock);
1379        mutex_lock(&event_mutex);
1380        list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1381                list_for_each_entry(dir, &tr->systems, list) {
1382                        if (dir == inode->i_private) {
1383                                /* Don't open systems with no events */
1384                                if (dir->nr_events) {
1385                                        __get_system_dir(dir);
1386                                        system = dir->subsystem;
1387                                }
1388                                goto exit_loop;
1389                        }
1390                }
1391        }
1392 exit_loop:
1393        mutex_unlock(&event_mutex);
1394        mutex_unlock(&trace_types_lock);
1395
1396        if (!system)
1397                return -ENODEV;
1398
1399        /* Some versions of gcc think dir can be uninitialized here */
1400        WARN_ON(!dir);
1401
 1402        /* Still need to increment the ref count of the trace array */
1403        if (trace_array_get(tr) < 0) {
1404                put_system(dir);
1405                return -ENODEV;
1406        }
1407
1408        ret = tracing_open_generic(inode, filp);
1409        if (ret < 0) {
1410                trace_array_put(tr);
1411                put_system(dir);
1412        }
1413
1414        return ret;
1415}
1416
1417static int system_tr_open(struct inode *inode, struct file *filp)
1418{
1419        struct trace_subsystem_dir *dir;
1420        struct trace_array *tr = inode->i_private;
1421        int ret;
1422
1423        if (tracing_is_disabled())
1424                return -ENODEV;
1425
1426        if (trace_array_get(tr) < 0)
1427                return -ENODEV;
1428
1429        /* Make a temporary dir that has no system but points to tr */
1430        dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1431        if (!dir) {
1432                trace_array_put(tr);
1433                return -ENOMEM;
1434        }
1435
1436        dir->tr = tr;
1437
1438        ret = tracing_open_generic(inode, filp);
1439        if (ret < 0) {
1440                trace_array_put(tr);
1441                kfree(dir);
1442                return ret;
1443        }
1444
1445        filp->private_data = dir;
1446
1447        return 0;
1448}
1449
1450static int subsystem_release(struct inode *inode, struct file *file)
1451{
1452        struct trace_subsystem_dir *dir = file->private_data;
1453
1454        trace_array_put(dir->tr);
1455
1456        /*
1457         * If dir->subsystem is NULL, then this is a temporary
1458         * descriptor that was made for a trace_array to enable
1459         * all subsystems.
1460         */
1461        if (dir->subsystem)
1462                put_system(dir);
1463        else
1464                kfree(dir);
1465
1466        return 0;
1467}
1468
1469static ssize_t
1470subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1471                      loff_t *ppos)
1472{
1473        struct trace_subsystem_dir *dir = filp->private_data;
1474        struct event_subsystem *system = dir->subsystem;
1475        struct trace_seq *s;
1476        int r;
1477
1478        if (*ppos)
1479                return 0;
1480
1481        s = kmalloc(sizeof(*s), GFP_KERNEL);
1482        if (!s)
1483                return -ENOMEM;
1484
1485        trace_seq_init(s);
1486
1487        print_subsystem_event_filter(system, s);
1488        r = simple_read_from_buffer(ubuf, cnt, ppos,
1489                                    s->buffer, trace_seq_used(s));
1490
1491        kfree(s);
1492
1493        return r;
1494}
1495
1496static ssize_t
1497subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1498                       loff_t *ppos)
1499{
1500        struct trace_subsystem_dir *dir = filp->private_data;
1501        char *buf;
1502        int err;
1503
1504        if (cnt >= PAGE_SIZE)
1505                return -EINVAL;
1506
1507        buf = memdup_user_nul(ubuf, cnt);
1508        if (IS_ERR(buf))
1509                return PTR_ERR(buf);
1510
1511        err = apply_subsystem_event_filter(dir, buf);
1512        kfree(buf);
1513        if (err < 0)
1514                return err;
1515
1516        *ppos += cnt;
1517
1518        return cnt;
1519}
1520
1521static ssize_t
1522show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1523{
1524        int (*func)(struct trace_seq *s) = filp->private_data;
1525        struct trace_seq *s;
1526        int r;
1527
1528        if (*ppos)
1529                return 0;
1530
1531        s = kmalloc(sizeof(*s), GFP_KERNEL);
1532        if (!s)
1533                return -ENOMEM;
1534
1535        trace_seq_init(s);
1536
1537        func(s);
1538        r = simple_read_from_buffer(ubuf, cnt, ppos,
1539                                    s->buffer, trace_seq_used(s));
1540
1541        kfree(s);
1542
1543        return r;
1544}
1545
1546static int max_pids(struct trace_pid_list *pid_list)
1547{
1548        return (PAGE_SIZE << pid_list->order) / sizeof(pid_t);
1549}
1550
1551static void ignore_task_cpu(void *data)
1552{
1553        struct trace_array *tr = data;
1554        struct trace_pid_list *pid_list;
1555
1556        /*
1557         * This function is called by on_each_cpu() while the
1558         * event_mutex is held.
1559         */
1560        pid_list = rcu_dereference_protected(tr->filtered_pids,
1561                                             mutex_is_locked(&event_mutex));
1562
1563        this_cpu_write(tr->trace_buffer.data->ignore_pid,
1564                       check_ignore_pid(pid_list, current));
1565}
1566
1567static ssize_t
1568ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
1569                       size_t cnt, loff_t *ppos)
1570{
1571        struct seq_file *m = filp->private_data;
1572        struct trace_array *tr = m->private;
1573        struct trace_pid_list *filtered_pids = NULL;
1574        struct trace_pid_list *pid_list = NULL;
1575        struct trace_event_file *file;
1576        struct trace_parser parser;
1577        unsigned long val;
1578        loff_t this_pos;
1579        ssize_t read = 0;
1580        ssize_t ret = 0;
1581        pid_t pid;
1582        int i;
1583
1584        if (!cnt)
1585                return 0;
1586
1587        ret = tracing_update_buffers();
1588        if (ret < 0)
1589                return ret;
1590
1591        if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
1592                return -ENOMEM;
1593
1594        mutex_lock(&event_mutex);
1595        /*
1596         * Load as many pids into the array before doing a
1597         * swap from the tr->filtered_pids to the new list.
1598         */
1599        while (cnt > 0) {
1600
1601                this_pos = 0;
1602
1603                ret = trace_get_user(&parser, ubuf, cnt, &this_pos);
1604                if (ret < 0 || !trace_parser_loaded(&parser))
1605                        break;
1606
1607                read += ret;
1608                ubuf += ret;
1609                cnt -= ret;
1610
1611                parser.buffer[parser.idx] = 0;
1612
1613                ret = -EINVAL;
1614                if (kstrtoul(parser.buffer, 0, &val))
1615                        break;
1616                if (val > INT_MAX)
1617                        break;
1618
1619                pid = (pid_t)val;
1620
1621                ret = -ENOMEM;
1622                if (!pid_list) {
1623                        pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
1624                        if (!pid_list)
1625                                break;
1626
1627                        filtered_pids = rcu_dereference_protected(tr->filtered_pids,
1628                                                        lockdep_is_held(&event_mutex));
1629                        if (filtered_pids)
1630                                pid_list->order = filtered_pids->order;
1631                        else
1632                                pid_list->order = 0;
1633
1634                        pid_list->pids = (void *)__get_free_pages(GFP_KERNEL,
1635                                                                  pid_list->order);
1636                        if (!pid_list->pids)
1637                                break;
1638
1639                        if (filtered_pids) {
1640                                pid_list->nr_pids = filtered_pids->nr_pids;
1641                                memcpy(pid_list->pids, filtered_pids->pids,
1642                                       pid_list->nr_pids * sizeof(pid_t));
1643                        } else
1644                                pid_list->nr_pids = 0;
1645                }
1646
1647                if (pid_list->nr_pids >= max_pids(pid_list)) {
1648                        pid_t *pid_page;
1649
1650                        pid_page = (void *)__get_free_pages(GFP_KERNEL,
1651                                                            pid_list->order + 1);
1652                        if (!pid_page)
1653                                break;
1654                        memcpy(pid_page, pid_list->pids,
1655                               pid_list->nr_pids * sizeof(pid_t));
1656                        free_pages((unsigned long)pid_list->pids, pid_list->order);
1657
1658                        pid_list->order++;
1659                        pid_list->pids = pid_page;
1660                }
1661
1662                pid_list->pids[pid_list->nr_pids++] = pid;
1663                trace_parser_clear(&parser);
1664                ret = 0;
1665        }
1666        trace_parser_put(&parser);
1667
1668        if (ret < 0) {
1669                if (pid_list)
1670                        free_pages((unsigned long)pid_list->pids, pid_list->order);
1671                kfree(pid_list);
1672                mutex_unlock(&event_mutex);
1673                return ret;
1674        }
1675
1676        if (!pid_list) {
1677                mutex_unlock(&event_mutex);
1678                return ret;
1679        }
1680
1681        sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL);
1682
1683        /* Remove duplicates */
1684        for (i = 1; i < pid_list->nr_pids; i++) {
1685                int start = i;
1686
1687                while (i < pid_list->nr_pids &&
1688                       pid_list->pids[i - 1] == pid_list->pids[i])
1689                        i++;
1690
1691                if (start != i) {
1692                        if (i < pid_list->nr_pids) {
1693                                memmove(&pid_list->pids[start], &pid_list->pids[i],
1694                                        (pid_list->nr_pids - i) * sizeof(pid_t));
1695                                pid_list->nr_pids -= i - start;
1696                                i = start;
1697                        } else
1698                                pid_list->nr_pids = start;
1699                }
1700        }
1701
1702        rcu_assign_pointer(tr->filtered_pids, pid_list);
1703
1704        list_for_each_entry(file, &tr->events, list) {
1705                set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1706        }
1707
1708        if (filtered_pids) {
1709                synchronize_sched();
1710
1711                free_pages((unsigned long)filtered_pids->pids, filtered_pids->order);
1712                kfree(filtered_pids);
1713        } else {
1714                /*
1715                 * Register a probe that is called before all other probes
1716                 * to set ignore_pid if next or prev do not match.
1717                 * Register a probe that is called after all other probes
1718                 * to only keep ignore_pid set if next pid matches.
1719                 */
1720                register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
1721                                                 tr, INT_MAX);
1722                register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
1723                                                 tr, 0);
1724
1725                register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
1726                                                 tr, INT_MAX);
1727                register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
1728                                                 tr, 0);
1729
1730                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
1731                                                     tr, INT_MAX);
1732                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
1733                                                     tr, 0);
1734
1735                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
1736                                                 tr, INT_MAX);
1737                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
1738                                                 tr, 0);
1739        }
1740
1741        /*
1742         * Ignoring of pids is done at task switch. But we have to
1743         * check for those tasks that are currently running.
1744         * Always do this in case a pid was appended or removed.
1745         */
1746        on_each_cpu(ignore_task_cpu, tr, 1);
1747
1748        mutex_unlock(&event_mutex);
1749
1750        ret = read;
1751        *ppos += read;
1752
1753        return ret;
1754}
1755
1756static int ftrace_event_avail_open(struct inode *inode, struct file *file);
1757static int ftrace_event_set_open(struct inode *inode, struct file *file);
1758static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
1759static int ftrace_event_release(struct inode *inode, struct file *file);
1760
1761static const struct seq_operations show_event_seq_ops = {
1762        .start = t_start,
1763        .next = t_next,
1764        .show = t_show,
1765        .stop = t_stop,
1766};
1767
1768static const struct seq_operations show_set_event_seq_ops = {
1769        .start = s_start,
1770        .next = s_next,
1771        .show = t_show,
1772        .stop = t_stop,
1773};
1774
1775static const struct seq_operations show_set_pid_seq_ops = {
1776        .start = p_start,
1777        .next = p_next,
1778        .show = p_show,
1779        .stop = p_stop,
1780};
1781
1782static const struct file_operations ftrace_avail_fops = {
1783        .open = ftrace_event_avail_open,
1784        .read = seq_read,
1785        .llseek = seq_lseek,
1786        .release = seq_release,
1787};
1788
1789static const struct file_operations ftrace_set_event_fops = {
1790        .open = ftrace_event_set_open,
1791        .read = seq_read,
1792        .write = ftrace_event_write,
1793        .llseek = seq_lseek,
1794        .release = ftrace_event_release,
1795};
1796
1797static const struct file_operations ftrace_set_event_pid_fops = {
1798        .open = ftrace_event_set_pid_open,
1799        .read = seq_read,
1800        .write = ftrace_event_pid_write,
1801        .llseek = seq_lseek,
1802        .release = ftrace_event_release,
1803};
1804
1805static const struct file_operations ftrace_enable_fops = {
1806        .open = tracing_open_generic,
1807        .read = event_enable_read,
1808        .write = event_enable_write,
1809        .llseek = default_llseek,
1810};
1811
1812static const struct file_operations ftrace_event_format_fops = {
1813        .open = trace_format_open,
1814        .read = seq_read,
1815        .llseek = seq_lseek,
1816        .release = seq_release,
1817};
1818
1819static const struct file_operations ftrace_event_id_fops = {
1820        .read = event_id_read,
1821        .llseek = default_llseek,
1822};
1823
1824static const struct file_operations ftrace_event_filter_fops = {
1825        .open = tracing_open_generic,
1826        .read = event_filter_read,
1827        .write = event_filter_write,
1828        .llseek = default_llseek,
1829};
1830
1831static const struct file_operations ftrace_subsystem_filter_fops = {
1832        .open = subsystem_open,
1833        .read = subsystem_filter_read,
1834        .write = subsystem_filter_write,
1835        .llseek = default_llseek,
1836        .release = subsystem_release,
1837};
1838
1839static const struct file_operations ftrace_system_enable_fops = {
1840        .open = subsystem_open,
1841        .read = system_enable_read,
1842        .write = system_enable_write,
1843        .llseek = default_llseek,
1844        .release = subsystem_release,
1845};
1846
1847static const struct file_operations ftrace_tr_enable_fops = {
1848        .open = system_tr_open,
1849        .read = system_enable_read,
1850        .write = system_enable_write,
1851        .llseek = default_llseek,
1852        .release = subsystem_release,
1853};
1854
1855static const struct file_operations ftrace_show_header_fops = {
1856        .open = tracing_open_generic,
1857        .read = show_header,
1858        .llseek = default_llseek,
1859};
1860
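/*
 * Common open routine for the event seq_files: open the seq_file and
 * stash the inode's private data (the trace_array) in m->private so
 * the seq operations can find it.
 */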
1861static int
1862ftrace_event_open(struct inode *inode, struct file *file,
1863                  const struct seq_operations *seq_ops)
1864{
1865        struct seq_file *m;
1866        int ret;
1867
1868        ret = seq_open(file, seq_ops);
1869        if (ret < 0)
1870                return ret;
1871        m = file->private_data;
1872        /* copy tr over to seq ops */
1873        m->private = inode->i_private;
1874
1875        return ret;
1876}
1877
1878static int ftrace_event_release(struct inode *inode, struct file *file)
1879{
1880        struct trace_array *tr = inode->i_private;
1881
1882        trace_array_put(tr);
1883
1884        return seq_release(inode, file);
1885}
1886
1887static int
1888ftrace_event_avail_open(struct inode *inode, struct file *file)
1889{
1890        const struct seq_operations *seq_ops = &show_event_seq_ops;
1891
1892        return ftrace_event_open(inode, file, seq_ops);
1893}
1894
1895static int
1896ftrace_event_set_open(struct inode *inode, struct file *file)
1897{
1898        const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1899        struct trace_array *tr = inode->i_private;
1900        int ret;
1901
1902        if (trace_array_get(tr) < 0)
1903                return -ENODEV;
1904
1905        if ((file->f_mode & FMODE_WRITE) &&
1906            (file->f_flags & O_TRUNC))
1907                ftrace_clear_events(tr);
1908
1909        ret = ftrace_event_open(inode, file, seq_ops);
1910        if (ret < 0)
1911                trace_array_put(tr);
1912        return ret;
1913}
1914
1915static int
1916ftrace_event_set_pid_open(struct inode *inode, struct file *file)
1917{
1918        const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
1919        struct trace_array *tr = inode->i_private;
1920        int ret;
1921
1922        if (trace_array_get(tr) < 0)
1923                return -ENODEV;
1924
1925        if ((file->f_mode & FMODE_WRITE) &&
1926            (file->f_flags & O_TRUNC))
1927                ftrace_clear_event_pids(tr);
1928
1929        ret = ftrace_event_open(inode, file, seq_ops);
1930        if (ret < 0)
1931                trace_array_put(tr);
1932        return ret;
1933}
1934
1935static struct event_subsystem *
1936create_new_subsystem(const char *name)
1937{
1938        struct event_subsystem *system;
1939
1940        /* need to create new entry */
1941        system = kmalloc(sizeof(*system), GFP_KERNEL);
1942        if (!system)
1943                return NULL;
1944
1945        system->ref_count = 1;
1946
1947        /* Only allocate if dynamic (kprobes and modules) */
1948        system->name = kstrdup_const(name, GFP_KERNEL);
1949        if (!system->name)
1950                goto out_free;
1951
1952        system->filter = NULL;
1953
1954        system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
1955        if (!system->filter)
1956                goto out_free;
1957
1958        list_add(&system->list, &event_subsystems);
1959
1960        return system;
1961
1962 out_free:
1963        kfree_const(system->name);
1964        kfree(system);
1965        return NULL;
1966}
1967
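/*
 * Look up (or create) the events/<system> directory for this trace
 * instance.  A newly created directory also gets its per-system
 * "filter" and "enable" control files.
 */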
1968static struct dentry *
1969event_subsystem_dir(struct trace_array *tr, const char *name,
1970                    struct trace_event_file *file, struct dentry *parent)
1971{
1972        struct trace_subsystem_dir *dir;
1973        struct event_subsystem *system;
1974        struct dentry *entry;
1975
1976        /* First see if we have already created this dir */
1977        list_for_each_entry(dir, &tr->systems, list) {
1978                system = dir->subsystem;
1979                if (strcmp(system->name, name) == 0) {
1980                        dir->nr_events++;
1981                        file->system = dir;
1982                        return dir->entry;
1983                }
1984        }
1985
1986        /* Now see if the system itself exists. */
1987        list_for_each_entry(system, &event_subsystems, list) {
1988                if (strcmp(system->name, name) == 0)
1989                        break;
1990        }
1991        /* Reset system variable when not found */
1992        if (&system->list == &event_subsystems)
1993                system = NULL;
1994
1995        dir = kmalloc(sizeof(*dir), GFP_KERNEL);
1996        if (!dir)
1997                goto out_fail;
1998
1999        if (!system) {
2000                system = create_new_subsystem(name);
2001                if (!system)
2002                        goto out_free;
2003        } else
2004                __get_system(system);
2005
2006        dir->entry = tracefs_create_dir(name, parent);
2007        if (!dir->entry) {
2008                pr_warn("Failed to create system directory %s\n", name);
2009                __put_system(system);
2010                goto out_free;
2011        }
2012
2013        dir->tr = tr;
2014        dir->ref_count = 1;
2015        dir->nr_events = 1;
2016        dir->subsystem = system;
2017        file->system = dir;
2018
2019        entry = tracefs_create_file("filter", 0644, dir->entry, dir,
2020                                    &ftrace_subsystem_filter_fops);
2021        if (!entry) {
2022                kfree(system->filter);
2023                system->filter = NULL;
2024                pr_warn("Could not create tracefs '%s/filter' entry\n", name);
2025        }
2026
2027        trace_create_file("enable", 0644, dir->entry, dir,
2028                          &ftrace_system_enable_fops);
2029
2030        list_add(&dir->list, &tr->systems);
2031
2032        return dir->entry;
2033
2034 out_free:
2035        kfree(dir);
2036 out_fail:
2037        /* Only print this message if the failure was a memory allocation */
2038        if (!dir || !system)
2039                pr_warn("No memory to create event subsystem %s\n", name);
2040        return NULL;
2041}
2042
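/*
 * Create the tracefs directory for a single event under its subsystem
 * directory and populate it with the per-event control files:
 * "enable" and "trigger" (unless the event ignores enabling),
 * "id" (with CONFIG_PERF_EVENTS), "filter" and "format".
 */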
2043static int
2044event_create_dir(struct dentry *parent, struct trace_event_file *file)
2045{
2046        struct trace_event_call *call = file->event_call;
2047        struct trace_array *tr = file->tr;
2048        struct list_head *head;
2049        struct dentry *d_events;
2050        const char *name;
2051        int ret;
2052
2053        /*
2054         * If the trace point header did not define TRACE_SYSTEM
2055         * then the system would be called "TRACE_SYSTEM".
2056         */
2057        if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
2058                d_events = event_subsystem_dir(tr, call->class->system, file, parent);
2059                if (!d_events)
2060                        return -ENOMEM;
2061        } else
2062                d_events = parent;
2063
2064        name = trace_event_name(call);
2065        file->dir = tracefs_create_dir(name, d_events);
2066        if (!file->dir) {
2067                pr_warn("Could not create tracefs '%s' directory\n", name);
2068                return -1;
2069        }
2070
2071        if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
2072                trace_create_file("enable", 0644, file->dir, file,
2073                                  &ftrace_enable_fops);
2074
2075#ifdef CONFIG_PERF_EVENTS
2076        if (call->event.type && call->class->reg)
2077                trace_create_file("id", 0444, file->dir,
2078                                  (void *)(long)call->event.type,
2079                                  &ftrace_event_id_fops);
2080#endif
2081
2082        /*
2083         * Other events may have the same class. Only update
2084         * the fields if they are not already defined.
2085         */
2086        head = trace_get_fields(call);
2087        if (list_empty(head)) {
2088                ret = call->class->define_fields(call);
2089                if (ret < 0) {
2090                        pr_warn("Could not initialize trace point events/%s\n",
2091                                name);
2092                        return -1;
2093                }
2094        }
2095        trace_create_file("filter", 0644, file->dir, file,
2096                          &ftrace_event_filter_fops);
2097
2098        /*
2099         * Only event directories that can be enabled should have
2100         * triggers.
2101         */
2102        if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
2103                trace_create_file("trigger", 0644, file->dir, file,
2104                                  &event_trigger_fops);
2105
2106        trace_create_file("format", 0444, file->dir, call,
2107                          &ftrace_event_format_fops);
2108
2109        return 0;
2110}
2111
2112static void remove_event_from_tracers(struct trace_event_call *call)
2113{
2114        struct trace_event_file *file;
2115        struct trace_array *tr;
2116
2117        do_for_each_event_file_safe(tr, file) {
2118                if (file->event_call != call)
2119                        continue;
2120
2121                remove_event_file_dir(file);
2122                /*
2123                 * The do_for_each_event_file_safe() is
2124                 * a double loop. After finding the call for this
2125                 * trace_array, we use break to jump to the next
2126                 * trace_array.
2127                 */
2128                break;
2129        } while_for_each_event_file();
2130}
2131
2132static void event_remove(struct trace_event_call *call)
2133{
2134        struct trace_array *tr;
2135        struct trace_event_file *file;
2136
2137        do_for_each_event_file(tr, file) {
2138                if (file->event_call != call)
2139                        continue;
2140                ftrace_event_enable_disable(file, 0);
2141                /*
2142                 * The do_for_each_event_file() is
2143                 * a double loop. After finding the call for this
2144                 * trace_array, we use break to jump to the next
2145                 * trace_array.
2146                 */
2147                break;
2148        } while_for_each_event_file();
2149
2150        if (call->event.funcs)
2151                __unregister_trace_event(&call->event);
2152        remove_event_from_tracers(call);
2153        list_del(&call->list);
2154}
2155
2156static int event_init(struct trace_event_call *call)
2157{
2158        int ret = 0;
2159        const char *name;
2160
2161        name = trace_event_name(call);
2162        if (WARN_ON(!name))
2163                return -EINVAL;
2164
2165        if (call->class->raw_init) {
2166                ret = call->class->raw_init(call);
2167                if (ret < 0 && ret != -ENOSYS)
2168                        pr_warn("Could not initialize trace events/%s\n", name);
2169        }
2170
2171        return ret;
2172}
2173
2174static int
2175__register_event(struct trace_event_call *call, struct module *mod)
2176{
2177        int ret;
2178
2179        ret = event_init(call);
2180        if (ret < 0)
2181                return ret;
2182
2183        list_add(&call->list, &ftrace_events);
2184        call->mod = mod;
2185
2186        return 0;
2187}
2188
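/*
 * Replace the enum name at @ptr (@len bytes long) in the print_fmt
 * string with its numeric value, shifting the remainder of the string
 * down.  Returns a pointer just past the inserted number, or NULL if
 * the number does not fit in the space the name occupied.
 */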
2189static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
2190{
2191        int rlen;
2192        int elen;
2193
2194        /* Find the length of the enum value as a string */
2195        elen = snprintf(ptr, 0, "%ld", map->enum_value);
2196        /* Make sure there's enough room to replace the string with the value */
2197        if (len < elen)
2198                return NULL;
2199
2200        snprintf(ptr, elen + 1, "%ld", map->enum_value);
2201
2202        /* Get the rest of the string of ptr */
2203        rlen = strlen(ptr + len);
2204        memmove(ptr + elen, ptr + len, rlen);
2205        /* Make sure we end the new string */
2206        ptr[elen + rlen] = 0;
2207
2208        return ptr + elen;
2209}
2210
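/*
 * Walk the event's print_fmt and replace every standalone occurrence of
 * map->enum_string with its numeric value.  Quoted strings, numbers and
 * struct member accesses ('.' and '->') are skipped so that only the
 * bare enum symbol gets rewritten.
 */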
2211static void update_event_printk(struct trace_event_call *call,
2212                                struct trace_enum_map *map)
2213{
2214        char *ptr;
2215        int quote = 0;
2216        int len = strlen(map->enum_string);
2217
2218        for (ptr = call->print_fmt; *ptr; ptr++) {
2219                if (*ptr == '\\') {
2220                        ptr++;
2221                        /* paranoid */
2222                        if (!*ptr)
2223                                break;
2224                        continue;
2225                }
2226                if (*ptr == '"') {
2227                        quote ^= 1;
2228                        continue;
2229                }
2230                if (quote)
2231                        continue;
2232                if (isdigit(*ptr)) {
2233                        /* skip numbers */
2234                        do {
2235                                ptr++;
2236                                /* Check for alpha chars like ULL */
2237                        } while (isalnum(*ptr));
2238                        if (!*ptr)
2239                                break;
2240                        /*
2241                         * A number must have some kind of delimiter after
2242                         * it, and we can ignore that too.
2243                         */
2244                        continue;
2245                }
2246                if (isalpha(*ptr) || *ptr == '_') {
2247                        if (strncmp(map->enum_string, ptr, len) == 0 &&
2248                            !isalnum(ptr[len]) && ptr[len] != '_') {
2249                                ptr = enum_replace(ptr, map, len);
2250                                /* Hmm, enum string smaller than value */
2251                                if (WARN_ON_ONCE(!ptr))
2252                                        return;
2253                                /*
2254                                 * No need to decrement here, as enum_replace()
2255                                 * returns the pointer to the character past
2256                                 * the enum, and two enums can not be placed
2257                                 * back to back without something in between.
2258                                 * We can skip that something in between.
2259                                 */
2260                                continue;
2261                        }
2262                skip_more:
2263                        do {
2264                                ptr++;
2265                        } while (isalnum(*ptr) || *ptr == '_');
2266                        if (!*ptr)
2267                                break;
2268                        /*
2269                         * If what comes after this variable is a '.' or
2270                         * '->' then we can continue to ignore that string.
2271                         */
2272                        if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2273                                ptr += *ptr == '.' ? 1 : 2;
2274                                if (!*ptr)
2275                                        break;
2276                                goto skip_more;
2277                        }
2278                        /*
2279                         * Once again, we can skip the delimiter that came
2280                         * after the string.
2281                         */
2282                        continue;
2283                }
2284        }
2285}
2286
2287void trace_event_enum_update(struct trace_enum_map **map, int len)
2288{
2289        struct trace_event_call *call, *p;
2290        const char *last_system = NULL;
2291        int last_i;
2292        int i;
2293
2294        down_write(&trace_event_sem);
2295        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2296                /* events are usually grouped together with systems */
2297                if (!last_system || call->class->system != last_system) {
2298                        last_i = 0;
2299                        last_system = call->class->system;
2300                }
2301
2302                for (i = last_i; i < len; i++) {
2303                        if (call->class->system == map[i]->system) {
2304                                /* Save the first system if need be */
2305                                if (!last_i)
2306                                        last_i = i;
2307                                update_event_printk(call, map[i]);
2308                        }
2309                }
2310        }
2311        up_write(&trace_event_sem);
2312}
2313
2314static struct trace_event_file *
2315trace_create_new_event(struct trace_event_call *call,
2316                       struct trace_array *tr)
2317{
2318        struct trace_event_file *file;
2319
2320        file = kmem_cache_alloc(file_cachep, GFP_TRACE);
2321        if (!file)
2322                return NULL;
2323
2324        file->event_call = call;
2325        file->tr = tr;
2326        atomic_set(&file->sm_ref, 0);
2327        atomic_set(&file->tm_ref, 0);
2328        INIT_LIST_HEAD(&file->triggers);
2329        list_add(&file->list, &tr->events);
2330
2331        return file;
2332}
2333
2334/* Add an event to a trace directory */
2335static int
2336__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
2337{
2338        struct trace_event_file *file;
2339
2340        file = trace_create_new_event(call, tr);
2341        if (!file)
2342                return -ENOMEM;
2343
2344        return event_create_dir(tr->event_dir, file);
2345}
2346
2347/*
2348 * Just create a descriptor for early init. A descriptor is required
2349 * for enabling events at boot. We want to enable events before
2350 * the filesystem is initialized.
2351 */
2352static __init int
2353__trace_early_add_new_event(struct trace_event_call *call,
2354                            struct trace_array *tr)
2355{
2356        struct trace_event_file *file;
2357
2358        file = trace_create_new_event(call, tr);
2359        if (!file)
2360                return -ENOMEM;
2361
2362        return 0;
2363}
2364
2365struct ftrace_module_file_ops;
2366static void __add_event_to_tracers(struct trace_event_call *call);
2367
2368/* Add an additional event_call dynamically */
2369int trace_add_event_call(struct trace_event_call *call)
2370{
2371        int ret;
2372        mutex_lock(&trace_types_lock);
2373        mutex_lock(&event_mutex);
2374
2375        ret = __register_event(call, NULL);
2376        if (ret >= 0)
2377                __add_event_to_tracers(call);
2378
2379        mutex_unlock(&event_mutex);
2380        mutex_unlock(&trace_types_lock);
2381        return ret;
2382}
2383
2384/*
2385 * Must be called under locking of trace_types_lock, event_mutex and
2386 * trace_event_sem.
2387 */
2388static void __trace_remove_event_call(struct trace_event_call *call)
2389{
2390        event_remove(call);
2391        trace_destroy_fields(call);
2392        free_event_filter(call->filter);
2393        call->filter = NULL;
2394}
2395
2396static int probe_remove_event_call(struct trace_event_call *call)
2397{
2398        struct trace_array *tr;
2399        struct trace_event_file *file;
2400
2401#ifdef CONFIG_PERF_EVENTS
2402        if (call->perf_refcount)
2403                return -EBUSY;
2404#endif
2405        do_for_each_event_file(tr, file) {
2406                if (file->event_call != call)
2407                        continue;
2408                /*
2409                 * We can't rely on the ftrace_event_enable_disable(enable => 0)
2410                 * that we are about to do; EVENT_FILE_FL_SOFT_MODE can suppress
2411                 * TRACE_REG_UNREGISTER.
2412                 */
2413                if (file->flags & EVENT_FILE_FL_ENABLED)
2414                        return -EBUSY;
2415                /*
2416                 * The do_for_each_event_file() is
2417                 * a double loop. After finding the call for this
2418                 * trace_array, we use break to jump to the next
2419                 * trace_array.
2420                 */
2421                break;
2422        } while_for_each_event_file();
2423
2424        __trace_remove_event_call(call);
2425
2426        return 0;
2427}
2428
2429/* Remove an event_call */
2430int trace_remove_event_call(struct trace_event_call *call)
2431{
2432        int ret;
2433
2434        mutex_lock(&trace_types_lock);
2435        mutex_lock(&event_mutex);
2436        down_write(&trace_event_sem);
2437        ret = probe_remove_event_call(call);
2438        up_write(&trace_event_sem);
2439        mutex_unlock(&event_mutex);
2440        mutex_unlock(&trace_types_lock);
2441
2442        return ret;
2443}
2444
2445#define for_each_event(event, start, end)                       \
2446        for (event = start;                                     \
2447             (unsigned long)event < (unsigned long)end;         \
2448             event++)
2449
2450#ifdef CONFIG_MODULES
2451
2452static void trace_module_add_events(struct module *mod)
2453{
2454        struct trace_event_call **call, **start, **end;
2455
2456        if (!mod->num_trace_events)
2457                return;
2458
2459        /* Don't add infrastructure for mods without tracepoints */
2460        if (trace_module_has_bad_taint(mod)) {
2461                pr_err("%s: module has bad taint, not creating trace events\n",
2462                       mod->name);
2463                return;
2464        }
2465
2466        start = mod->trace_events;
2467        end = mod->trace_events + mod->num_trace_events;
2468
2469        for_each_event(call, start, end) {
2470                __register_event(*call, mod);
2471                __add_event_to_tracers(*call);
2472        }
2473}
2474
2475static void trace_module_remove_events(struct module *mod)
2476{
2477        struct trace_event_call *call, *p;
2478        bool clear_trace = false;
2479
2480        down_write(&trace_event_sem);
2481        list_for_each_entry_safe(call, p, &ftrace_events, list) {
2482                if (call->mod == mod) {
2483                        if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
2484                                clear_trace = true;
2485                        __trace_remove_event_call(call);
2486                }
2487        }
2488        up_write(&trace_event_sem);
2489
2490        /*
2491         * It is safest to reset the ring buffer if the module being unloaded
2492         * registered any events that were used. The only worry is if
2493         * a new module gets loaded, and takes on the same id as the events
2494         * of this module. When printing out the buffer, traced events left
2495         * over from this module may be passed to the new module events and
2496         * unexpected results may occur.
2497         */
2498        if (clear_trace)
2499                tracing_reset_all_online_cpus();
2500}
2501
2502static int trace_module_notify(struct notifier_block *self,
2503                               unsigned long val, void *data)
2504{
2505        struct module *mod = data;
2506
2507        mutex_lock(&trace_types_lock);
2508        mutex_lock(&event_mutex);
2509        switch (val) {
2510        case MODULE_STATE_COMING:
2511                trace_module_add_events(mod);
2512                break;
2513        case MODULE_STATE_GOING:
2514                trace_module_remove_events(mod);
2515                break;
2516        }
2517        mutex_unlock(&event_mutex);
2518        mutex_unlock(&trace_types_lock);
2519
2520        return 0;
2521}
2522
2523static struct notifier_block trace_module_nb = {
2524        .notifier_call = trace_module_notify,
2525        .priority = 1, /* higher than trace.c module notify */
2526};
2527#endif /* CONFIG_MODULES */
2528
2529/* Create a new event directory structure for a trace directory. */
2530static void
2531__trace_add_event_dirs(struct trace_array *tr)
2532{
2533        struct trace_event_call *call;
2534        int ret;
2535
2536        list_for_each_entry(call, &ftrace_events, list) {
2537                ret = __trace_add_new_event(call, tr);
2538                if (ret < 0)
2539                        pr_warn("Could not create directory for event %s\n",
2540                                trace_event_name(call));
2541        }
2542}
2543
2544struct trace_event_file *
2545find_event_file(struct trace_array *tr, const char *system,  const char *event)
2546{
2547        struct trace_event_file *file;
2548        struct trace_event_call *call;
2549        const char *name;
2550
2551        list_for_each_entry(file, &tr->events, list) {
2552
2553                call = file->event_call;
2554                name = trace_event_name(call);
2555
2556                if (!name || !call->class || !call->class->reg)
2557                        continue;
2558
2559                if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
2560                        continue;
2561
2562                if (strcmp(event, name) == 0 &&
2563                    strcmp(system, call->class->system) == 0)
2564                        return file;
2565        }
2566        return NULL;
2567}
2568
2569#ifdef CONFIG_DYNAMIC_FTRACE
2570
2571/* Avoid typos */
2572#define ENABLE_EVENT_STR        "enable_event"
2573#define DISABLE_EVENT_STR       "disable_event"
2574
2575struct event_probe_data {
2576        struct trace_event_file *file;
2577        unsigned long                   count;
2578        int                             ref;
2579        bool                            enable;
2580};
2581
2582static void
2583event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2584{
2585        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2586        struct event_probe_data *data = *pdata;
2587
2588        if (!data)
2589                return;
2590
2591        if (data->enable)
2592                clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2593        else
2594                set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2595}
2596
2597static void
2598event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2599{
2600        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2601        struct event_probe_data *data = *pdata;
2602
2603        if (!data)
2604                return;
2605
2606        if (!data->count)
2607                return;
2608
2609        /* Skip if the event is already in the state we want to switch to */
2610        if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
2611                return;
2612
2613        if (data->count != -1)
2614                (data->count)--;
2615
2616        event_enable_probe(ip, parent_ip, _data);
2617}
2618
2619static int
2620event_enable_print(struct seq_file *m, unsigned long ip,
2621                      struct ftrace_probe_ops *ops, void *_data)
2622{
2623        struct event_probe_data *data = _data;
2624
2625        seq_printf(m, "%ps:", (void *)ip);
2626
2627        seq_printf(m, "%s:%s:%s",
2628                   data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
2629                   data->file->event_call->class->system,
2630                   trace_event_name(data->file->event_call));
2631
2632        if (data->count == -1)
2633                seq_puts(m, ":unlimited\n");
2634        else
2635                seq_printf(m, ":count=%ld\n", data->count);
2636
2637        return 0;
2638}
2639
2640static int
2641event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
2642                  void **_data)
2643{
2644        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2645        struct event_probe_data *data = *pdata;
2646
2647        data->ref++;
2648        return 0;
2649}
2650
2651static void
2652event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
2653                  void **_data)
2654{
2655        struct event_probe_data **pdata = (struct event_probe_data **)_data;
2656        struct event_probe_data *data = *pdata;
2657
2658        if (WARN_ON_ONCE(data->ref <= 0))
2659                return;
2660
2661        data->ref--;
2662        if (!data->ref) {
2663                /* Remove the SOFT_MODE flag */
2664                __ftrace_event_enable_disable(data->file, 0, 1);
2665                module_put(data->file->event_call->mod);
2666                kfree(data);
2667        }
2668        *pdata = NULL;
2669}
2670
2671static struct ftrace_probe_ops event_enable_probe_ops = {
2672        .func                   = event_enable_probe,
2673        .print                  = event_enable_print,
2674        .init                   = event_enable_init,
2675        .free                   = event_enable_free,
2676};
2677
2678static struct ftrace_probe_ops event_enable_count_probe_ops = {
2679        .func                   = event_enable_count_probe,
2680        .print                  = event_enable_print,
2681        .init                   = event_enable_init,
2682        .free                   = event_enable_free,
2683};
2684
2685static struct ftrace_probe_ops event_disable_probe_ops = {
2686        .func                   = event_enable_probe,
2687        .print                  = event_enable_print,
2688        .init                   = event_enable_init,
2689        .free                   = event_enable_free,
2690};
2691
2692static struct ftrace_probe_ops event_disable_count_probe_ops = {
2693        .func                   = event_enable_count_probe,
2694        .print                  = event_enable_print,
2695        .init                   = event_enable_init,
2696        .free                   = event_enable_free,
2697};
2698
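/*
 * Handler for the enable_event/disable_event function triggers written
 * to set_ftrace_filter, e.g.:
 *
 *   echo 'schedule:enable_event:sched:sched_switch:2' > set_ftrace_filter
 *
 * which soft-enables the sched:sched_switch event (at most twice) when
 * schedule() is hit; the same line prefixed with '!' removes the probe.
 */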
2699static int
2700event_enable_func(struct ftrace_hash *hash,
2701                  char *glob, char *cmd, char *param, int enabled)
2702{
2703        struct trace_array *tr = top_trace_array();
2704        struct trace_event_file *file;
2705        struct ftrace_probe_ops *ops;
2706        struct event_probe_data *data;
2707        const char *system;
2708        const char *event;
2709        char *number;
2710        bool enable;
2711        int ret;
2712
2713        if (!tr)
2714                return -ENODEV;
2715
2716        /* hash funcs only work with set_ftrace_filter */
2717        if (!enabled || !param)
2718                return -EINVAL;
2719
2720        system = strsep(&param, ":");
2721        if (!param)
2722                return -EINVAL;
2723
2724        event = strsep(&param, ":");
2725
2726        mutex_lock(&event_mutex);
2727
2728        ret = -EINVAL;
2729        file = find_event_file(tr, system, event);
2730        if (!file)
2731                goto out;
2732
2733        enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
2734
2735        if (enable)
2736                ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
2737        else
2738                ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
2739
2740        if (glob[0] == '!') {
2741                unregister_ftrace_function_probe_func(glob+1, ops);
2742                ret = 0;
2743                goto out;
2744        }
2745
2746        ret = -ENOMEM;
2747        data = kzalloc(sizeof(*data), GFP_KERNEL);
2748        if (!data)
2749                goto out;
2750
2751        data->enable = enable;
2752        data->count = -1;
2753        data->file = file;
2754
2755        if (!param)
2756                goto out_reg;
2757
2758        number = strsep(&param, ":");
2759
2760        ret = -EINVAL;
2761        if (!strlen(number))
2762                goto out_free;
2763
2764        /*
2765         * We use the callback data field (which is a pointer)
2766         * as our counter.
2767         */
2768        ret = kstrtoul(number, 0, &data->count);
2769        if (ret)
2770                goto out_free;
2771
2772 out_reg:
2773        /* Don't let event modules unload while probe registered */
2774        ret = try_module_get(file->event_call->mod);
2775        if (!ret) {
2776                ret = -EBUSY;
2777                goto out_free;
2778        }
2779
2780        ret = __ftrace_event_enable_disable(file, 1, 1);
2781        if (ret < 0)
2782                goto out_put;
2783        ret = register_ftrace_function_probe(glob, ops, data);
2784        /*
2785         * The above returns on success the # of functions enabled,
2786         * but if it didn't find any functions it returns zero.
2787         * Consider no functions a failure too.
2788         */
2789        if (!ret) {
2790                ret = -ENOENT;
2791                goto out_disable;
2792        } else if (ret < 0)
2793                goto out_disable;
2794        /* Just return zero, not the number of enabled functions */
2795        ret = 0;
2796 out:
2797        mutex_unlock(&event_mutex);
2798        return ret;
2799
2800 out_disable:
2801        __ftrace_event_enable_disable(file, 0, 1);
2802 out_put:
2803        module_put(file->event_call->mod);
2804 out_free:
2805        kfree(data);
2806        goto out;
2807}
2808
2809static struct ftrace_func_command event_enable_cmd = {
2810        .name                   = ENABLE_EVENT_STR,
2811        .func                   = event_enable_func,
2812};
2813
2814static struct ftrace_func_command event_disable_cmd = {
2815        .name                   = DISABLE_EVENT_STR,
2816        .func                   = event_enable_func,
2817};
2818
2819static __init int register_event_cmds(void)
2820{
2821        int ret;
2822
2823        ret = register_ftrace_command(&event_enable_cmd);
2824        if (WARN_ON(ret < 0))
2825                return ret;
2826        ret = register_ftrace_command(&event_disable_cmd);
2827        if (WARN_ON(ret < 0))
2828                unregister_ftrace_command(&event_enable_cmd);
2829        return ret;
2830}
2831#else
2832static inline int register_event_cmds(void) { return 0; }
2833#endif /* CONFIG_DYNAMIC_FTRACE */
2834
2835/*
2836 * The top level array has already had its trace_event_file
2837 * descriptors created in order to allow for early events to
2838 * be recorded. This function is called after tracefs has been
2839 * initialized, and we now have to create the files associated
2840 * with the events.
2841 */
2842static __init void
2843__trace_early_add_event_dirs(struct trace_array *tr)
2844{
2845        struct trace_event_file *file;
2846        int ret;
2847
2848
2849        list_for_each_entry(file, &tr->events, list) {
2850                ret = event_create_dir(tr->event_dir, file);
2851                if (ret < 0)
2852                        pr_warn("Could not create directory for event %s\n",
2853                                trace_event_name(file->event_call));
2854        }
2855}
2856
2857/*
2858 * For early boot up, the top trace array needs to have
2859 * a list of events that can be enabled. This must be done before
2860 * the filesystem is set up in order to allow events to be traced
2861 * early.
2862 */
2863static __init void
2864__trace_early_add_events(struct trace_array *tr)
2865{
2866        struct trace_event_call *call;
2867        int ret;
2868
2869        list_for_each_entry(call, &ftrace_events, list) {
2870                /* Early boot up should not have any modules loaded */
2871                if (WARN_ON_ONCE(call->mod))
2872                        continue;
2873
2874                ret = __trace_early_add_new_event(call, tr);
2875                if (ret < 0)
2876                        pr_warn("Could not create early event %s\n",
2877                                trace_event_name(call));
2878        }
2879}
2880
2881/* Remove the event directory structure for a trace directory. */
2882static void
2883__trace_remove_event_dirs(struct trace_array *tr)
2884{
2885        struct trace_event_file *file, *next;
2886
2887        list_for_each_entry_safe(file, next, &tr->events, list)
2888                remove_event_file_dir(file);
2889}
2890
2891static void __add_event_to_tracers(struct trace_event_call *call)
2892{
2893        struct trace_array *tr;
2894
2895        list_for_each_entry(tr, &ftrace_trace_arrays, list)
2896                __trace_add_new_event(call, tr);
2897}
2898
2899extern struct trace_event_call *__start_ftrace_events[];
2900extern struct trace_event_call *__stop_ftrace_events[];
2901
2902static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
2903
2904static __init int setup_trace_event(char *str)
2905{
2906        strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
2907        ring_buffer_expanded = true;
2908        tracing_selftest_disabled = true;
2909
2910        return 1;
2911}
2912__setup("trace_event=", setup_trace_event);
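/*
 * The events listed in the "trace_event=" kernel command line option are
 * enabled as early as possible, e.g.:
 *
 *   trace_event=sched:sched_switch,irq:irq_handler_entry
 */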
2913
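/*
 * Create the per-instance top level entries: "set_event",
 * "set_event_pid", the "events" directory and, inside it,
 * "header_page", "header_event" and the global "enable" file.
 */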
2914/* Expects to have event_mutex held when called */
2915static int
2916create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
2917{
2918        struct dentry *d_events;
2919        struct dentry *entry;
2920
2921        entry = tracefs_create_file("set_event", 0644, parent,
2922                                    tr, &ftrace_set_event_fops);
2923        if (!entry) {
2924                pr_warn("Could not create tracefs 'set_event' entry\n");
2925                return -ENOMEM;
2926        }
2927
2928        d_events = tracefs_create_dir("events", parent);
2929        if (!d_events) {
2930                pr_warn("Could not create tracefs 'events' directory\n");
2931                return -ENOMEM;
2932        }
2933
2934        entry = tracefs_create_file("set_event_pid", 0644, parent,
2935                                    tr, &ftrace_set_event_pid_fops);
2936
2937        /* ring buffer internal formats */
2938        trace_create_file("header_page", 0444, d_events,
2939                          ring_buffer_print_page_header,
2940                          &ftrace_show_header_fops);
2941
2942        trace_create_file("header_event", 0444, d_events,
2943                          ring_buffer_print_entry_header,
2944                          &ftrace_show_header_fops);
2945
2946        trace_create_file("enable", 0644, d_events,
2947                          tr, &ftrace_tr_enable_fops);
2948
2949        tr->event_dir = d_events;
2950
2951        return 0;
2952}
2953
2954/**
2955 * event_trace_add_tracer - add an instance of a trace_array to events
2956 * @parent: The parent dentry to place the files/directories for events in
2957 * @tr: The trace array associated with these events
2958 *
2959 * When a new instance is created, it needs to set up its events
2960 * directory, as well as other files associated with events. It also
2961 * creates the event hierarchy in the @parent/events directory.
2962 *
2963 * Returns 0 on success.
2964 */
2965int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
2966{
2967        int ret;
2968
2969        mutex_lock(&event_mutex);
2970
2971        ret = create_event_toplevel_files(parent, tr);
2972        if (ret)
2973                goto out_unlock;
2974
2975        down_write(&trace_event_sem);
2976        __trace_add_event_dirs(tr);
2977        up_write(&trace_event_sem);
2978
2979 out_unlock:
2980        mutex_unlock(&event_mutex);
2981
2982        return ret;
2983}
2984
2985/*
2986 * The top trace array already had its trace_event_file descriptors created.
2987 * Now the files themselves need to be created.
2988 */
2989static __init int
2990early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
2991{
2992        int ret;
2993
2994        mutex_lock(&event_mutex);
2995
2996        ret = create_event_toplevel_files(parent, tr);
2997        if (ret)
2998                goto out_unlock;
2999
3000        down_write(&trace_event_sem);
3001        __trace_early_add_event_dirs(tr);
3002        up_write(&trace_event_sem);
3003
3004 out_unlock:
3005        mutex_unlock(&event_mutex);
3006
3007        return ret;
3008}
3009
3010int event_trace_del_tracer(struct trace_array *tr)
3011{
3012        mutex_lock(&event_mutex);
3013
3014        /* Disable any event triggers and associated soft-disabled events */
3015        clear_event_triggers(tr);
3016
3017        /* Clear the pid list */
3018        __ftrace_clear_event_pids(tr);
3019
3020        /* Disable any running events */
3021        __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
3022
3023        /* Accesses to events are within rcu_read_lock_sched() */
3024        synchronize_sched();
3025
3026        down_write(&trace_event_sem);
3027        __trace_remove_event_dirs(tr);
3028        tracefs_remove_recursive(tr->event_dir);
3029        up_write(&trace_event_sem);
3030
3031        tr->event_dir = NULL;
3032
3033        mutex_unlock(&event_mutex);
3034
3035        return 0;
3036}
3037
3038static __init int event_trace_memsetup(void)
3039{
3040        field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
3041        file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
3042        return 0;
3043}
3044
3045static __init void
3046early_enable_events(struct trace_array *tr, bool disable_first)
3047{
3048        char *buf = bootup_event_buf;
3049        char *token;
3050        int ret;
3051
3052        while (true) {
3053                token = strsep(&buf, ",");
3054
3055                if (!token)
3056                        break;
3057
3058                if (*token) {
3059                        /* Restarting syscalls requires that we stop them first */
3060                        if (disable_first)
3061                                ftrace_set_clr_event(tr, token, 0);
3062
3063                        ret = ftrace_set_clr_event(tr, token, 1);
3064                        if (ret)
3065                                pr_warn("Failed to enable trace event: %s\n", token);
3066                }
3067
3068                /* Put back the comma to allow this to be called again */
3069                if (buf)
3070                        *(buf - 1) = ',';
3071        }
3072}
3073
3074static __init int event_trace_enable(void)
3075{
3076        struct trace_array *tr = top_trace_array();
3077        struct trace_event_call **iter, *call;
3078        int ret;
3079
3080        if (!tr)
3081                return -ENODEV;
3082
3083        for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
3084
3085                call = *iter;
3086                ret = event_init(call);
3087                if (!ret)
3088                        list_add(&call->list, &ftrace_events);
3089        }
3090
3091        /*
3092         * We need the top trace array to have a working set of trace
3093         * points at early init, before the debug files and directories
3094         * are created. Create the file entries now, and attach them
3095         * to the actual file dentries later.
3096         */
3097        __trace_early_add_events(tr);
3098
3099        early_enable_events(tr, false);
3100
3101        trace_printk_start_comm();
3102
3103        register_event_cmds();
3104
3105        register_trigger_cmds();
3106
3107        return 0;
3108}
3109
3110/*
3111 * event_trace_enable() is called from trace_event_init() first to
3112 * initialize events and perhaps start any events that are on the
3113 * command line. Unfortunately, there are some events that will not
3114 * start this early, like the system call tracepoints that need
3115 * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
3116 * is called before pid 1 starts, so that flag is never set and the syscall
3117 * tracepoints are never reached, even though the events themselves are
3118 * enabled (and do nothing). Enabling them again once pid 1 exists fixes this.
3119 */
3120static __init int event_trace_enable_again(void)
3121{
3122        struct trace_array *tr;
3123
3124        tr = top_trace_array();
3125        if (!tr)
3126                return -ENODEV;
3127
3128        early_enable_events(tr, true);
3129
3130        return 0;
3131}
3132
3133early_initcall(event_trace_enable_again);
3134
3135static __init int event_trace_init(void)
3136{
3137        struct trace_array *tr;
3138        struct dentry *d_tracer;
3139        struct dentry *entry;
3140        int ret;
3141
3142        tr = top_trace_array();
3143        if (!tr)
3144                return -ENODEV;
3145
3146        d_tracer = tracing_init_dentry();
3147        if (IS_ERR(d_tracer))
3148                return 0;
3149
3150        entry = tracefs_create_file("available_events", 0444, d_tracer,
3151                                    tr, &ftrace_avail_fops);
3152        if (!entry)
3153                pr_warn("Could not create tracefs 'available_events' entry\n");
3154
3155        if (trace_define_generic_fields())
3156                pr_warn("tracing: Failed to allocate generic fields");
3157
3158        if (trace_define_common_fields())
3159                pr_warn("tracing: Failed to allocate common fields");
3160
3161        ret = early_event_add_tracer(d_tracer, tr);
3162        if (ret)
3163                return ret;
3164
3165#ifdef CONFIG_MODULES
3166        ret = register_module_notifier(&trace_module_nb);
3167        if (ret)
3168                pr_warn("Failed to register trace events module notifier\n");
3169#endif
3170        return 0;
3171}
3172
3173void __init trace_event_init(void)
3174{
3175        event_trace_memsetup();
3176        init_ftrace_syscalls();
3177        event_trace_enable();
3178}
3179
3180fs_initcall(event_trace_init);
3181
3182#ifdef CONFIG_FTRACE_STARTUP_TEST
3183
3184static DEFINE_SPINLOCK(test_spinlock);
3185static DEFINE_SPINLOCK(test_spinlock_irq);
3186static DEFINE_MUTEX(test_mutex);
3187
3188static __init void test_work(struct work_struct *dummy)
3189{
3190        spin_lock(&test_spinlock);
3191        spin_lock_irq(&test_spinlock_irq);
3192        udelay(1);
3193        spin_unlock_irq(&test_spinlock_irq);
3194        spin_unlock(&test_spinlock);
3195
3196        mutex_lock(&test_mutex);
3197        msleep(1);
3198        mutex_unlock(&test_mutex);
3199}
3200
3201static __init int event_test_thread(void *unused)
3202{
3203        void *test_malloc;
3204
3205        test_malloc = kmalloc(1234, GFP_KERNEL);
3206        if (!test_malloc)
3207                pr_info("failed to kmalloc\n");
3208
3209        schedule_on_each_cpu(test_work);
3210
3211        kfree(test_malloc);
3212
3213        set_current_state(TASK_INTERRUPTIBLE);
3214        while (!kthread_should_stop()) {
3215                schedule();
3216                set_current_state(TASK_INTERRUPTIBLE);
3217        }
3218        __set_current_state(TASK_RUNNING);
3219
3220        return 0;
3221}
3222
3223/*
3224 * Do various things that may trigger events.
3225 */
3226static __init void event_test_stuff(void)
3227{
3228        struct task_struct *test_thread;
3229
3230        test_thread = kthread_run(event_test_thread, NULL, "test-events");
3231        msleep(1);
3232        kthread_stop(test_thread);
3233}
3234
3235/*
3236 * For every trace event defined, we will test each trace point separately,
3237 * and then by groups, and finally all trace points.
3238 */
3239static __init void event_trace_self_tests(void)
3240{
3241        struct trace_subsystem_dir *dir;
3242        struct trace_event_file *file;
3243        struct trace_event_call *call;
3244        struct event_subsystem *system;
3245        struct trace_array *tr;
3246        int ret;
3247
3248        tr = top_trace_array();
3249        if (!tr)
3250                return;
3251
3252        pr_info("Running tests on trace events:\n");
3253
3254        list_for_each_entry(file, &tr->events, list) {
3255
3256                call = file->event_call;
3257
3258                /* Only test those that have a probe */
3259                if (!call->class || !call->class->probe)
3260                        continue;
3261
3262/*
3263 * Testing syscall events here is pretty useless, but
3264 * we still do it if configured; it is time consuming, though.
3265 * What we really need is a user thread to perform the
3266 * syscalls as we test.
3267 */
3268#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
3269                if (call->class->system &&
3270                    strcmp(call->class->system, "syscalls") == 0)
3271                        continue;
3272#endif
3273
3274                pr_info("Testing event %s: ", trace_event_name(call));
3275
3276                /*
3277                 * If an event is already enabled, someone is using
3278                 * it and the self test should not be on.
3279                 */
3280                if (file->flags & EVENT_FILE_FL_ENABLED) {
3281                        pr_warn("Enabled event during self test!\n");
3282                        WARN_ON_ONCE(1);
3283                        continue;
3284                }
3285
3286                ftrace_event_enable_disable(file, 1);
3287                event_test_stuff();
3288                ftrace_event_enable_disable(file, 0);
3289
3290                pr_cont("OK\n");
3291        }
3292
3293        /* Now test at the sub system level */
3294
3295        pr_info("Running tests on trace event systems:\n");
3296
3297        list_for_each_entry(dir, &tr->systems, list) {
3298
3299                system = dir->subsystem;
3300
3301                /* the ftrace system is special, skip it */
3302                if (strcmp(system->name, "ftrace") == 0)
3303                        continue;
3304
3305                pr_info("Testing event system %s: ", system->name);
3306
3307                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
3308                if (WARN_ON_ONCE(ret)) {
3309                        pr_warn("error enabling system %s\n",
3310                                system->name);
3311                        continue;
3312                }
3313
3314                event_test_stuff();
3315
3316                ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
3317                if (WARN_ON_ONCE(ret)) {
3318                        pr_warn("error disabling system %s\n",
3319                                system->name);
3320                        continue;
3321                }
3322
3323                pr_cont("OK\n");
3324        }
3325
3326        /* Test with all events enabled */
3327
3328        pr_info("Running tests on all trace events:\n");
3329        pr_info("Testing all events: ");
3330
3331        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
3332        if (WARN_ON_ONCE(ret)) {
3333                pr_warn("error enabling all events\n");
3334                return;
3335        }
3336
3337        event_test_stuff();
3338
3339        /* Now disable all events again */
3340        ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
3341        if (WARN_ON_ONCE(ret)) {
3342                pr_warn("error disabling all events\n");
3343                return;
3344        }
3345
3346        pr_cont("OK\n");
3347}
3348
3349#ifdef CONFIG_FUNCTION_TRACER
3350
3351static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
3352
3353static struct trace_array *event_tr;
3354
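/*
 * ftrace callback used only by the self test: records a TRACE_FN entry
 * for every function hit while the events are being exercised, to make
 * sure function tracing and event tracing work together.
 */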
3355static void __init
3356function_test_events_call(unsigned long ip, unsigned long parent_ip,
3357                          struct ftrace_ops *op, struct pt_regs *pt_regs)
3358{
3359        struct ring_buffer_event *event;
3360        struct ring_buffer *buffer;
3361        struct ftrace_entry *entry;
3362        unsigned long flags;
3363        long disabled;
3364        int cpu;
3365        int pc;
3366
3367        pc = preempt_count();
3368        preempt_disable_notrace();
3369        cpu = raw_smp_processor_id();
3370        disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
3371
3372        if (disabled != 1)
3373                goto out;
3374
3375        local_save_flags(flags);
3376
3377        event = trace_current_buffer_lock_reserve(&buffer,
3378                                                  TRACE_FN, sizeof(*entry),
3379                                                  flags, pc);
3380        if (!event)
3381                goto out;
3382        entry   = ring_buffer_event_data(event);
3383        entry->ip                       = ip;
3384        entry->parent_ip                = parent_ip;
3385
3386        trace_buffer_unlock_commit(event_tr, buffer, event, flags, pc);
3387
3388 out:
3389        atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
3390        preempt_enable_notrace();
3391}
3392
3393static struct ftrace_ops trace_ops __initdata  =
3394{
3395        .func = function_test_events_call,
3396        .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3397};
3398
3399static __init void event_trace_self_test_with_function(void)
3400{
3401        int ret;
3402        event_tr = top_trace_array();
3403        if (WARN_ON(!event_tr))
3404                return;
3405        ret = register_ftrace_function(&trace_ops);
3406        if (WARN_ON(ret < 0)) {
3407                pr_info("Failed to enable function tracer for event tests\n");
3408                return;
3409        }
3410        pr_info("Running tests again, along with the function tracer\n");
3411        event_trace_self_tests();
3412        unregister_ftrace_function(&trace_ops);
3413}
3414#else
3415static __init void event_trace_self_test_with_function(void)
3416{
3417}
3418#endif
3419
3420static __init int event_trace_self_tests_init(void)
3421{
3422        if (!tracing_selftest_disabled) {
3423                event_trace_self_tests();
3424                event_trace_self_test_with_function();
3425        }
3426
3427        return 0;
3428}
3429
3430late_initcall(event_trace_self_tests_init);
3431
3432#endif
3433