linux/kernel/trace/trace_event_perf.c
/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

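/*
 * One scratch buffer per recursion context: perf distinguishes task,
 * softirq, hardirq and NMI contexts (PERF_NR_CONTEXTS of them), so an
 * event firing from an interrupt can't scribble over a buffer that is
 * still being filled from process context.
 */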
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises.
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
        perf_trace_t;
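/*
 * Sizing note: PERF_MAX_TRACE_SIZE is 2048 bytes in kernels of this
 * vintage, so a full set of buffers costs PERF_NR_CONTEXTS * 2KB per
 * possible cpu while any trace event is active.
 */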

/* Count the events in use (per event id, not per instance) */
static int      total_ref_count;

static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        /* No tracing, just counting, so no obvious leak */
        if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
                return 0;

        /* Some events are ok to be traced by non-root users... */
        if (p_event->attach_state == PERF_ATTACH_TASK) {
                if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
                        return 0;
        }

        /*
         * ...otherwise raw tracepoint data can be a severe data leak,
         * only allow root to have these.
         */
        if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}
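/*
 * Concretely: a pure counting session ("perf stat -e sched:sched_switch")
 * does not set PERF_SAMPLE_RAW and passes for any user, while a sampling
 * session ("perf record -e sched:sched_switch") requests raw tracepoint
 * data and needs CAP_SYS_ADMIN, unless kernel.perf_event_paranoid is -1
 * or the event is a per-task one on a TRACE_EVENT_FL_CAP_ANY tracepoint.
 */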

static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        struct hlist_head __percpu *list;
        int ret;
        int cpu;

        ret = perf_trace_event_perm(tp_event, p_event);
        if (ret)
                return ret;

        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;

        ret = -ENOMEM;

        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;

        for_each_possible_cpu(cpu)
                INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

        tp_event->perf_events = list;

        if (!total_ref_count) {
                char __percpu *buf;
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        buf = (char __percpu *)alloc_percpu(perf_trace_t);
                        if (!buf)
                                goto fail;

                        perf_trace_buf[i] = buf;
                }
        }

        ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
        if (ret)
                goto fail;

        total_ref_count++;
        return 0;

fail:
        if (!total_ref_count) {
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }

        if (!--tp_event->perf_refcount) {
                free_percpu(tp_event->perf_events);
                tp_event->perf_events = NULL;
        }

        return ret;
}
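/*
 * Refcount example: two perf events on sched:sched_switch plus one on
 * sched:sched_wakeup leaves sched_switch's perf_refcount at 2,
 * sched_wakeup's at 1 and total_ref_count at 2; perf_trace_buf[] is
 * only allocated when the very first tracepoint gains its first event,
 * and only freed once the last tracepoint loses its last event.
 */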

int perf_trace_init(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event;
        int event_id = p_event->attr.config;
        int ret = -EINVAL;

        mutex_lock(&event_mutex);
        list_for_each_entry(tp_event, &ftrace_events, list) {
                if (tp_event->event.type == event_id &&
                    tp_event->class && tp_event->class->reg &&
                    try_module_get(tp_event->mod)) {
                        ret = perf_trace_event_init(tp_event, p_event);
                        if (ret)
                                module_put(tp_event->mod);
                        break;
                }
        }
        mutex_unlock(&event_mutex);

        return ret;
}
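/*
 * Sketch of the caller side (kernel/events/core.c, not this file,
 * slightly simplified): the tracepoint pmu's event_init callback
 * routes freshly opened events here, with attr.config holding the id
 * exported in /sys/kernel/debug/tracing/events/<subsys>/<event>/id:
 *
 *      static int perf_tp_event_init(struct perf_event *event)
 *      {
 *              if (event->attr.type != PERF_TYPE_TRACEPOINT)
 *                      return -ENOENT;
 *              return perf_trace_init(event);
 *      }
 */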

int perf_trace_add(struct perf_event *p_event, int flags)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        struct hlist_head __percpu *pcpu_list;
        struct hlist_head *list;

        pcpu_list = tp_event->perf_events;
        if (WARN_ON_ONCE(!pcpu_list))
                return -EINVAL;

        if (!(flags & PERF_EF_START))
                p_event->hw.state = PERF_HES_STOPPED;

        list = this_cpu_ptr(pcpu_list);
        hlist_add_head_rcu(&p_event->hlist_entry, list);

        return 0;
}
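/*
 * The event goes on this cpu's list only: the pmu's add/del callbacks
 * run on the cpu the event is being scheduled on, and the tracepoint
 * probe walks this_cpu's hlist under RCU, so each cpu sees exactly the
 * events currently scheduled there.
 */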

void perf_trace_del(struct perf_event *p_event, int flags)
{
        hlist_del_rcu(&p_event->hlist_entry);
}

void perf_trace_destroy(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        int i;

        mutex_lock(&event_mutex);
        if (--tp_event->perf_refcount > 0)
                goto out;

        tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

        /*
         * Ensure our callback won't be called anymore. The buffers
         * will be freed after that.
         */
        tracepoint_synchronize_unregister();

        free_percpu(tp_event->perf_events);
        tp_event->perf_events = NULL;

        if (!--total_ref_count) {
                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }
out:
        module_put(tp_event->mod);
        mutex_unlock(&event_mutex);
}

__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
                                       struct pt_regs *regs, int *rctxp)
{
        struct trace_entry *entry;
        unsigned long flags;
        char *raw_data;
        int pc;

        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

        pc = preempt_count();

        *rctxp = perf_swevent_get_recursion_context();
        if (*rctxp < 0)
                return NULL;

        raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

        /* Zero the dead padding bytes left by the u64 alignment so we don't leak data to userspace */
        memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

        entry = (struct trace_entry *)raw_data;
        local_save_flags(flags);
        tracing_generic_entry_update(entry, flags, pc);
        entry->type = type;

        return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
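/*
 * Typical usage, a sketch of what the generated probes in
 * include/trace/ftrace.h did in this era (names approximate):
 *
 *      int rctx;
 *      struct ftrace_raw_##call *entry;
 *
 *      entry = perf_trace_buf_prepare(__entry_size,
 *                                     event_call->event.type, regs, &rctx);
 *      if (!entry)
 *              return;
 *      ... assign the event's fields ...
 *      head = this_cpu_ptr(event_call->perf_events);
 *      perf_trace_buf_submit(entry, __entry_size, rctx, __addr, __count,
 *                            regs, head);
 *
 * perf_trace_buf_submit() emits the sample and releases the recursion
 * context acquired here, so every successful prepare must be paired
 * with a submit.
 */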