/* linux/include/linux/ftrace_event.h */
#ifndef _LINUX_FTRACE_EVENT_H
#define _LINUX_FTRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>

  10struct trace_array;
  11struct trace_buffer;
  12struct tracer;
  13struct dentry;
  14
  15struct trace_print_flags {
  16        unsigned long           mask;
  17        const char              *name;
  18};
  19
  20struct trace_print_flags_u64 {
  21        unsigned long long      mask;
  22        const char              *name;
  23};
  24
  25const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
  26                                   unsigned long flags,
  27                                   const struct trace_print_flags *flag_array);
  28
  29const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
  30                                     const struct trace_print_flags *symbol_array);
  31
  32#if BITS_PER_LONG == 32
  33const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
  34                                         unsigned long long val,
  35                                         const struct trace_print_flags_u64
  36                                                                 *symbol_array);
  37#endif
  38
  39const char *ftrace_print_hex_seq(struct trace_seq *p,
  40                                 const unsigned char *buf, int len);
  41
  42struct trace_iterator;
  43struct trace_event;
  44
  45int ftrace_raw_output_prep(struct trace_iterator *iter,
  46                           struct trace_event *event);
  47
  48/*
  49 * The trace entry - the most basic unit of tracing. This is what
  50 * is printed in the end as a single line in the trace output, such as:
  51 *
  52 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
  53 */
  54struct trace_entry {
  55        unsigned short          type;
  56        unsigned char           flags;
  57        unsigned char           preempt_count;
  58        int                     pid;
  59};
  60
  61#define FTRACE_MAX_EVENT                                                \
  62        ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
  63
  64/*
  65 * Trace iterator - used by printout routines who present trace
  66 * results to users and which routines might sleep, etc:
  67 */
  68struct trace_iterator {
  69        struct trace_array      *tr;
  70        struct tracer           *trace;
  71        struct trace_buffer     *trace_buffer;
  72        void                    *private;
  73        int                     cpu_file;
  74        struct mutex            mutex;
  75        struct ring_buffer_iter **buffer_iter;
  76        unsigned long           iter_flags;
  77
  78        /* trace_seq for __print_flags() and __print_symbolic() etc. */
  79        struct trace_seq        tmp_seq;
  80
  81        cpumask_var_t           started;
  82
  83        /* it's true when current open file is snapshot */
  84        bool                    snapshot;
  85
  86        /* The below is zeroed out in pipe_read */
  87        struct trace_seq        seq;
  88        struct trace_entry      *ent;
  89        unsigned long           lost_events;
  90        int                     leftover;
  91        int                     ent_size;
  92        int                     cpu;
  93        u64                     ts;
  94
  95        loff_t                  pos;
  96        long                    idx;
  97
  98        /* All new field here will be zeroed out in pipe_read */
  99};
 100
 101enum trace_iter_flags {
 102        TRACE_FILE_LAT_FMT      = 1,
 103        TRACE_FILE_ANNOTATE     = 2,
 104        TRACE_FILE_TIME_IN_NS   = 4,
 105};
 106
 107
 108typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
 109                                      int flags, struct trace_event *event);
 110
 111struct trace_event_functions {
 112        trace_print_func        trace;
 113        trace_print_func        raw;
 114        trace_print_func        hex;
 115        trace_print_func        binary;
 116};
 117
 118struct trace_event {
 119        struct hlist_node               node;
 120        struct list_head                list;
 121        int                             type;
 122        struct trace_event_functions    *funcs;
 123};
 124
 125extern int register_ftrace_event(struct trace_event *event);
 126extern int unregister_ftrace_event(struct trace_event *event);
 127
 128/* Return values for print_line callback */
 129enum print_line_t {
 130        TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
 131        TRACE_TYPE_HANDLED      = 1,
 132        TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
 133        TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
 134};
 135
 136void tracing_generic_entry_update(struct trace_entry *entry,
 137                                  unsigned long flags,
 138                                  int pc);
 139struct ftrace_event_file;
 140
 141struct ring_buffer_event *
 142trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
 143                                struct ftrace_event_file *ftrace_file,
 144                                int type, unsigned long len,
 145                                unsigned long flags, int pc);
 146struct ring_buffer_event *
 147trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
 148                                  int type, unsigned long len,
 149                                  unsigned long flags, int pc);
 150void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 151                                        struct ring_buffer_event *event,
 152                                        unsigned long flags, int pc);
 153void trace_buffer_unlock_commit(struct ring_buffer *buffer,
 154                                struct ring_buffer_event *event,
 155                                unsigned long flags, int pc);
 156void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
 157                                     struct ring_buffer_event *event,
 158                                     unsigned long flags, int pc,
 159                                     struct pt_regs *regs);
 160void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 161                                         struct ring_buffer_event *event);
 162
 163void tracing_record_cmdline(struct task_struct *tsk);
 164
 165struct event_filter;
 166
 167enum trace_reg {
 168        TRACE_REG_REGISTER,
 169        TRACE_REG_UNREGISTER,
 170#ifdef CONFIG_PERF_EVENTS
 171        TRACE_REG_PERF_REGISTER,
 172        TRACE_REG_PERF_UNREGISTER,
 173        TRACE_REG_PERF_OPEN,
 174        TRACE_REG_PERF_CLOSE,
 175        TRACE_REG_PERF_ADD,
 176        TRACE_REG_PERF_DEL,
 177#endif
 178};
 179
 180struct ftrace_event_call;
 181
 182struct ftrace_event_class {
 183        char                    *system;
 184        void                    *probe;
 185#ifdef CONFIG_PERF_EVENTS
 186        void                    *perf_probe;
 187#endif
 188        int                     (*reg)(struct ftrace_event_call *event,
 189                                       enum trace_reg type, void *data);
 190        int                     (*define_fields)(struct ftrace_event_call *);
 191        struct list_head        *(*get_fields)(struct ftrace_event_call *);
 192        struct list_head        fields;
 193        int                     (*raw_init)(struct ftrace_event_call *);
 194};
 195
 196extern int ftrace_event_reg(struct ftrace_event_call *event,
 197                            enum trace_reg type, void *data);
 198
 199enum {
 200        TRACE_EVENT_FL_FILTERED_BIT,
 201        TRACE_EVENT_FL_CAP_ANY_BIT,
 202        TRACE_EVENT_FL_NO_SET_FILTER_BIT,
 203        TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
 204        TRACE_EVENT_FL_WAS_ENABLED_BIT,
 205        TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
 206};
 207
 208/*
 209 * Event flags:
 210 *  FILTERED      - The event has a filter attached
 211 *  CAP_ANY       - Any user can enable for perf
 212 *  NO_SET_FILTER - Set when filter has error and is to be ignored
 213 *  IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file
 214 *  WAS_ENABLED   - Set and stays set when an event was ever enabled
 215 *                    (used for module unloading, if a module event is enabled,
 216 *                     it is best to clear the buffers that used it).
 217 *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
 218 */
 219enum {
 220        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
 221        TRACE_EVENT_FL_CAP_ANY          = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 222        TRACE_EVENT_FL_NO_SET_FILTER    = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
 223        TRACE_EVENT_FL_IGNORE_ENABLE    = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
 224        TRACE_EVENT_FL_WAS_ENABLED      = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
 225        TRACE_EVENT_FL_USE_CALL_FILTER  = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
 226};
 227
 228struct ftrace_event_call {
 229        struct list_head        list;
 230        struct ftrace_event_class *class;
 231        char                    *name;
 232        struct trace_event      event;
 233        const char              *print_fmt;
 234        struct event_filter     *filter;
 235        struct list_head        *files;
 236        void                    *mod;
 237        void                    *data;
 238        /*
 239         *   bit 0:             filter_active
 240         *   bit 1:             allow trace by non root (cap any)
 241         *   bit 2:             failed to apply filter
 242         *   bit 3:             ftrace internal event (do not enable)
 243         *   bit 4:             Event was enabled by module
 244         *   bit 5:             use call filter rather than file filter
 245         */
 246        int                     flags; /* static flags of different events */
 247
 248#ifdef CONFIG_PERF_EVENTS
 249        int                             perf_refcount;
 250        struct hlist_head __percpu      *perf_events;
 251
 252        int     (*perf_perm)(struct ftrace_event_call *,
 253                             struct perf_event *);
 254#endif
 255};
 256
 257struct trace_array;
 258struct ftrace_subsystem_dir;
 259
 260enum {
 261        FTRACE_EVENT_FL_ENABLED_BIT,
 262        FTRACE_EVENT_FL_RECORDED_CMD_BIT,
 263        FTRACE_EVENT_FL_FILTERED_BIT,
 264        FTRACE_EVENT_FL_NO_SET_FILTER_BIT,
 265        FTRACE_EVENT_FL_SOFT_MODE_BIT,
 266        FTRACE_EVENT_FL_SOFT_DISABLED_BIT,
 267};
 268
 269/*
 270 * Ftrace event file flags:
 271 *  ENABLED       - The event is enabled
 272 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 273 *  FILTERED      - The event has a filter attached
 274 *  NO_SET_FILTER - Set when filter has error and is to be ignored
 275 *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
 276 *  SOFT_DISABLED - When set, do not trace the event (even though its
 277 *                   tracepoint may be enabled)
 278 */
 279enum {
 280        FTRACE_EVENT_FL_ENABLED         = (1 << FTRACE_EVENT_FL_ENABLED_BIT),
 281        FTRACE_EVENT_FL_RECORDED_CMD    = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT),
 282        FTRACE_EVENT_FL_FILTERED        = (1 << FTRACE_EVENT_FL_FILTERED_BIT),
 283        FTRACE_EVENT_FL_NO_SET_FILTER   = (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT),
 284        FTRACE_EVENT_FL_SOFT_MODE       = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT),
 285        FTRACE_EVENT_FL_SOFT_DISABLED   = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT),
 286};
 287
 288struct ftrace_event_file {
 289        struct list_head                list;
 290        struct ftrace_event_call        *event_call;
 291        struct event_filter             *filter;
 292        struct dentry                   *dir;
 293        struct trace_array              *tr;
 294        struct ftrace_subsystem_dir     *system;
 295
 296        /*
 297         * 32 bit flags:
 298         *   bit 0:             enabled
 299         *   bit 1:             enabled cmd record
 300         *   bit 2:             enable/disable with the soft disable bit
 301         *   bit 3:             soft disabled
 302         *
 303         * Note: The bits must be set atomically to prevent races
 304         * from other writers. Reads of flags do not need to be in
 305         * sync as they occur in critical sections. But the way flags
 306         * is currently used, these changes do not affect the code
 307         * except that when a change is made, it may have a slight
 308         * delay in propagating the changes to other CPUs due to
 309         * caching and such. Which is mostly OK ;-)
 310         */
 311        unsigned long           flags;
 312        atomic_t                sm_ref; /* soft-mode reference counter */
 313};
 314
 315#define __TRACE_EVENT_FLAGS(name, value)                                \
 316        static int __init trace_init_flags_##name(void)                 \
 317        {                                                               \
 318                event_##name.flags = value;                             \
 319                return 0;                                               \
 320        }                                                               \
 321        early_initcall(trace_init_flags_##name);
 322
 323#define __TRACE_EVENT_PERF_PERM(name, expr...)                          \
 324        static int perf_perm_##name(struct ftrace_event_call *tp_event, \
 325                                    struct perf_event *p_event)         \
 326        {                                                               \
 327                return ({ expr; });                                     \
 328        }                                                               \
 329        static int __init trace_init_perf_perm_##name(void)             \
 330        {                                                               \
 331                event_##name.perf_perm = &perf_perm_##name;             \
 332                return 0;                                               \
 333        }                                                               \
 334        early_initcall(trace_init_perf_perm_##name);
 335
 336#define PERF_MAX_TRACE_SIZE     2048
 337
 338#define MAX_FILTER_STR_VAL      256     /* Should handle KSYM_SYMBOL_LEN */
 339
 340extern void destroy_preds(struct ftrace_event_file *file);
 341extern void destroy_call_preds(struct ftrace_event_call *call);
 342extern int filter_match_preds(struct event_filter *filter, void *rec);
 343
 344extern int filter_check_discard(struct ftrace_event_file *file, void *rec,
 345                                struct ring_buffer *buffer,
 346                                struct ring_buffer_event *event);
 347extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
 348                                     struct ring_buffer *buffer,
 349                                     struct ring_buffer_event *event);
 350
 351enum {
 352        FILTER_OTHER = 0,
 353        FILTER_STATIC_STRING,
 354        FILTER_DYN_STRING,
 355        FILTER_PTR_STRING,
 356        FILTER_TRACE_FN,
 357};
 358
 359#define EVENT_STORAGE_SIZE 128
 360extern struct mutex event_storage_mutex;
 361extern char event_storage[EVENT_STORAGE_SIZE];
 362
 363extern int trace_event_raw_init(struct ftrace_event_call *call);
 364extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 365                              const char *name, int offset, int size,
 366                              int is_signed, int filter_type);
 367extern int trace_add_event_call(struct ftrace_event_call *call);
 368extern int trace_remove_event_call(struct ftrace_event_call *call);
 369
 370#define is_signed_type(type)    (((type)(-1)) < (type)1)
 371
 372int trace_set_clr_event(const char *system, const char *event, int set);
 373
 374/*
 375 * The double __builtin_constant_p is because gcc will give us an error
 376 * if we try to allocate the static variable to fmt if it is not a
 377 * constant. Even with the outer if statement optimizing out.
 378 */
 379#define event_trace_printk(ip, fmt, args...)                            \
 380do {                                                                    \
 381        __trace_printk_check_format(fmt, ##args);                       \
 382        tracing_record_cmdline(current);                                \
 383        if (__builtin_constant_p(fmt)) {                                \
 384                static const char *trace_printk_fmt                     \
 385                  __attribute__((section("__trace_printk_fmt"))) =      \
 386                        __builtin_constant_p(fmt) ? fmt : NULL;         \
 387                                                                        \
 388                __trace_bprintk(ip, trace_printk_fmt, ##args);          \
 389        } else                                                          \
 390                __trace_printk(ip, fmt, ##args);                        \
 391} while (0)
 392
 393/**
 394 * tracepoint_string - register constant persistent string to trace system
 395 * @str - a constant persistent string that will be referenced in tracepoints
 396 *
 397 * If constant strings are being used in tracepoints, it is faster and
 398 * more efficient to just save the pointer to the string and reference
 399 * that with a printf "%s" instead of saving the string in the ring buffer
 400 * and wasting space and time.
 401 *
 402 * The problem with the above approach is that userspace tools that read
 403 * the binary output of the trace buffers do not have access to the string.
 404 * Instead they just show the address of the string which is not very
 405 * useful to users.
 406 *
 407 * With tracepoint_string(), the string will be registered to the tracing
 408 * system and exported to userspace via the debugfs/tracing/printk_formats
 409 * file that maps the string address to the string text. This way userspace
 410 * tools that read the binary buffers have a way to map the pointers to
 411 * the ASCII strings they represent.
 412 *
 413 * The @str used must be a constant string and persistent as it would not
 414 * make sense to show a string that no longer exists. But it is still fine
 415 * to be used with modules, because when modules are unloaded, if they
 416 * had tracepoints, the ring buffers are cleared too. As long as the string
 417 * does not change during the life of the module, it is fine to use
 418 * tracepoint_string() within a module.
 419 */
 420#define tracepoint_string(str)                                          \
 421        ({                                                              \
 422                static const char *___tp_str __tracepoint_string = str; \
 423                ___tp_str;                                              \
 424        })
 425#define __tracepoint_string     __attribute__((section("__tracepoint_str")))
 426
 427#ifdef CONFIG_PERF_EVENTS
 428struct perf_event;
 429
 430DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
 431
 432extern int  perf_trace_init(struct perf_event *event);
 433extern void perf_trace_destroy(struct perf_event *event);
 434extern int  perf_trace_add(struct perf_event *event, int flags);
 435extern void perf_trace_del(struct perf_event *event, int flags);
 436extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 437                                     char *filter_str);
 438extern void ftrace_profile_free_filter(struct perf_event *event);
 439extern void *perf_trace_buf_prepare(int size, unsigned short type,
 440                                    struct pt_regs *regs, int *rctxp);
 441
 442static inline void
 443perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
 444                       u64 count, struct pt_regs *regs, void *head,
 445                       struct task_struct *task)
 446{
 447        perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
 448}
 449#endif
 450
#endif /* _LINUX_FTRACE_EVENT_H */