/* linux/include/linux/trace_events.h */
#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
struct bpf_prog;

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
                                  unsigned long flags,
                                  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
                                    const struct trace_print_flags *symbol_array);

#if BITS_PER_LONG == 32
const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
                      unsigned long long flags,
                      const struct trace_print_flags_u64 *flag_array);

const char *trace_print_symbols_seq_u64(struct trace_seq *p,
                                        unsigned long long val,
                                        const struct trace_print_flags_u64
                                                                 *symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
                                    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
                                const unsigned char *buf, int len,
                                bool concatenate);

const char *trace_print_array_seq(struct trace_seq *p,
                                   const void *buf, int count,
                                   size_t el_size);
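/*
 * Illustrative sketch (not part of the original header): the
 * __print_flags()/__print_symbolic() helpers used in a TRACE_EVENT()'s
 * TP_printk() are implemented in terms of the trace_print_*_seq()
 * functions above, so a hypothetical event could print a flags field as:
 *
 *      TP_printk("flags=%s",
 *                __print_flags(__entry->flags, "|",
 *                              { BIT(0), "DIRTY" }, { BIT(1), "LOCKED" }))
 */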

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
                          struct trace_event *event);

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
        unsigned short          type;
        unsigned char           flags;
        unsigned char           preempt_count;
        int                     pid;
};

#define TRACE_EVENT_TYPE_MAX                                            \
        ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
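/*
 * Illustrative sketch (hypothetical, not part of the original header):
 * every event record starts with a struct trace_entry, followed by the
 * event's own fields, e.g. a generated record could look roughly like:
 *
 *      struct trace_event_raw_sample_event {
 *              struct trace_entry      ent;
 *              unsigned long           value;
 *              char                    comm[TASK_COMM_LEN];
 *      };
 */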

/*
 * Trace iterator - used by the printout routines that present trace
 * results to users; these routines might sleep, etc.:
 */
struct trace_iterator {
        struct trace_array      *tr;
        struct tracer           *trace;
        struct trace_buffer     *trace_buffer;
        void                    *private;
        int                     cpu_file;
        struct mutex            mutex;
        struct ring_buffer_iter **buffer_iter;
        unsigned long           iter_flags;

        /* trace_seq for __print_flags() and __print_symbolic() etc. */
        struct trace_seq        tmp_seq;

        cpumask_var_t           started;

        /* true when the currently open file is a snapshot */
        bool                    snapshot;

        /* The below is zeroed out in pipe_read */
        struct trace_seq        seq;
        struct trace_entry      *ent;
        unsigned long           lost_events;
        int                     leftover;
        int                     ent_size;
        int                     cpu;
        u64                     ts;

        loff_t                  pos;
        long                    idx;

        /* All new fields added here will be zeroed out in pipe_read */
};

enum trace_iter_flags {
        TRACE_FILE_LAT_FMT      = 1,
        TRACE_FILE_ANNOTATE     = 2,
        TRACE_FILE_TIME_IN_NS   = 4,
};


typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
                                      int flags, struct trace_event *event);

struct trace_event_functions {
        trace_print_func        trace;
        trace_print_func        raw;
        trace_print_func        hex;
        trace_print_func        binary;
};

struct trace_event {
        struct hlist_node               node;
        struct list_head                list;
        int                             type;
        struct trace_event_functions    *funcs;
};

extern int register_trace_event(struct trace_event *event);
extern int unregister_trace_event(struct trace_event *event);

/* Return values for print_line callback */
enum print_line_t {
        TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
        TRACE_TYPE_HANDLED      = 1,
        TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
        TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
};

enum print_line_t trace_handle_return(struct trace_seq *s);
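/*
 * Illustrative sketch (hypothetical, not part of the original header):
 * a trace_print_func implementation writes into iter->seq and lets
 * trace_handle_return() turn the trace_seq state into a print_line_t:
 *
 *      static enum print_line_t
 *      sample_trace_output(struct trace_iterator *iter, int flags,
 *                          struct trace_event *event)
 *      {
 *              trace_seq_printf(&iter->seq, "sample event\n");
 *              return trace_handle_return(&iter->seq);
 *      }
 */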

void tracing_generic_entry_update(struct trace_entry *entry,
                                  unsigned long flags,
                                  int pc);
struct trace_event_file;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
                                struct trace_event_file *trace_file,
                                int type, unsigned long len,
                                unsigned long flags, int pc);

#define TRACE_RECORD_CMDLINE    BIT(0)
#define TRACE_RECORD_TGID       BIT(1)

void tracing_record_taskinfo(struct task_struct *task, int flags);
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
                                          struct task_struct *next, int flags);

void tracing_record_cmdline(struct task_struct *task);
void tracing_record_tgid(struct task_struct *task);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

struct event_filter;

enum trace_reg {
        TRACE_REG_REGISTER,
        TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
        TRACE_REG_PERF_REGISTER,
        TRACE_REG_PERF_UNREGISTER,
        TRACE_REG_PERF_OPEN,
        TRACE_REG_PERF_CLOSE,
        TRACE_REG_PERF_ADD,
        TRACE_REG_PERF_DEL,
#endif
};

struct trace_event_call;

struct trace_event_class {
        const char              *system;
        void                    *probe;
#ifdef CONFIG_PERF_EVENTS
        void                    *perf_probe;
#endif
        int                     (*reg)(struct trace_event_call *event,
                                       enum trace_reg type, void *data);
        int                     (*define_fields)(struct trace_event_call *);
        struct list_head        *(*get_fields)(struct trace_event_call *);
        struct list_head        fields;
        int                     (*raw_init)(struct trace_event_call *);
};

extern int trace_event_reg(struct trace_event_call *event,
                            enum trace_reg type, void *data);
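/*
 * Illustrative sketch (hypothetical, not part of the original header):
 * generated event classes normally point ->reg at trace_event_reg(),
 * which dispatches on enum trace_reg; a class needing extra setup could
 * wrap it roughly like this:
 *
 *      static int sample_event_reg(struct trace_event_call *call,
 *                                  enum trace_reg type, void *data)
 *      {
 *              if (type == TRACE_REG_REGISTER)
 *                      pr_debug("registering %s\n", trace_event_name(call));
 *              return trace_event_reg(call, type, data);
 *      }
 */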

struct trace_event_buffer {
        struct ring_buffer              *buffer;
        struct ring_buffer_event        *event;
        struct trace_event_file         *trace_file;
        void                            *entry;
        unsigned long                   flags;
        int                             pc;
};

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
                                  struct trace_event_file *trace_file,
                                  unsigned long len);

void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
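/*
 * Illustrative sketch (hypothetical): the generated trace_event_raw_event_*()
 * probes use the reserve/commit pair roughly like this:
 *
 *      struct trace_event_buffer fbuffer;
 *      struct trace_event_raw_sample_event *entry;
 *
 *      entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *                                         sizeof(*entry));
 *      if (!entry)
 *              return;
 *      entry->value = 42;
 *      trace_event_buffer_commit(&fbuffer);
 */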

enum {
        TRACE_EVENT_FL_FILTERED_BIT,
        TRACE_EVENT_FL_CAP_ANY_BIT,
        TRACE_EVENT_FL_NO_SET_FILTER_BIT,
        TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
        TRACE_EVENT_FL_WAS_ENABLED_BIT,
        TRACE_EVENT_FL_TRACEPOINT_BIT,
        TRACE_EVENT_FL_KPROBE_BIT,
        TRACE_EVENT_FL_UPROBE_BIT,
};

/*
 * Event flags:
 *  FILTERED      - The event has a filter attached
 *  CAP_ANY       - Any user can enable for perf
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
 *  WAS_ENABLED   - Set and stays set when an event was ever enabled
 *                    (used for module unloading; if a module event is enabled,
 *                     it is best to clear the buffers that used it).
 *  TRACEPOINT    - Event is a tracepoint
 *  KPROBE        - Event is a kprobe
 *  UPROBE        - Event is a uprobe
 */
enum {
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
        TRACE_EVENT_FL_CAP_ANY          = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
        TRACE_EVENT_FL_NO_SET_FILTER    = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
        TRACE_EVENT_FL_IGNORE_ENABLE    = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
        TRACE_EVENT_FL_WAS_ENABLED      = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
        TRACE_EVENT_FL_TRACEPOINT       = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
        TRACE_EVENT_FL_KPROBE           = (1 << TRACE_EVENT_FL_KPROBE_BIT),
        TRACE_EVENT_FL_UPROBE           = (1 << TRACE_EVENT_FL_UPROBE_BIT),
};

#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)

struct trace_event_call {
        struct list_head        list;
        struct trace_event_class *class;
        union {
                char                    *name;
                /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
                struct tracepoint       *tp;
        };
        struct trace_event      event;
        char                    *print_fmt;
        struct event_filter     *filter;
        void                    *mod;
        void                    *data;
        /*
         *   bit 0:             filter_active
         *   bit 1:             allow trace by non root (cap any)
         *   bit 2:             failed to apply filter
         *   bit 3:             trace internal event (do not enable)
         *   bit 4:             Event was enabled by module
         *   bit 5:             Event is a tracepoint
         *   bit 6:             Event is a kprobe
         *   bit 7:             Event is a uprobe
         */
        int                     flags; /* static flags of different events */

#ifdef CONFIG_PERF_EVENTS
        int                             perf_refcount;
        struct hlist_head __percpu      *perf_events;
        struct bpf_prog                 *prog;

        int     (*perf_perm)(struct trace_event_call *,
                             struct perf_event *);
#endif
};

static inline const char *
trace_event_name(struct trace_event_call *call)
{
        if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
                return call->tp ? call->tp->name : NULL;
        else
                return call->name;
}
 294
 295struct trace_array;
 296struct trace_subsystem_dir;
 297
 298enum {
 299        EVENT_FILE_FL_ENABLED_BIT,
 300        EVENT_FILE_FL_RECORDED_CMD_BIT,
 301        EVENT_FILE_FL_RECORDED_TGID_BIT,
 302        EVENT_FILE_FL_FILTERED_BIT,
 303        EVENT_FILE_FL_NO_SET_FILTER_BIT,
 304        EVENT_FILE_FL_SOFT_MODE_BIT,
 305        EVENT_FILE_FL_SOFT_DISABLED_BIT,
 306        EVENT_FILE_FL_TRIGGER_MODE_BIT,
 307        EVENT_FILE_FL_TRIGGER_COND_BIT,
 308        EVENT_FILE_FL_PID_FILTER_BIT,
 309};

/*
 * Event file flags:
 *  ENABLED       - The event is enabled
 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 *  RECORDED_TGID - The tgids should be recorded at sched_switch
 *  FILTERED      - The event has a filter attached
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *                   tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
 *  TRIGGER_COND  - When set, one or more triggers have an associated filter
 *  PID_FILTER    - When set, the event is filtered based on pid
 */
enum {
        EVENT_FILE_FL_ENABLED           = (1 << EVENT_FILE_FL_ENABLED_BIT),
        EVENT_FILE_FL_RECORDED_CMD      = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
        EVENT_FILE_FL_RECORDED_TGID     = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
        EVENT_FILE_FL_FILTERED          = (1 << EVENT_FILE_FL_FILTERED_BIT),
        EVENT_FILE_FL_NO_SET_FILTER     = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
        EVENT_FILE_FL_SOFT_MODE         = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
        EVENT_FILE_FL_SOFT_DISABLED     = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
        EVENT_FILE_FL_TRIGGER_MODE      = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
        EVENT_FILE_FL_TRIGGER_COND      = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
        EVENT_FILE_FL_PID_FILTER        = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
};

struct trace_event_file {
        struct list_head                list;
        struct trace_event_call         *event_call;
        struct event_filter __rcu       *filter;
        struct dentry                   *dir;
        struct trace_array              *tr;
        struct trace_subsystem_dir      *system;
        struct list_head                triggers;

        /*
         * 32 bit flags:
         *   bit 0:             enabled
         *   bit 1:             enabled cmd record
         *   bit 2:             enabled tgid record
         *   bit 3:             filtered
         *   bit 4:             filter failed to apply (no set filter)
         *   bit 5:             enable/disable with the soft disable bit
         *   bit 6:             soft disabled
         *   bit 7:             trigger enabled
         *   bit 8:             trigger condition (a trigger has a filter)
         *   bit 9:             pid filtered
         *
         * Note: The bits must be set atomically to prevent races
         * from other writers. Reads of the flags do not need to be in
         * sync, as they occur in critical sections. The way the flags
         * are currently used, the only effect of a change is that it
         * may take a short while to propagate to other CPUs due to
         * caching and such, which is mostly OK ;-)
         */
        unsigned long           flags;
        atomic_t                sm_ref; /* soft-mode reference counter */
        atomic_t                tm_ref; /* trigger-mode reference counter */
};

#define __TRACE_EVENT_FLAGS(name, value)                                \
        static int __init trace_init_flags_##name(void)                 \
        {                                                               \
                event_##name.flags |= value;                            \
                return 0;                                               \
        }                                                               \
        early_initcall(trace_init_flags_##name);

#define __TRACE_EVENT_PERF_PERM(name, expr...)                          \
        static int perf_perm_##name(struct trace_event_call *tp_event, \
                                    struct perf_event *p_event)         \
        {                                                               \
                return ({ expr; });                                     \
        }                                                               \
        static int __init trace_init_perf_perm_##name(void)             \
        {                                                               \
                event_##name.perf_perm = &perf_perm_##name;             \
                return 0;                                               \
        }                                                               \
        early_initcall(trace_init_perf_perm_##name);
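/*
 * Illustrative sketch (hypothetical): the two macros above are normally
 * reached through the TRACE_EVENT_FLAGS()/TRACE_EVENT_PERF_PERM() wrappers
 * from <linux/tracepoint.h>, e.g. to let any user enable an event for perf
 * or to restrict perf use of it:
 *
 *      TRACE_EVENT_FLAGS(sample_event, TRACE_EVENT_FL_CAP_ANY)
 *
 *      TRACE_EVENT_PERF_PERM(sample_event,
 *                            is_sampling_event(p_event) ? -EPERM : 0)
 */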

#define PERF_MAX_TRACE_SIZE     2048

#define MAX_FILTER_STR_VAL      256     /* Should handle KSYM_SYMBOL_LEN */

enum event_trigger_type {
        ETT_NONE                = (0),
        ETT_TRACE_ONOFF         = (1 << 0),
        ETT_SNAPSHOT            = (1 << 1),
        ETT_STACKTRACE          = (1 << 2),
        ETT_EVENT_ENABLE        = (1 << 3),
        ETT_EVENT_HIST          = (1 << 4),
        ETT_HIST_ENABLE         = (1 << 5),
};

extern int filter_match_preds(struct event_filter *filter, void *rec);

extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
                                                   void *rec);
extern void event_triggers_post_call(struct trace_event_file *file,
                                     enum event_trigger_type tt,
                                     void *rec);

bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);

/**
 * trace_trigger_soft_disabled - do triggers and test if soft disabled
 * @file: The file pointer of the event to test
 *
 * If any triggers without filters are attached to this event, they
 * will be called here. If the event is soft disabled and has no
 * triggers that require testing the fields, it will return true,
 * otherwise false.
 */
static inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
        unsigned long eflags = file->flags;

        if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
                if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
                        event_triggers_call(file, NULL);
                if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
                        return true;
                if (eflags & EVENT_FILE_FL_PID_FILTER)
                        return trace_event_ignore_this_pid(file);
        }
        return false;
}
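/*
 * Illustrative sketch (hypothetical): generated probe code typically bails
 * out early with this helper before reserving ring buffer space:
 *
 *      if (trace_trigger_soft_disabled(trace_file))
 *              return;
 */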

#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
        return 1;
}
#endif
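/*
 * Illustrative sketch (hypothetical): callers that support BPF filtering
 * generally skip the event when the attached program returns 0, roughly:
 *
 *      if (prog && !trace_call_bpf(prog, ctx))
 *              return;
 */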

enum {
        FILTER_OTHER = 0,
        FILTER_STATIC_STRING,
        FILTER_DYN_STRING,
        FILTER_PTR_STRING,
        FILTER_TRACE_FN,
        FILTER_COMM,
        FILTER_CPU,
};

extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
                              const char *name, int offset, int size,
                              int is_signed, int filter_type);
extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);
extern int trace_event_get_offsets(struct trace_event_call *call);

#define is_signed_type(type)    (((type)(-1)) < (type)1)
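/*
 * Illustrative sketch (hypothetical): is_signed_type() evaluates to 1 for
 * signed types and 0 for unsigned ones, which is roughly how generated
 * code fills the is_signed argument of trace_define_field(), e.g.:
 *
 *      trace_define_field(call, "pid_t", "pid",
 *                         offsetof(struct trace_entry, pid),
 *                         sizeof(pid_t), is_signed_type(pid_t),
 *                         FILTER_OTHER);
 */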

int trace_set_clr_event(const char *system, const char *event, int set);

/*
 * The double __builtin_constant_p is because gcc will give us an error
 * if we try to initialize the static variable with fmt when fmt is not
 * a constant, even though the outer if statement would optimize it out.
 */
#define event_trace_printk(ip, fmt, args...)                            \
do {                                                                    \
        __trace_printk_check_format(fmt, ##args);                       \
        tracing_record_cmdline(current);                                \
        if (__builtin_constant_p(fmt)) {                                \
                static const char *trace_printk_fmt                     \
                  __attribute__((section("__trace_printk_fmt"))) =      \
                        __builtin_constant_p(fmt) ? fmt : NULL;         \
                                                                        \
                __trace_bprintk(ip, trace_printk_fmt, ##args);          \
        } else                                                          \
                __trace_printk(ip, fmt, ##args);                        \
} while (0)

#ifdef CONFIG_PERF_EVENTS
struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int  perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int  perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
                                     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);

void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
                               struct trace_event_call *call, u64 count,
                               struct pt_regs *regs, struct hlist_head *head,
                               struct task_struct *task);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
                       u64 count, struct pt_regs *regs, void *head,
                       struct task_struct *task, struct perf_event *event)
{
        perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
}
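/*
 * Illustrative sketch (hypothetical): perf probes allocate a per-context
 * buffer, fill in the record and submit it:
 *
 *      struct pt_regs *regs;
 *      int rctx;
 *      void *entry;
 *
 *      entry = perf_trace_buf_alloc(size, &regs, &rctx);
 *      if (!entry)
 *              return;
 *      ... fill in the record ...
 *      perf_trace_buf_submit(entry, size, rctx, type, 1, regs,
 *                            head, NULL, NULL);
 */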
#endif

#endif /* _LINUX_TRACE_EVENT_H */