linux/include/linux/trace_events.h

#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
struct bpf_prog;

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
                                  unsigned long flags,
                                  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
                                    const struct trace_print_flags *symbol_array);

#if BITS_PER_LONG == 32
const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
                      unsigned long long flags,
                      const struct trace_print_flags_u64 *flag_array);

const char *trace_print_symbols_seq_u64(struct trace_seq *p,
                                        unsigned long long val,
                                        const struct trace_print_flags_u64
                                                                 *symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
                                    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
                                const unsigned char *buf, int len,
                                bool concatenate);

const char *trace_print_array_seq(struct trace_seq *p,
                                   const void *buf, int count,
                                   size_t el_size);

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
                          struct trace_event *event);

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
        unsigned short          type;
        unsigned char           flags;
        unsigned char           preempt_count;
        int                     pid;
};

#define TRACE_EVENT_TYPE_MAX                                            \
        ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
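
/*
 * With the 2-byte "type" field above, this evaluates to
 * (1 << 16) - 1 = 65535, the largest event type id.
 */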

/*
 * Trace iterator - used by printout routines that present trace
 * results to users; these routines might sleep, etc:
 */
struct trace_iterator {
        struct trace_array      *tr;
        struct tracer           *trace;
        struct trace_buffer     *trace_buffer;
        void                    *private;
        int                     cpu_file;
        struct mutex            mutex;
        struct ring_buffer_iter **buffer_iter;
        unsigned long           iter_flags;

        /* trace_seq for __print_flags() and __print_symbolic() etc. */
        struct trace_seq        tmp_seq;

        cpumask_var_t           started;

        /* true when the currently open file is a snapshot */
        bool                    snapshot;

        /* The below is zeroed out in pipe_read */
        struct trace_seq        seq;
        struct trace_entry      *ent;
        unsigned long           lost_events;
        int                     leftover;
        int                     ent_size;
        int                     cpu;
        u64                     ts;

        loff_t                  pos;
        long                    idx;

        /* All new fields here will be zeroed out in pipe_read */
};

enum trace_iter_flags {
        TRACE_FILE_LAT_FMT      = 1,
        TRACE_FILE_ANNOTATE     = 2,
        TRACE_FILE_TIME_IN_NS   = 4,
};


typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
                                      int flags, struct trace_event *event);

struct trace_event_functions {
        trace_print_func        trace;
        trace_print_func        raw;
        trace_print_func        hex;
        trace_print_func        binary;
};

struct trace_event {
        struct hlist_node               node;
        struct list_head                list;
        int                             type;
        struct trace_event_functions    *funcs;
};

extern int register_trace_event(struct trace_event *event);
extern int unregister_trace_event(struct trace_event *event);

/* Return values for print_line callback */
enum print_line_t {
        TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
        TRACE_TYPE_HANDLED      = 1,
        TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
        TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
};

/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
static inline enum print_line_t trace_handle_return(struct trace_seq *s)
{
        return trace_seq_has_overflowed(s) ?
                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}
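
/*
 * Example use in a trace_print_func callback (illustrative sketch;
 * "foo_output" is hypothetical):
 *
 *	static enum print_line_t
 *	foo_output(struct trace_iterator *iter, int flags,
 *		   struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo: %d\n", 42);
 *		return trace_handle_return(&iter->seq);
 *	}
 */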

void tracing_generic_entry_update(struct trace_entry *entry,
                                  unsigned long flags,
                                  int pc);
struct trace_event_file;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
                                struct trace_event_file *trace_file,
                                int type, unsigned long len,
                                unsigned long flags, int pc);

void tracing_record_cmdline(struct task_struct *tsk);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

struct event_filter;

enum trace_reg {
        TRACE_REG_REGISTER,
        TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
        TRACE_REG_PERF_REGISTER,
        TRACE_REG_PERF_UNREGISTER,
        TRACE_REG_PERF_OPEN,
        TRACE_REG_PERF_CLOSE,
        TRACE_REG_PERF_ADD,
        TRACE_REG_PERF_DEL,
#endif
};

struct trace_event_call;

struct trace_event_class {
        const char              *system;
        void                    *probe;
#ifdef CONFIG_PERF_EVENTS
        void                    *perf_probe;
#endif
        int                     (*reg)(struct trace_event_call *event,
                                       enum trace_reg type, void *data);
        int                     (*define_fields)(struct trace_event_call *);
        struct list_head        *(*get_fields)(struct trace_event_call *);
        struct list_head        fields;
        int                     (*raw_init)(struct trace_event_call *);
};

extern int trace_event_reg(struct trace_event_call *event,
                            enum trace_reg type, void *data);

struct trace_event_buffer {
        struct ring_buffer              *buffer;
        struct ring_buffer_event        *event;
        struct trace_event_file         *trace_file;
        void                            *entry;
        unsigned long                   flags;
        int                             pc;
};

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
                                  struct trace_event_file *trace_file,
                                  unsigned long len);

void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
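
/*
 * Typical pairing (sketch; "struct foo_entry" and its field are
 * hypothetical): reserve space for an entry, fill it in, then
 * commit it to the ring buffer.
 *
 *	struct trace_event_buffer fbuffer;
 *	struct foo_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->bar = bar;
 *	trace_event_buffer_commit(&fbuffer);
 */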

enum {
        TRACE_EVENT_FL_FILTERED_BIT,
        TRACE_EVENT_FL_CAP_ANY_BIT,
        TRACE_EVENT_FL_NO_SET_FILTER_BIT,
        TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
        TRACE_EVENT_FL_WAS_ENABLED_BIT,
        TRACE_EVENT_FL_TRACEPOINT_BIT,
        TRACE_EVENT_FL_KPROBE_BIT,
        TRACE_EVENT_FL_UPROBE_BIT,
};

/*
 * Event flags:
 *  FILTERED      - The event has a filter attached
 *  CAP_ANY       - Any user can enable for perf
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
 *  WAS_ENABLED   - Set and stays set when an event was ever enabled
 *                    (used for module unloading; if a module event is enabled,
 *                     it is best to clear the buffers that used it).
 *  TRACEPOINT    - Event is a tracepoint
 *  KPROBE        - Event is a kprobe
 *  UPROBE        - Event is a uprobe
 */
enum {
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
        TRACE_EVENT_FL_CAP_ANY          = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
        TRACE_EVENT_FL_NO_SET_FILTER    = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
        TRACE_EVENT_FL_IGNORE_ENABLE    = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
        TRACE_EVENT_FL_WAS_ENABLED      = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
        TRACE_EVENT_FL_TRACEPOINT       = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
        TRACE_EVENT_FL_KPROBE           = (1 << TRACE_EVENT_FL_KPROBE_BIT),
        TRACE_EVENT_FL_UPROBE           = (1 << TRACE_EVENT_FL_UPROBE_BIT),
};

#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)

struct trace_event_call {
        struct list_head        list;
        struct trace_event_class *class;
        union {
                char                    *name;
                /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
                struct tracepoint       *tp;
        };
        struct trace_event      event;
        char                    *print_fmt;
        struct event_filter     *filter;
        void                    *mod;
        void                    *data;
        /*
         *   bit 0:             filter_active
         *   bit 1:             allow trace by non root (cap any)
         *   bit 2:             failed to apply filter
         *   bit 3:             trace internal event (do not enable)
         *   bit 4:             Event was enabled by module
         *   bit 5:             use call filter rather than file filter
         *   bit 6:             Event is a tracepoint
         */
        int                     flags; /* static flags of different events */

#ifdef CONFIG_PERF_EVENTS
        int                             perf_refcount;
        struct hlist_head __percpu      *perf_events;
        struct bpf_prog                 *prog;

        int     (*perf_perm)(struct trace_event_call *,
                             struct perf_event *);
#endif
};

static inline const char *
trace_event_name(struct trace_event_call *call)
{
        if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
                return call->tp ? call->tp->name : NULL;
        else
                return call->name;
}

struct trace_array;
struct trace_subsystem_dir;

enum {
        EVENT_FILE_FL_ENABLED_BIT,
        EVENT_FILE_FL_RECORDED_CMD_BIT,
        EVENT_FILE_FL_FILTERED_BIT,
        EVENT_FILE_FL_NO_SET_FILTER_BIT,
        EVENT_FILE_FL_SOFT_MODE_BIT,
        EVENT_FILE_FL_SOFT_DISABLED_BIT,
        EVENT_FILE_FL_TRIGGER_MODE_BIT,
        EVENT_FILE_FL_TRIGGER_COND_BIT,
        EVENT_FILE_FL_PID_FILTER_BIT,
};

/*
 * Event file flags:
 *  ENABLED       - The event is enabled
 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 *  FILTERED      - The event has a filter attached
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *                   tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
 *  TRIGGER_COND  - When set, one or more triggers have an associated filter
 *  PID_FILTER    - When set, the event is filtered based on pid
 */
enum {
        EVENT_FILE_FL_ENABLED           = (1 << EVENT_FILE_FL_ENABLED_BIT),
        EVENT_FILE_FL_RECORDED_CMD      = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
        EVENT_FILE_FL_FILTERED          = (1 << EVENT_FILE_FL_FILTERED_BIT),
        EVENT_FILE_FL_NO_SET_FILTER     = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
        EVENT_FILE_FL_SOFT_MODE         = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
        EVENT_FILE_FL_SOFT_DISABLED     = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
        EVENT_FILE_FL_TRIGGER_MODE      = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
        EVENT_FILE_FL_TRIGGER_COND      = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
        EVENT_FILE_FL_PID_FILTER        = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
};

struct trace_event_file {
        struct list_head                list;
        struct trace_event_call         *event_call;
        struct event_filter             *filter;
        struct dentry                   *dir;
        struct trace_array              *tr;
        struct trace_subsystem_dir      *system;
        struct list_head                triggers;

        /*
         * 32 bit flags:
         *   bit 0:             enabled
         *   bit 1:             enabled cmd record
         *   bit 2:             enable/disable with the soft disable bit
         *   bit 3:             soft disabled
         *   bit 4:             trigger enabled
         *
         * Note: The bits must be set atomically to prevent races
         * from other writers. Reads of flags do not need to be in
         * sync as they occur in critical sections. But the way flags
         * is currently used, these changes do not affect the code
         * except that when a change is made, it may have a slight
         * delay in propagating the changes to other CPUs due to
         * caching and such. Which is mostly OK ;-)
         */
        unsigned long           flags;
        atomic_t                sm_ref; /* soft-mode reference counter */
        atomic_t                tm_ref; /* trigger-mode reference counter */
};
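
/*
 * Per the note above, writers should update "flags" with atomic
 * bitops, e.g. (illustrative):
 *
 *	set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 *	clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
 */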

#define __TRACE_EVENT_FLAGS(name, value)                                \
        static int __init trace_init_flags_##name(void)                 \
        {                                                               \
                event_##name.flags |= value;                            \
                return 0;                                               \
        }                                                               \
        early_initcall(trace_init_flags_##name);

#define __TRACE_EVENT_PERF_PERM(name, expr...)                          \
        static int perf_perm_##name(struct trace_event_call *tp_event, \
                                    struct perf_event *p_event)         \
        {                                                               \
                return ({ expr; });                                     \
        }                                                               \
        static int __init trace_init_perf_perm_##name(void)             \
        {                                                               \
                event_##name.perf_perm = &perf_perm_##name;             \
                return 0;                                               \
        }                                                               \
        early_initcall(trace_init_perf_perm_##name);

#define PERF_MAX_TRACE_SIZE     2048

#define MAX_FILTER_STR_VAL      256     /* Should handle KSYM_SYMBOL_LEN */

enum event_trigger_type {
        ETT_NONE                = (0),
        ETT_TRACE_ONOFF         = (1 << 0),
        ETT_SNAPSHOT            = (1 << 1),
        ETT_STACKTRACE          = (1 << 2),
        ETT_EVENT_ENABLE        = (1 << 3),
        ETT_EVENT_HIST          = (1 << 4),
        ETT_HIST_ENABLE         = (1 << 5),
};

extern int filter_match_preds(struct event_filter *filter, void *rec);

extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
                                                   void *rec);
extern void event_triggers_post_call(struct trace_event_file *file,
                                     enum event_trigger_type tt,
                                     void *rec);

bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);

/**
 * trace_trigger_soft_disabled - do triggers and test if soft disabled
 * @file: The file pointer of the event to test
 *
 * If any triggers without filters are attached to this event, they
 * will be called here. If the event is soft disabled and has no
 * triggers that require testing the fields, it will return true,
 * otherwise false.
 */
static inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
        unsigned long eflags = file->flags;

        if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
                if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
                        event_triggers_call(file, NULL);
                if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
                        return true;
                if (eflags & EVENT_FILE_FL_PID_FILTER)
                        return trace_event_ignore_this_pid(file);
        }
        return false;
}
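
/*
 * Illustrative fast-path check in an event probe (sketch):
 *
 *	if (trace_trigger_soft_disabled(trace_file))
 *		return;
 *	... record the event ...
 */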

#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
        return 1;
}
#endif

enum {
        FILTER_OTHER = 0,
        FILTER_STATIC_STRING,
        FILTER_DYN_STRING,
        FILTER_PTR_STRING,
        FILTER_TRACE_FN,
        FILTER_COMM,
        FILTER_CPU,
};

extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
                              const char *name, int offset, int size,
                              int is_signed, int filter_type);
extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);
extern int trace_event_get_offsets(struct trace_event_call *call);

#define is_signed_type(type)    (((type)(-1)) < (type)1)
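
/*
 * e.g. is_signed_type(int) is true because (int)-1 < (int)1, while
 * is_signed_type(unsigned int) is false because (unsigned int)-1
 * wraps to UINT_MAX.
 */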

int trace_set_clr_event(const char *system, const char *event, int set);

/*
 * The double __builtin_constant_p is because gcc will give us an error
 * if we try to initialize the static variable trace_printk_fmt from fmt
 * when fmt is not a constant, even though the outer if statement would
 * be optimized out.
 */
#define event_trace_printk(ip, fmt, args...)                            \
do {                                                                    \
        __trace_printk_check_format(fmt, ##args);                       \
        tracing_record_cmdline(current);                                \
        if (__builtin_constant_p(fmt)) {                                \
                static const char *trace_printk_fmt                     \
                  __attribute__((section("__trace_printk_fmt"))) =      \
                        __builtin_constant_p(fmt) ? fmt : NULL;         \
                                                                        \
                __trace_bprintk(ip, trace_printk_fmt, ##args);          \
        } else                                                          \
                __trace_printk(ip, fmt, ##args);                        \
} while (0)
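
/*
 * Illustrative call (sketch); _THIS_IP_ supplies the instruction
 * pointer recorded with the message:
 *
 *	event_trace_printk(_THIS_IP_, "count=%d\n", count);
 */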

#ifdef CONFIG_PERF_EVENTS
struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int  perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int  perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
                                     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);

void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
                               struct trace_event_call *call, u64 count,
                               struct pt_regs *regs, struct hlist_head *head,
                               struct task_struct *task);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
                       u64 count, struct pt_regs *regs, void *head,
                       struct task_struct *task)
{
        perf_tp_event(type, count, raw_data, size, regs, head, rctx, task);
}
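
/*
 * Typical perf path (sketch; sizes and the entry layout are
 * hypothetical): allocate a raw buffer, fill it, then submit it.
 *
 *	struct pt_regs *regs;
 *	int rctx;
 *	void *entry;
 *
 *	entry = perf_trace_buf_alloc(size, &regs, &rctx);
 *	if (!entry)
 *		return;
 *	... fill entry ...
 *	perf_trace_buf_submit(entry, size, rctx, type, 1, regs,
 *			      head, NULL);
 */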
#endif

#endif /* _LINUX_TRACE_EVENT_H */