/* linux/include/linux/trace_events.h */
#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
struct bpf_prog;

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
                                  unsigned long flags,
                                  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
                                    const struct trace_print_flags *symbol_array);
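
/*
 * These helpers back the __print_flags()/__print_symbolic() macros used
 * in a TRACE_EVENT()'s TP_printk(). Illustrative sketch (the state names
 * are hypothetical, not part of this header):
 *
 *      TP_printk("state=%s",
 *                __print_symbolic(__entry->state,
 *                                 { 0, "RUNNING" }, { 1, "SLEEPING" }))
 */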

#if BITS_PER_LONG == 32
const char *trace_print_symbols_seq_u64(struct trace_seq *p,
                                        unsigned long long val,
                                        const struct trace_print_flags_u64
                                                                 *symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
                                    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
                                const unsigned char *buf, int len);

const char *trace_print_array_seq(struct trace_seq *p,
                                  const void *buf, int count,
                                  size_t el_size);

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
                          struct trace_event *event);

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
        unsigned short          type;
        unsigned char           flags;
        unsigned char           preempt_count;
        int                     pid;
};

#define TRACE_EVENT_TYPE_MAX                                            \
        ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
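
/*
 * With the 16-bit "type" field above, this evaluates to
 * (1 << 16) - 1 = 65535.
 */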

/*
 * Trace iterator - used by the printout routines that present trace
 * results to users; these routines may sleep:
 */
struct trace_iterator {
        struct trace_array      *tr;
        struct tracer           *trace;
        struct trace_buffer     *trace_buffer;
        void                    *private;
        int                     cpu_file;
        struct mutex            mutex;
        struct ring_buffer_iter **buffer_iter;
        unsigned long           iter_flags;

        /* trace_seq for __print_flags() and __print_symbolic() etc. */
        struct trace_seq        tmp_seq;

        cpumask_var_t           started;

        /* true when the file currently open is a snapshot */
        bool                    snapshot;

        /* The below is zeroed out in pipe_read */
        struct trace_seq        seq;
        struct trace_entry      *ent;
        unsigned long           lost_events;
        int                     leftover;
        int                     ent_size;
        int                     cpu;
        u64                     ts;

        loff_t                  pos;
        long                    idx;

        /* All new fields here will be zeroed out in pipe_read */
};

enum trace_iter_flags {
        TRACE_FILE_LAT_FMT      = 1,
        TRACE_FILE_ANNOTATE     = 2,
        TRACE_FILE_TIME_IN_NS   = 4,
};

typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
                                      int flags, struct trace_event *event);

struct trace_event_functions {
        trace_print_func        trace;
        trace_print_func        raw;
        trace_print_func        hex;
        trace_print_func        binary;
};

struct trace_event {
        struct hlist_node               node;
        struct list_head                list;
        int                             type;
        struct trace_event_functions    *funcs;
};

extern int register_trace_event(struct trace_event *event);
extern int unregister_trace_event(struct trace_event *event);

/* Return values for print_line callback */
enum print_line_t {
        TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
        TRACE_TYPE_HANDLED      = 1,
        TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
        TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
};

/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
static inline enum print_line_t trace_handle_return(struct trace_seq *s)
{
        return trace_seq_has_overflowed(s) ?
                TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}
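
/*
 * Illustrative sketch of a trace_print_func using the helper above
 * (hypothetical event, not part of this header):
 *
 *      static enum print_line_t my_event_print(struct trace_iterator *iter,
 *                                              int flags,
 *                                              struct trace_event *event)
 *      {
 *              trace_seq_printf(&iter->seq, "my_event: cpu=%d\n", iter->cpu);
 *              return trace_handle_return(&iter->seq);
 *      }
 */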

void tracing_generic_entry_update(struct trace_entry *entry,
                                  unsigned long flags,
                                  int pc);
struct trace_event_file;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
                                struct trace_event_file *trace_file,
                                int type, unsigned long len,
                                unsigned long flags, int pc);

void tracing_record_cmdline(struct task_struct *tsk);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

struct event_filter;

enum trace_reg {
        TRACE_REG_REGISTER,
        TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
        TRACE_REG_PERF_REGISTER,
        TRACE_REG_PERF_UNREGISTER,
        TRACE_REG_PERF_OPEN,
        TRACE_REG_PERF_CLOSE,
        TRACE_REG_PERF_ADD,
        TRACE_REG_PERF_DEL,
#endif
};

struct trace_event_call;

struct trace_event_class {
        const char              *system;
        void                    *probe;
#ifdef CONFIG_PERF_EVENTS
        void                    *perf_probe;
#endif
        int                     (*reg)(struct trace_event_call *event,
                                       enum trace_reg type, void *data);
        int                     (*define_fields)(struct trace_event_call *);
        struct list_head        *(*get_fields)(struct trace_event_call *);
        struct list_head        fields;
        int                     (*raw_init)(struct trace_event_call *);
};

extern int trace_event_reg(struct trace_event_call *event,
                           enum trace_reg type, void *data);

struct trace_event_buffer {
        struct ring_buffer              *buffer;
        struct ring_buffer_event        *event;
        struct trace_event_file         *trace_file;
        void                            *entry;
        unsigned long                   flags;
        int                             pc;
};

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
                                 struct trace_event_file *trace_file,
                                 unsigned long len);

void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
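
/*
 * Typical recording sequence, as a sketch (the entry layout is a
 * hypothetical example, not part of this header):
 *
 *      struct trace_event_buffer fbuffer;
 *      struct my_entry *entry;         // hypothetical per-event struct
 *
 *      entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *                                         sizeof(*entry));
 *      if (!entry)
 *              return;
 *      entry->value = 42;              // fill in event-specific fields
 *      trace_event_buffer_commit(&fbuffer);
 */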

enum {
        TRACE_EVENT_FL_FILTERED_BIT,
        TRACE_EVENT_FL_CAP_ANY_BIT,
        TRACE_EVENT_FL_NO_SET_FILTER_BIT,
        TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
        TRACE_EVENT_FL_WAS_ENABLED_BIT,
        TRACE_EVENT_FL_TRACEPOINT_BIT,
        TRACE_EVENT_FL_KPROBE_BIT,
        TRACE_EVENT_FL_UPROBE_BIT,
};

/*
 * Event flags:
 *  FILTERED      - The event has a filter attached
 *  CAP_ANY       - Any user can enable for perf
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
 *  WAS_ENABLED   - Set and stays set when an event was ever enabled
 *                    (used for module unloading: if a module's event was
 *                     ever enabled, it is best to clear the buffers that
 *                     used it when the module is removed)
 *  TRACEPOINT    - Event is a tracepoint
 *  KPROBE        - Event is a kprobe
 *  UPROBE        - Event is a uprobe
 */
enum {
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
        TRACE_EVENT_FL_CAP_ANY          = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
        TRACE_EVENT_FL_NO_SET_FILTER    = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
        TRACE_EVENT_FL_IGNORE_ENABLE    = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
        TRACE_EVENT_FL_WAS_ENABLED      = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
        TRACE_EVENT_FL_TRACEPOINT       = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
        TRACE_EVENT_FL_KPROBE           = (1 << TRACE_EVENT_FL_KPROBE_BIT),
        TRACE_EVENT_FL_UPROBE           = (1 << TRACE_EVENT_FL_UPROBE_BIT),
};

#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)

struct trace_event_call {
        struct list_head        list;
        struct trace_event_class *class;
        union {
                char                    *name;
                /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
                struct tracepoint       *tp;
        };
        struct trace_event      event;
        char                    *print_fmt;
        struct event_filter     *filter;
        void                    *mod;
        void                    *data;
        /*
         * Static flags of different events; the bits follow the
         * TRACE_EVENT_FL_*_BIT enum above:
         *   bit 0:             filter_active
         *   bit 1:             allow trace by non root (cap any)
         *   bit 2:             failed to apply filter
         *   bit 3:             trace internal event (do not enable)
         *   bit 4:             event was ever enabled
         *   bit 5:             event is a tracepoint
         *   bit 6:             event is a kprobe
         *   bit 7:             event is a uprobe
         */
        int                     flags;

#ifdef CONFIG_PERF_EVENTS
        int                             perf_refcount;
        struct hlist_head __percpu      *perf_events;
        struct bpf_prog                 *prog;

        int     (*perf_perm)(struct trace_event_call *,
                             struct perf_event *);
#endif
};

static inline const char *
trace_event_name(struct trace_event_call *call)
{
        if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
                return call->tp ? call->tp->name : NULL;
        else
                return call->name;
}

struct trace_array;
struct trace_subsystem_dir;

enum {
        EVENT_FILE_FL_ENABLED_BIT,
        EVENT_FILE_FL_RECORDED_CMD_BIT,
        EVENT_FILE_FL_FILTERED_BIT,
        EVENT_FILE_FL_NO_SET_FILTER_BIT,
        EVENT_FILE_FL_SOFT_MODE_BIT,
        EVENT_FILE_FL_SOFT_DISABLED_BIT,
        EVENT_FILE_FL_TRIGGER_MODE_BIT,
        EVENT_FILE_FL_TRIGGER_COND_BIT,
        EVENT_FILE_FL_PID_FILTER_BIT,
};

/*
 * Event file flags:
 *  ENABLED       - The event is enabled
 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 *  FILTERED      - The event has a filter attached
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *                   tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
 *  TRIGGER_COND  - When set, one or more triggers have an associated filter
 *  PID_FILTER    - When set, the event is filtered based on pid
 */
enum {
        EVENT_FILE_FL_ENABLED           = (1 << EVENT_FILE_FL_ENABLED_BIT),
        EVENT_FILE_FL_RECORDED_CMD      = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
        EVENT_FILE_FL_FILTERED          = (1 << EVENT_FILE_FL_FILTERED_BIT),
        EVENT_FILE_FL_NO_SET_FILTER     = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
        EVENT_FILE_FL_SOFT_MODE         = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
        EVENT_FILE_FL_SOFT_DISABLED     = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
        EVENT_FILE_FL_TRIGGER_MODE      = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
        EVENT_FILE_FL_TRIGGER_COND      = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
        EVENT_FILE_FL_PID_FILTER        = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
};

struct trace_event_file {
        struct list_head                list;
        struct trace_event_call         *event_call;
        struct event_filter             *filter;
        struct dentry                   *dir;
        struct trace_array              *tr;
        struct trace_subsystem_dir      *system;
        struct list_head                triggers;

        /*
         * 32 bit flags:
         *   bit 0:             enabled
         *   bit 1:             enabled cmd record
         *   bit 2:             enable/disable with the soft disable bit
         *   bit 3:             soft disabled
         *   bit 4:             trigger enabled
         *
         * Note: The bits must be set atomically to prevent races
         * from other writers. Reads of flags do not need to be
         * synchronized, as they occur in critical sections. Given the
         * way flags is currently used, a change may simply take a
         * short while to propagate to other CPUs due to caching and
         * such, which is mostly OK ;-)
         */
        unsigned long           flags;
        atomic_t                sm_ref; /* soft-mode reference counter */
        atomic_t                tm_ref; /* trigger-mode reference counter */
};

#define __TRACE_EVENT_FLAGS(name, value)                                \
        static int __init trace_init_flags_##name(void)                 \
        {                                                               \
                event_##name.flags |= value;                            \
                return 0;                                               \
        }                                                               \
        early_initcall(trace_init_flags_##name);

#define __TRACE_EVENT_PERF_PERM(name, expr...)                          \
        static int perf_perm_##name(struct trace_event_call *tp_event, \
                                    struct perf_event *p_event)         \
        {                                                               \
                return ({ expr; });                                     \
        }                                                               \
        static int __init trace_init_perf_perm_##name(void)             \
        {                                                               \
                event_##name.perf_perm = &perf_perm_##name;             \
                return 0;                                               \
        }                                                               \
        early_initcall(trace_init_perf_perm_##name);

#define PERF_MAX_TRACE_SIZE     2048

#define MAX_FILTER_STR_VAL      256     /* Should handle KSYM_SYMBOL_LEN */

enum event_trigger_type {
        ETT_NONE                = (0),
        ETT_TRACE_ONOFF         = (1 << 0),
        ETT_SNAPSHOT            = (1 << 1),
        ETT_STACKTRACE          = (1 << 2),
        ETT_EVENT_ENABLE        = (1 << 3),
        ETT_EVENT_HIST          = (1 << 4),
        ETT_HIST_ENABLE         = (1 << 5),
};

extern int filter_match_preds(struct event_filter *filter, void *rec);

extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
                                                   void *rec);
extern void event_triggers_post_call(struct trace_event_file *file,
                                     enum event_trigger_type tt,
                                     void *rec);

bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);

/**
 * trace_trigger_soft_disabled - do triggers and test if soft disabled
 * @file: The file pointer of the event to test
 *
 * If any triggers without filters are attached to this event, they
 * will be called here. If the event is soft disabled and has no
 * triggers that require testing the fields, it will return true,
 * otherwise false.
 */
static inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
        unsigned long eflags = file->flags;

        if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
                if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
                        event_triggers_call(file, NULL);
                if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
                        return true;
                if (eflags & EVENT_FILE_FL_PID_FILTER)
                        return trace_event_ignore_this_pid(file);
        }
        return false;
}
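
/*
 * Sketch of the usual call site: a probe bails out before reserving
 * buffer space (the probe and its argument are hypothetical, not part
 * of this header):
 *
 *      static void my_event_probe(void *data, int value)
 *      {
 *              struct trace_event_file *trace_file = data;
 *
 *              if (trace_trigger_soft_disabled(trace_file))
 *                      return;
 *              // ... reserve, fill and commit the event here
 *      }
 */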

#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
        return 1;
}
#endif

enum {
        FILTER_OTHER = 0,
        FILTER_STATIC_STRING,
        FILTER_DYN_STRING,
        FILTER_PTR_STRING,
        FILTER_TRACE_FN,
        FILTER_COMM,
        FILTER_CPU,
};

extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
                              const char *name, int offset, int size,
                              int is_signed, int filter_type);
extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);
extern int trace_event_get_offsets(struct trace_event_call *call);
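
/*
 * Sketch of a define_fields callback built on trace_define_field()
 * (hypothetical entry struct and field, not part of this header):
 *
 *      static int my_event_define_fields(struct trace_event_call *call)
 *      {
 *              return trace_define_field(call, "int", "value",
 *                                        offsetof(struct my_entry, value),
 *                                        sizeof(int), is_signed_type(int),
 *                                        FILTER_OTHER);
 *      }
 */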

#define is_signed_type(type)    (((type)(-1)) < (type)1)
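
/*
 * For example, is_signed_type(int) is true (-1 < 1), while
 * is_signed_type(unsigned int) is false ((unsigned int)-1 is UINT_MAX).
 */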

int trace_set_clr_event(const char *system, const char *event, int set);
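
/*
 * For example, trace_set_clr_event("sched", "sched_switch", 1) enables
 * the sched_switch event, and passing 0 as @set disables it; a NULL
 * @system or @event acts as a wildcard.
 */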

/*
 * The double __builtin_constant_p check is needed because gcc will
 * report an error if we try to initialize the static variable from
 * fmt when fmt is not a constant, even though the outer if statement
 * would be optimized away in that case.
 */
#define event_trace_printk(ip, fmt, args...)                            \
do {                                                                    \
        __trace_printk_check_format(fmt, ##args);                       \
        tracing_record_cmdline(current);                                \
        if (__builtin_constant_p(fmt)) {                                \
                static const char *trace_printk_fmt                     \
                  __attribute__((section("__trace_printk_fmt"))) =      \
                        __builtin_constant_p(fmt) ? fmt : NULL;         \
                                                                        \
                __trace_bprintk(ip, trace_printk_fmt, ##args);          \
        } else                                                          \
                __trace_printk(ip, fmt, ##args);                        \
} while (0)
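
/*
 * Usage sketch (_THIS_IP_ supplies the call-site address):
 *
 *      event_trace_printk(_THIS_IP_, "count=%d\n", count);
 */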

#ifdef CONFIG_PERF_EVENTS
struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int  perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int  perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
                                      char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);

void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
                               struct trace_event_call *call, u64 count,
                               struct pt_regs *regs, struct hlist_head *head,
                               struct task_struct *task);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
                      u64 count, struct pt_regs *regs, void *head,
                      struct task_struct *task)
{
        perf_tp_event(type, count, raw_data, size, regs, head, rctx, task);
}
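
/*
 * Sketch of the alloc/fill/submit pattern used by perf probes (the
 * record layout is hypothetical; @type and @head come from the event):
 *
 *      struct my_entry *entry;
 *      struct pt_regs *regs;
 *      int rctx;
 *
 *      entry = perf_trace_buf_alloc(sizeof(*entry), &regs, &rctx);
 *      if (!entry)
 *              return;
 *      entry->value = 42;      // fill in event-specific fields
 *      perf_trace_buf_submit(entry, sizeof(*entry), rctx, type,
 *                            1, regs, head, NULL);
 */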
#endif

#endif /* _LINUX_TRACE_EVENT_H */