linux/include/linux/perf_event.h
<<
>>
Prefs
   1/*
   2 * Performance events:
   3 *
   4 *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
   5 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
   6 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
   7 *
   8 * Data type definitions, declarations, prototypes.
   9 *
  10 *    Started by: Thomas Gleixner and Ingo Molnar
  11 *
  12 * For licencing details see kernel-base/COPYING
  13 */
  14#ifndef _LINUX_PERF_EVENT_H
  15#define _LINUX_PERF_EVENT_H
  16
  17#include <uapi/linux/perf_event.h>
  18
  19/*
  20 * Kernel-internal data types and definitions:
  21 */
  22
  23#ifdef CONFIG_PERF_EVENTS
  24# include <asm/perf_event.h>
  25# include <asm/local64.h>
  26#endif
  27
  28struct perf_guest_info_callbacks {
  29        int                             (*is_in_guest)(void);
  30        int                             (*is_user_mode)(void);
  31        unsigned long                   (*get_guest_ip)(void);
  32};
  33
  34#ifdef CONFIG_HAVE_HW_BREAKPOINT
  35#include <asm/hw_breakpoint.h>
  36#endif
  37
  38#include <linux/list.h>
  39#include <linux/mutex.h>
  40#include <linux/rculist.h>
  41#include <linux/rcupdate.h>
  42#include <linux/spinlock.h>
  43#include <linux/hrtimer.h>
  44#include <linux/fs.h>
  45#include <linux/pid_namespace.h>
  46#include <linux/workqueue.h>
  47#include <linux/ftrace.h>
  48#include <linux/cpu.h>
  49#include <linux/irq_work.h>
  50#include <linux/static_key.h>
  51#include <linux/jump_label_ratelimit.h>
  52#include <linux/atomic.h>
  53#include <linux/sysfs.h>
  54#include <linux/perf_regs.h>
  55#include <linux/workqueue.h>
  56#include <linux/cgroup.h>
  57#include <asm/local.h>
  58
  59struct perf_callchain_entry {
  60        __u64                           nr;
  61        __u64                           ip[PERF_MAX_STACK_DEPTH];
  62};
  63
  64struct perf_raw_record {
  65        u32                             size;
  66        void                            *data;
  67};
  68
  69/*
  70 * branch stack layout:
  71 *  nr: number of taken branches stored in entries[]
  72 *
  73 * Note that nr can vary from sample to sample
  74 * branches (to, from) are stored from most recent
  75 * to least recent, i.e., entries[0] contains the most
  76 * recent branch.
  77 */
  78struct perf_branch_stack {
  79        __u64                           nr;
  80        struct perf_branch_entry        entries[0];
  81};
  82
  83struct task_struct;
  84
  85/*
  86 * extra PMU register associated with an event
  87 */
  88struct hw_perf_event_extra {
  89        u64             config; /* register value */
  90        unsigned int    reg;    /* register address or index */
  91        int             alloc;  /* extra register already allocated */
  92        int             idx;    /* index in shared_regs->regs[] */
  93};
  94
  95/**
  96 * struct hw_perf_event - performance event hardware details:
  97 */
  98struct hw_perf_event {
  99#ifdef CONFIG_PERF_EVENTS
 100        union {
 101                struct { /* hardware */
 102                        u64             config;
 103                        u64             last_tag;
 104                        unsigned long   config_base;
 105                        unsigned long   event_base;
 106                        int             event_base_rdpmc;
 107                        int             idx;
 108                        int             last_cpu;
 109                        int             flags;
 110
 111                        struct hw_perf_event_extra extra_reg;
 112                        struct hw_perf_event_extra branch_reg;
 113                };
 114                struct { /* software */
 115                        struct hrtimer  hrtimer;
 116                };
 117                struct { /* tracepoint */
 118                        /* for tp_event->class */
 119                        struct list_head        tp_list;
 120                };
 121                struct { /* intel_cqm */
 122                        int                     cqm_state;
 123                        int                     cqm_rmid;
 124                        struct list_head        cqm_events_entry;
 125                        struct list_head        cqm_groups_entry;
 126                        struct list_head        cqm_group_entry;
 127                };
 128                struct { /* itrace */
 129                        int                     itrace_started;
 130                };
 131#ifdef CONFIG_HAVE_HW_BREAKPOINT
 132                struct { /* breakpoint */
 133                        /*
 134                         * Crufty hack to avoid the chicken and egg
 135                         * problem hw_breakpoint has with context
 136                         * creation and event initalization.
 137                         */
 138                        struct arch_hw_breakpoint       info;
 139                        struct list_head                bp_list;
 140                };
 141#endif
 142        };
 143        struct task_struct              *target;
 144        int                             state;
 145        local64_t                       prev_count;
 146        u64                             sample_period;
 147        u64                             last_period;
 148        local64_t                       period_left;
 149        u64                             interrupts_seq;
 150        u64                             interrupts;
 151
 152        u64                             freq_time_stamp;
 153        u64                             freq_count_stamp;
 154#endif
 155};
 156
 157/*
 158 * hw_perf_event::state flags
 159 */
 160#define PERF_HES_STOPPED        0x01 /* the counter is stopped */
 161#define PERF_HES_UPTODATE       0x02 /* event->count up-to-date */
 162#define PERF_HES_ARCH           0x04
 163
 164struct perf_event;
 165
 166/*
 167 * Common implementation detail of pmu::{start,commit,cancel}_txn
 168 */
 169#define PERF_EVENT_TXN 0x1
 170
 171/**
 172 * pmu::capabilities flags
 173 */
 174#define PERF_PMU_CAP_NO_INTERRUPT               0x01
 175#define PERF_PMU_CAP_NO_NMI                     0x02
 176#define PERF_PMU_CAP_AUX_NO_SG                  0x04
 177#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF           0x08
 178#define PERF_PMU_CAP_EXCLUSIVE                  0x10
 179#define PERF_PMU_CAP_ITRACE                     0x20
 180
 181/**
 182 * struct pmu - generic performance monitoring unit
 183 */
 184struct pmu {
 185        struct list_head                entry;
 186
 187        struct module                   *module;
 188        struct device                   *dev;
 189        const struct attribute_group    **attr_groups;
 190        const char                      *name;
 191        int                             type;
 192
 193        /*
 194         * various common per-pmu feature flags
 195         */
 196        int                             capabilities;
 197
 198        int * __percpu                  pmu_disable_count;
 199        struct perf_cpu_context * __percpu pmu_cpu_context;
 200        atomic_t                        exclusive_cnt; /* < 0: cpu; > 0: tsk */
 201        int                             task_ctx_nr;
 202        int                             hrtimer_interval_ms;
 203
 204        /*
 205         * Fully disable/enable this PMU, can be used to protect from the PMI
 206         * as well as for lazy/batch writing of the MSRs.
 207         */
 208        void (*pmu_enable)              (struct pmu *pmu); /* optional */
 209        void (*pmu_disable)             (struct pmu *pmu); /* optional */
 210
 211        /*
 212         * Try and initialize the event for this PMU.
 213         * Should return -ENOENT when the @event doesn't match this PMU.
 214         */
 215        int (*event_init)               (struct perf_event *event);
 216
 217        /*
 218         * Notification that the event was mapped or unmapped.  Called
 219         * in the context of the mapping task.
 220         */
 221        void (*event_mapped)            (struct perf_event *event); /*optional*/
 222        void (*event_unmapped)          (struct perf_event *event); /*optional*/
 223
 224#define PERF_EF_START   0x01            /* start the counter when adding    */
 225#define PERF_EF_RELOAD  0x02            /* reload the counter when starting */
 226#define PERF_EF_UPDATE  0x04            /* update the counter when stopping */
 227
 228        /*
 229         * Adds/Removes a counter to/from the PMU, can be done inside
 230         * a transaction, see the ->*_txn() methods.
 231         */
 232        int  (*add)                     (struct perf_event *event, int flags);
 233        void (*del)                     (struct perf_event *event, int flags);
 234
 235        /*
 236         * Starts/Stops a counter present on the PMU. The PMI handler
 237         * should stop the counter when perf_event_overflow() returns
 238         * !0. ->start() will be used to continue.
 239         */
 240        void (*start)                   (struct perf_event *event, int flags);
 241        void (*stop)                    (struct perf_event *event, int flags);
 242
 243        /*
 244         * Updates the counter value of the event.
 245         */
 246        void (*read)                    (struct perf_event *event);
 247
 248        /*
 249         * Group events scheduling is treated as a transaction, add
 250         * group events as a whole and perform one schedulability test.
 251         * If the test fails, roll back the whole group
 252         *
 253         * Start the transaction, after this ->add() doesn't need to
 254         * do schedulability tests.
 255         */
 256        void (*start_txn)               (struct pmu *pmu); /* optional */
 257        /*
 258         * If ->start_txn() disabled the ->add() schedulability test
 259         * then ->commit_txn() is required to perform one. On success
 260         * the transaction is closed. On error the transaction is kept
 261         * open until ->cancel_txn() is called.
 262         */
 263        int  (*commit_txn)              (struct pmu *pmu); /* optional */
 264        /*
 265         * Will cancel the transaction, assumes ->del() is called
 266         * for each successful ->add() during the transaction.
 267         */
 268        void (*cancel_txn)              (struct pmu *pmu); /* optional */
 269
 270        /*
 271         * Will return the value for perf_event_mmap_page::index for this event,
 272         * if no implementation is provided it will default to: event->hw.idx + 1.
 273         */
 274        int (*event_idx)                (struct perf_event *event); /*optional */
 275
 276        /*
 277         * context-switches callback
 278         */
 279        void (*sched_task)              (struct perf_event_context *ctx,
 280                                        bool sched_in);
 281        /*
 282         * PMU specific data size
 283         */
 284        size_t                          task_ctx_size;
 285
 286
 287        /*
 288         * Return the count value for a counter.
 289         */
 290        u64 (*count)                    (struct perf_event *event); /*optional*/
 291
 292        /*
 293         * Set up pmu-private data structures for an AUX area
 294         */
 295        void *(*setup_aux)              (int cpu, void **pages,
 296                                         int nr_pages, bool overwrite);
 297                                        /* optional */
 298
 299        /*
 300         * Free pmu-private AUX data structures
 301         */
 302        void (*free_aux)                (void *aux); /* optional */
 303};
 304
 305/**
 306 * enum perf_event_active_state - the states of a event
 307 */
 308enum perf_event_active_state {
 309        PERF_EVENT_STATE_EXIT           = -3,
 310        PERF_EVENT_STATE_ERROR          = -2,
 311        PERF_EVENT_STATE_OFF            = -1,
 312        PERF_EVENT_STATE_INACTIVE       =  0,
 313        PERF_EVENT_STATE_ACTIVE         =  1,
 314};
 315
 316struct file;
 317struct perf_sample_data;
 318
 319typedef void (*perf_overflow_handler_t)(struct perf_event *,
 320                                        struct perf_sample_data *,
 321                                        struct pt_regs *regs);
 322
 323enum perf_group_flag {
 324        PERF_GROUP_SOFTWARE             = 0x1,
 325};
 326
 327#define SWEVENT_HLIST_BITS              8
 328#define SWEVENT_HLIST_SIZE              (1 << SWEVENT_HLIST_BITS)
 329
 330struct swevent_hlist {
 331        struct hlist_head               heads[SWEVENT_HLIST_SIZE];
 332        struct rcu_head                 rcu_head;
 333};
 334
 335#define PERF_ATTACH_CONTEXT     0x01
 336#define PERF_ATTACH_GROUP       0x02
 337#define PERF_ATTACH_TASK        0x04
 338#define PERF_ATTACH_TASK_DATA   0x08
 339
 340struct perf_cgroup;
 341struct ring_buffer;
 342
 343/**
 344 * struct perf_event - performance event kernel representation:
 345 */
 346struct perf_event {
 347#ifdef CONFIG_PERF_EVENTS
 348        /*
 349         * entry onto perf_event_context::event_list;
 350         *   modifications require ctx->lock
 351         *   RCU safe iterations.
 352         */
 353        struct list_head                event_entry;
 354
 355        /*
 356         * XXX: group_entry and sibling_list should be mutually exclusive;
 357         * either you're a sibling on a group, or you're the group leader.
 358         * Rework the code to always use the same list element.
 359         *
 360         * Locked for modification by both ctx->mutex and ctx->lock; holding
 361         * either sufficies for read.
 362         */
 363        struct list_head                group_entry;
 364        struct list_head                sibling_list;
 365
 366        /*
 367         * We need storage to track the entries in perf_pmu_migrate_context; we
 368         * cannot use the event_entry because of RCU and we want to keep the
 369         * group in tact which avoids us using the other two entries.
 370         */
 371        struct list_head                migrate_entry;
 372
 373        struct hlist_node               hlist_entry;
 374        struct list_head                active_entry;
 375        int                             nr_siblings;
 376        int                             group_flags;
 377        struct perf_event               *group_leader;
 378        struct pmu                      *pmu;
 379
 380        enum perf_event_active_state    state;
 381        unsigned int                    attach_state;
 382        local64_t                       count;
 383        atomic64_t                      child_count;
 384
 385        /*
 386         * These are the total time in nanoseconds that the event
 387         * has been enabled (i.e. eligible to run, and the task has
 388         * been scheduled in, if this is a per-task event)
 389         * and running (scheduled onto the CPU), respectively.
 390         *
 391         * They are computed from tstamp_enabled, tstamp_running and
 392         * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
 393         */
 394        u64                             total_time_enabled;
 395        u64                             total_time_running;
 396
 397        /*
 398         * These are timestamps used for computing total_time_enabled
 399         * and total_time_running when the event is in INACTIVE or
 400         * ACTIVE state, measured in nanoseconds from an arbitrary point
 401         * in time.
 402         * tstamp_enabled: the notional time when the event was enabled
 403         * tstamp_running: the notional time when the event was scheduled on
 404         * tstamp_stopped: in INACTIVE state, the notional time when the
 405         *      event was scheduled off.
 406         */
 407        u64                             tstamp_enabled;
 408        u64                             tstamp_running;
 409        u64                             tstamp_stopped;
 410
 411        /*
 412         * timestamp shadows the actual context timing but it can
 413         * be safely used in NMI interrupt context. It reflects the
 414         * context time as it was when the event was last scheduled in.
 415         *
 416         * ctx_time already accounts for ctx->timestamp. Therefore to
 417         * compute ctx_time for a sample, simply add perf_clock().
 418         */
 419        u64                             shadow_ctx_time;
 420
 421        struct perf_event_attr          attr;
 422        u16                             header_size;
 423        u16                             id_header_size;
 424        u16                             read_size;
 425        struct hw_perf_event            hw;
 426
 427        struct perf_event_context       *ctx;
 428        atomic_long_t                   refcount;
 429
 430        /*
 431         * These accumulate total time (in nanoseconds) that children
 432         * events have been enabled and running, respectively.
 433         */
 434        atomic64_t                      child_total_time_enabled;
 435        atomic64_t                      child_total_time_running;
 436
 437        /*
 438         * Protect attach/detach and child_list:
 439         */
 440        struct mutex                    child_mutex;
 441        struct list_head                child_list;
 442        struct perf_event               *parent;
 443
 444        int                             oncpu;
 445        int                             cpu;
 446
 447        struct list_head                owner_entry;
 448        struct task_struct              *owner;
 449
 450        /* mmap bits */
 451        struct mutex                    mmap_mutex;
 452        atomic_t                        mmap_count;
 453
 454        struct ring_buffer              *rb;
 455        struct list_head                rb_entry;
 456        unsigned long                   rcu_batches;
 457        int                             rcu_pending;
 458
 459        /* poll related */
 460        wait_queue_head_t               waitq;
 461        struct fasync_struct            *fasync;
 462
 463        /* delayed work for NMIs and such */
 464        int                             pending_wakeup;
 465        int                             pending_kill;
 466        int                             pending_disable;
 467        struct irq_work                 pending;
 468
 469        atomic_t                        event_limit;
 470
 471        void (*destroy)(struct perf_event *);
 472        struct rcu_head                 rcu_head;
 473
 474        struct pid_namespace            *ns;
 475        u64                             id;
 476
 477        u64                             (*clock)(void);
 478        perf_overflow_handler_t         overflow_handler;
 479        void                            *overflow_handler_context;
 480
 481#ifdef CONFIG_EVENT_TRACING
 482        struct ftrace_event_call        *tp_event;
 483        struct event_filter             *filter;
 484#ifdef CONFIG_FUNCTION_TRACER
 485        struct ftrace_ops               ftrace_ops;
 486#endif
 487#endif
 488
 489#ifdef CONFIG_CGROUP_PERF
 490        struct perf_cgroup              *cgrp; /* cgroup event is attach to */
 491        int                             cgrp_defer_enabled;
 492#endif
 493
 494#endif /* CONFIG_PERF_EVENTS */
 495};
 496
 497/**
 498 * struct perf_event_context - event context structure
 499 *
 500 * Used as a container for task events and CPU events as well:
 501 */
 502struct perf_event_context {
 503        struct pmu                      *pmu;
 504        /*
 505         * Protect the states of the events in the list,
 506         * nr_active, and the list:
 507         */
 508        raw_spinlock_t                  lock;
 509        /*
 510         * Protect the list of events.  Locking either mutex or lock
 511         * is sufficient to ensure the list doesn't change; to change
 512         * the list you need to lock both the mutex and the spinlock.
 513         */
 514        struct mutex                    mutex;
 515
 516        struct list_head                active_ctx_list;
 517        struct list_head                pinned_groups;
 518        struct list_head                flexible_groups;
 519        struct list_head                event_list;
 520        int                             nr_events;
 521        int                             nr_active;
 522        int                             is_active;
 523        int                             nr_stat;
 524        int                             nr_freq;
 525        int                             rotate_disable;
 526        atomic_t                        refcount;
 527        struct task_struct              *task;
 528
 529        /*
 530         * Context clock, runs when context enabled.
 531         */
 532        u64                             time;
 533        u64                             timestamp;
 534
 535        /*
 536         * These fields let us detect when two contexts have both
 537         * been cloned (inherited) from a common ancestor.
 538         */
 539        struct perf_event_context       *parent_ctx;
 540        u64                             parent_gen;
 541        u64                             generation;
 542        int                             pin_count;
 543        int                             nr_cgroups;      /* cgroup evts */
 544        void                            *task_ctx_data; /* pmu specific data */
 545        struct rcu_head                 rcu_head;
 546
 547        struct delayed_work             orphans_remove;
 548        bool                            orphans_remove_sched;
 549};
 550
 551/*
 552 * Number of contexts where an event can trigger:
 553 *      task, softirq, hardirq, nmi.
 554 */
 555#define PERF_NR_CONTEXTS        4
 556
 557/**
 558 * struct perf_event_cpu_context - per cpu event context structure
 559 */
 560struct perf_cpu_context {
 561        struct perf_event_context       ctx;
 562        struct perf_event_context       *task_ctx;
 563        int                             active_oncpu;
 564        int                             exclusive;
 565        struct hrtimer                  hrtimer;
 566        ktime_t                         hrtimer_interval;
 567        struct pmu                      *unique_pmu;
 568        struct perf_cgroup              *cgrp;
 569};
 570
 571struct perf_output_handle {
 572        struct perf_event               *event;
 573        struct ring_buffer              *rb;
 574        unsigned long                   wakeup;
 575        unsigned long                   size;
 576        union {
 577                void                    *addr;
 578                unsigned long           head;
 579        };
 580        int                             page;
 581};
 582
 583#ifdef CONFIG_CGROUP_PERF
 584
 585/*
 586 * perf_cgroup_info keeps track of time_enabled for a cgroup.
 587 * This is a per-cpu dynamically allocated data structure.
 588 */
 589struct perf_cgroup_info {
 590        u64                             time;
 591        u64                             timestamp;
 592};
 593
 594struct perf_cgroup {
 595        struct cgroup_subsys_state      css;
 596        struct perf_cgroup_info __percpu *info;
 597};
 598
 599/*
 600 * Must ensure cgroup is pinned (css_get) before calling
 601 * this function. In other words, we cannot call this function
 602 * if there is no cgroup event for the current CPU context.
 603 */
 604static inline struct perf_cgroup *
 605perf_cgroup_from_task(struct task_struct *task)
 606{
 607        return container_of(task_css(task, perf_event_cgrp_id),
 608                            struct perf_cgroup, css);
 609}
 610#endif /* CONFIG_CGROUP_PERF */
 611
 612#ifdef CONFIG_PERF_EVENTS
 613
 614extern void *perf_aux_output_begin(struct perf_output_handle *handle,
 615                                   struct perf_event *event);
 616extern void perf_aux_output_end(struct perf_output_handle *handle,
 617                                unsigned long size, bool truncated);
 618extern int perf_aux_output_skip(struct perf_output_handle *handle,
 619                                unsigned long size);
 620extern void *perf_get_aux(struct perf_output_handle *handle);
 621
 622extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
 623extern void perf_pmu_unregister(struct pmu *pmu);
 624
 625extern int perf_num_counters(void);
 626extern const char *perf_pmu_name(void);
 627extern void __perf_event_task_sched_in(struct task_struct *prev,
 628                                       struct task_struct *task);
 629extern void __perf_event_task_sched_out(struct task_struct *prev,
 630                                        struct task_struct *next);
 631extern int perf_event_init_task(struct task_struct *child);
 632extern void perf_event_exit_task(struct task_struct *child);
 633extern void perf_event_free_task(struct task_struct *task);
 634extern void perf_event_delayed_put(struct task_struct *task);
 635extern void perf_event_print_debug(void);
 636extern void perf_pmu_disable(struct pmu *pmu);
 637extern void perf_pmu_enable(struct pmu *pmu);
 638extern void perf_sched_cb_dec(struct pmu *pmu);
 639extern void perf_sched_cb_inc(struct pmu *pmu);
 640extern int perf_event_task_disable(void);
 641extern int perf_event_task_enable(void);
 642extern int perf_event_refresh(struct perf_event *event, int refresh);
 643extern void perf_event_update_userpage(struct perf_event *event);
 644extern int perf_event_release_kernel(struct perf_event *event);
 645extern struct perf_event *
 646perf_event_create_kernel_counter(struct perf_event_attr *attr,
 647                                int cpu,
 648                                struct task_struct *task,
 649                                perf_overflow_handler_t callback,
 650                                void *context);
 651extern void perf_pmu_migrate_context(struct pmu *pmu,
 652                                int src_cpu, int dst_cpu);
 653extern u64 perf_event_read_value(struct perf_event *event,
 654                                 u64 *enabled, u64 *running);
 655
 656
 657struct perf_sample_data {
 658        /*
 659         * Fields set by perf_sample_data_init(), group so as to
 660         * minimize the cachelines touched.
 661         */
 662        u64                             addr;
 663        struct perf_raw_record          *raw;
 664        struct perf_branch_stack        *br_stack;
 665        u64                             period;
 666        u64                             weight;
 667        u64                             txn;
 668        union  perf_mem_data_src        data_src;
 669
 670        /*
 671         * The other fields, optionally {set,used} by
 672         * perf_{prepare,output}_sample().
 673         */
 674        u64                             type;
 675        u64                             ip;
 676        struct {
 677                u32     pid;
 678                u32     tid;
 679        }                               tid_entry;
 680        u64                             time;
 681        u64                             id;
 682        u64                             stream_id;
 683        struct {
 684                u32     cpu;
 685                u32     reserved;
 686        }                               cpu_entry;
 687        struct perf_callchain_entry     *callchain;
 688
 689        /*
 690         * regs_user may point to task_pt_regs or to regs_user_copy, depending
 691         * on arch details.
 692         */
 693        struct perf_regs                regs_user;
 694        struct pt_regs                  regs_user_copy;
 695
 696        struct perf_regs                regs_intr;
 697        u64                             stack_user_size;
 698} ____cacheline_aligned;
 699
 700/* default value for data source */
 701#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
 702                    PERF_MEM_S(LVL, NA)   |\
 703                    PERF_MEM_S(SNOOP, NA) |\
 704                    PERF_MEM_S(LOCK, NA)  |\
 705                    PERF_MEM_S(TLB, NA))
 706
 707static inline void perf_sample_data_init(struct perf_sample_data *data,
 708                                         u64 addr, u64 period)
 709{
 710        /* remaining struct members initialized in perf_prepare_sample() */
 711        data->addr = addr;
 712        data->raw  = NULL;
 713        data->br_stack = NULL;
 714        data->period = period;
 715        data->weight = 0;
 716        data->data_src.val = PERF_MEM_NA;
 717        data->txn = 0;
 718}
 719
 720extern void perf_output_sample(struct perf_output_handle *handle,
 721                               struct perf_event_header *header,
 722                               struct perf_sample_data *data,
 723                               struct perf_event *event);
 724extern void perf_prepare_sample(struct perf_event_header *header,
 725                                struct perf_sample_data *data,
 726                                struct perf_event *event,
 727                                struct pt_regs *regs);
 728
 729extern int perf_event_overflow(struct perf_event *event,
 730                                 struct perf_sample_data *data,
 731                                 struct pt_regs *regs);
 732
 733static inline bool is_sampling_event(struct perf_event *event)
 734{
 735        return event->attr.sample_period != 0;
 736}
 737
 738/*
 739 * Return 1 for a software event, 0 for a hardware event
 740 */
 741static inline int is_software_event(struct perf_event *event)
 742{
 743        return event->pmu->task_ctx_nr == perf_sw_context;
 744}
 745
 746extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 747
 748extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
 749extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
 750
 751#ifndef perf_arch_fetch_caller_regs
 752static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
 753#endif
 754
 755/*
 756 * Take a snapshot of the regs. Skip ip and frame pointer to
 757 * the nth caller. We only need a few of the regs:
 758 * - ip for PERF_SAMPLE_IP
 759 * - cs for user_mode() tests
 760 * - bp for callchains
 761 * - eflags, for future purposes, just in case
 762 */
 763static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 764{
 765        memset(regs, 0, sizeof(*regs));
 766
 767        perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
 768}
 769
 770static __always_inline void
 771perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 772{
 773        if (static_key_false(&perf_swevent_enabled[event_id]))
 774                __perf_sw_event(event_id, nr, regs, addr);
 775}
 776
 777DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
 778
 779/*
 780 * 'Special' version for the scheduler, it hard assumes no recursion,
 781 * which is guaranteed by us not actually scheduling inside other swevents
 782 * because those disable preemption.
 783 */
 784static __always_inline void
 785perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 786{
 787        if (static_key_false(&perf_swevent_enabled[event_id])) {
 788                struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
 789
 790                perf_fetch_caller_regs(regs);
 791                ___perf_sw_event(event_id, nr, regs, addr);
 792        }
 793}
 794
 795extern struct static_key_deferred perf_sched_events;
 796
 797static inline void perf_event_task_sched_in(struct task_struct *prev,
 798                                            struct task_struct *task)
 799{
 800        if (static_key_false(&perf_sched_events.key))
 801                __perf_event_task_sched_in(prev, task);
 802}
 803
 804static inline void perf_event_task_sched_out(struct task_struct *prev,
 805                                             struct task_struct *next)
 806{
 807        perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 808
 809        if (static_key_false(&perf_sched_events.key))
 810                __perf_event_task_sched_out(prev, next);
 811}
 812
 813static inline u64 __perf_event_count(struct perf_event *event)
 814{
 815        return local64_read(&event->count) + atomic64_read(&event->child_count);
 816}
 817
 818extern void perf_event_mmap(struct vm_area_struct *vma);
 819extern struct perf_guest_info_callbacks *perf_guest_cbs;
 820extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 821extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 822
 823extern void perf_event_exec(void);
 824extern void perf_event_comm(struct task_struct *tsk, bool exec);
 825extern void perf_event_fork(struct task_struct *tsk);
 826
 827/* Callchains */
 828DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
 829
 830extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
 831extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
 832
 833static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
 834{
 835        if (entry->nr < PERF_MAX_STACK_DEPTH)
 836                entry->ip[entry->nr++] = ip;
 837}
 838
 839extern int sysctl_perf_event_paranoid;
 840extern int sysctl_perf_event_mlock;
 841extern int sysctl_perf_event_sample_rate;
 842extern int sysctl_perf_cpu_time_max_percent;
 843
 844extern void perf_sample_event_took(u64 sample_len_ns);
 845
 846extern int perf_proc_update_handler(struct ctl_table *table, int write,
 847                void __user *buffer, size_t *lenp,
 848                loff_t *ppos);
 849extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 850                void __user *buffer, size_t *lenp,
 851                loff_t *ppos);
 852
 853
 854static inline bool perf_paranoid_tracepoint_raw(void)
 855{
 856        return sysctl_perf_event_paranoid > -1;
 857}
 858
 859static inline bool perf_paranoid_cpu(void)
 860{
 861        return sysctl_perf_event_paranoid > 0;
 862}
 863
 864static inline bool perf_paranoid_kernel(void)
 865{
 866        return sysctl_perf_event_paranoid > 1;
 867}
 868
 869extern void perf_event_init(void);
 870extern void perf_tp_event(u64 addr, u64 count, void *record,
 871                          int entry_size, struct pt_regs *regs,
 872                          struct hlist_head *head, int rctx,
 873                          struct task_struct *task);
 874extern void perf_bp_event(struct perf_event *event, void *data);
 875
 876#ifndef perf_misc_flags
 877# define perf_misc_flags(regs) \
 878                (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
 879# define perf_instruction_pointer(regs) instruction_pointer(regs)
 880#endif
 881
 882static inline bool has_branch_stack(struct perf_event *event)
 883{
 884        return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
 885}
 886
 887static inline bool needs_branch_stack(struct perf_event *event)
 888{
 889        return event->attr.branch_sample_type != 0;
 890}
 891
 892static inline bool has_aux(struct perf_event *event)
 893{
 894        return event->pmu->setup_aux;
 895}
 896
 897extern int perf_output_begin(struct perf_output_handle *handle,
 898                             struct perf_event *event, unsigned int size);
 899extern void perf_output_end(struct perf_output_handle *handle);
 900extern unsigned int perf_output_copy(struct perf_output_handle *handle,
 901                             const void *buf, unsigned int len);
 902extern unsigned int perf_output_skip(struct perf_output_handle *handle,
 903                                     unsigned int len);
 904extern int perf_swevent_get_recursion_context(void);
 905extern void perf_swevent_put_recursion_context(int rctx);
 906extern u64 perf_swevent_set_period(struct perf_event *event);
 907extern void perf_event_enable(struct perf_event *event);
 908extern void perf_event_disable(struct perf_event *event);
 909extern int __perf_event_disable(void *info);
 910extern void perf_event_task_tick(void);
 911#else /* !CONFIG_PERF_EVENTS: */
 912static inline void *
 913perf_aux_output_begin(struct perf_output_handle *handle,
 914                      struct perf_event *event)                         { return NULL; }
 915static inline void
 916perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 917                    bool truncated)                                     { }
 918static inline int
 919perf_aux_output_skip(struct perf_output_handle *handle,
 920                     unsigned long size)                                { return -EINVAL; }
 921static inline void *
 922perf_get_aux(struct perf_output_handle *handle)                         { return NULL; }
 923static inline void
 924perf_event_task_sched_in(struct task_struct *prev,
 925                         struct task_struct *task)                      { }
 926static inline void
 927perf_event_task_sched_out(struct task_struct *prev,
 928                          struct task_struct *next)                     { }
 929static inline int perf_event_init_task(struct task_struct *child)       { return 0; }
 930static inline void perf_event_exit_task(struct task_struct *child)      { }
 931static inline void perf_event_free_task(struct task_struct *task)       { }
 932static inline void perf_event_delayed_put(struct task_struct *task)     { }
 933static inline void perf_event_print_debug(void)                         { }
 934static inline int perf_event_task_disable(void)                         { return -EINVAL; }
 935static inline int perf_event_task_enable(void)                          { return -EINVAL; }
 936static inline int perf_event_refresh(struct perf_event *event, int refresh)
 937{
 938        return -EINVAL;
 939}
 940
 941static inline void
 942perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)     { }
 943static inline void
 944perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)                     { }
 945static inline void
 946perf_bp_event(struct perf_event *event, void *data)                     { }
 947
 948static inline int perf_register_guest_info_callbacks
 949(struct perf_guest_info_callbacks *callbacks)                           { return 0; }
 950static inline int perf_unregister_guest_info_callbacks
 951(struct perf_guest_info_callbacks *callbacks)                           { return 0; }
 952
 953static inline void perf_event_mmap(struct vm_area_struct *vma)          { }
 954static inline void perf_event_exec(void)                                { }
 955static inline void perf_event_comm(struct task_struct *tsk, bool exec)  { }
 956static inline void perf_event_fork(struct task_struct *tsk)             { }
 957static inline void perf_event_init(void)                                { }
 958static inline int  perf_swevent_get_recursion_context(void)             { return -1; }
 959static inline void perf_swevent_put_recursion_context(int rctx)         { }
 960static inline u64 perf_swevent_set_period(struct perf_event *event)     { return 0; }
 961static inline void perf_event_enable(struct perf_event *event)          { }
 962static inline void perf_event_disable(struct perf_event *event)         { }
 963static inline int __perf_event_disable(void *info)                      { return -1; }
 964static inline void perf_event_task_tick(void)                           { }
 965#endif
 966
 967#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
 968extern bool perf_event_can_stop_tick(void);
 969#else
 970static inline bool perf_event_can_stop_tick(void)                       { return true; }
 971#endif
 972
 973#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 974extern void perf_restore_debug_store(void);
 975#else
 976static inline void perf_restore_debug_store(void)                       { }
 977#endif
 978
 979#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
 980
 981/*
 982 * This has to have a higher priority than migration_notifier in sched/core.c.
 983 */
 984#define perf_cpu_notifier(fn)                                           \
 985do {                                                                    \
 986        static struct notifier_block fn##_nb =                          \
 987                { .notifier_call = fn, .priority = CPU_PRI_PERF };      \
 988        unsigned long cpu = smp_processor_id();                         \
 989        unsigned long flags;                                            \
 990                                                                        \
 991        cpu_notifier_register_begin();                                  \
 992        fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,                     \
 993                (void *)(unsigned long)cpu);                            \
 994        local_irq_save(flags);                                          \
 995        fn(&fn##_nb, (unsigned long)CPU_STARTING,                       \
 996                (void *)(unsigned long)cpu);                            \
 997        local_irq_restore(flags);                                       \
 998        fn(&fn##_nb, (unsigned long)CPU_ONLINE,                         \
 999                (void *)(unsigned long)cpu);                            \
1000        __register_cpu_notifier(&fn##_nb);                              \
1001        cpu_notifier_register_done();                                   \
1002} while (0)
1003
1004/*
1005 * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
1006 * callback for already online CPUs.
1007 */
1008#define __perf_cpu_notifier(fn)                                         \
1009do {                                                                    \
1010        static struct notifier_block fn##_nb =                          \
1011                { .notifier_call = fn, .priority = CPU_PRI_PERF };      \
1012                                                                        \
1013        __register_cpu_notifier(&fn##_nb);                              \
1014} while (0)
1015
1016struct perf_pmu_events_attr {
1017        struct device_attribute attr;
1018        u64 id;
1019        const char *event_str;
1020};
1021
1022ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
1023                              char *page);
1024
1025#define PMU_EVENT_ATTR(_name, _var, _id, _show)                         \
1026static struct perf_pmu_events_attr _var = {                             \
1027        .attr = __ATTR(_name, 0444, _show, NULL),                       \
1028        .id   =  _id,                                                   \
1029};
1030
1031#define PMU_EVENT_ATTR_STRING(_name, _var, _str)                            \
1032static struct perf_pmu_events_attr _var = {                                 \
1033        .attr           = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
1034        .id             = 0,                                                \
1035        .event_str      = _str,                                             \
1036};
1037
1038#define PMU_FORMAT_ATTR(_name, _format)                                 \
1039static ssize_t                                                          \
1040_name##_show(struct device *dev,                                        \
1041                               struct device_attribute *attr,           \
1042                               char *page)                              \
1043{                                                                       \
1044        BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
1045        return sprintf(page, _format "\n");                             \
1046}                                                                       \
1047                                                                        \
1048static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
1049
1050#endif /* _LINUX_PERF_EVENT_H */
1051