linux/drivers/gpu/drm/i915/i915_perf_types.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: MIT */
   2/*
   3 * Copyright © 2019 Intel Corporation
   4 */
   5
   6#ifndef _I915_PERF_TYPES_H_
   7#define _I915_PERF_TYPES_H_
   8
   9#include <linux/atomic.h>
  10#include <linux/device.h>
  11#include <linux/hrtimer.h>
  12#include <linux/llist.h>
  13#include <linux/poll.h>
  14#include <linux/sysfs.h>
  15#include <linux/types.h>
  16#include <linux/uuid.h>
  17#include <linux/wait.h>
  18#include <uapi/drm/i915_drm.h>
  19
  20#include "gt/intel_sseu.h"
  21#include "i915_reg.h"
  22#include "intel_wakeref.h"
  23
  24struct drm_i915_private;
  25struct file;
  26struct i915_active;
  27struct i915_gem_context;
  28struct i915_perf;
  29struct i915_vma;
  30struct intel_context;
  31struct intel_engine_cs;
  32
  33struct i915_oa_format { /* one OA format entry: a format value plus an associated size */
  34        u32 format; /* presumably the hardware encoding of the format -- TODO confirm */
  35        int size; /* presumably the size in bytes of one report in this format -- TODO confirm */
  36};
  37
  38struct i915_oa_reg { /* a register address paired with a 32-bit value */
  39        i915_reg_t addr; /* register address */
  40        u32 value; /* presumably the value to program into @addr -- TODO confirm */
  41};
  42
  43struct i915_oa_config { /* one OA configuration: identity, three register lists, sysfs and lifetime state */
  44        struct i915_perf *perf; /* the i915_perf this config points back to (presumably its owner -- confirm) */
  45
  46        char uuid[UUID_STRING_LEN + 1]; /* UUID in string form; sized one byte beyond UUID_STRING_LEN (presumably for the NUL) */
  47        int id; /* presumably the key used in perf->metrics_idr -- TODO confirm */
  48
  49        const struct i915_oa_reg *mux_regs; /* mux register list */
  50        u32 mux_regs_len; /* presumably the number of entries in @mux_regs -- TODO confirm */
  51        const struct i915_oa_reg *b_counter_regs; /* boolean counter register list */
  52        u32 b_counter_regs_len; /* presumably the number of entries in @b_counter_regs -- TODO confirm */
  53        const struct i915_oa_reg *flex_regs; /* flex EU register list */
  54        u32 flex_regs_len; /* presumably the number of entries in @flex_regs -- TODO confirm */
  55
  56        struct attribute_group sysfs_metric; /* sysfs attribute group for this config */
  57        struct attribute *attrs[2]; /* presumably one attribute plus a NULL terminator -- TODO confirm */
  58        struct device_attribute sysfs_metric_id; /* presumably exposes @id through sysfs -- TODO confirm */
  59
  60        struct kref ref; /* reference count */
  61        struct rcu_head rcu; /* RCU head; presumably used to defer freeing -- TODO confirm */
  62};
  63
  64struct i915_perf_stream;
  65
  66/**
  67 * struct i915_perf_stream_ops - the OPs to support a specific stream type
  68 */
  69struct i915_perf_stream_ops {
  70        /**
  71         * @enable: Enables the collection of HW samples, either in response to
  72         * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
  73         * without `I915_PERF_FLAG_DISABLED`.
  74         */
  75        void (*enable)(struct i915_perf_stream *stream);
  76
  77        /**
  78         * @disable: Disables the collection of HW samples, either in response
  79         * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
  80         * the stream.
  81         */
  82        void (*disable)(struct i915_perf_stream *stream);
  83
  84        /**
  85         * @poll_wait: Call poll_wait, passing a wait queue that will be woken
  86         * once there is something ready to read() for the stream.
  87         */
  88        void (*poll_wait)(struct i915_perf_stream *stream,
  89                          struct file *file,
  90                          poll_table *wait);
  91
  92        /**
  93         * @wait_unlocked: For handling a blocking read, wait until there is
  94         * something ready to read() for the stream. E.g. wait on the same
  95         * wait queue that would be passed to poll_wait().
  96         */
  97        int (*wait_unlocked)(struct i915_perf_stream *stream);
  98
  99        /**
 100         * @read: Copy buffered metrics as records to userspace.
 101         * **buf**: the userspace destination buffer
 102         * **count**: the number of bytes to copy, requested by userspace
 103         * **offset**: zero at the start of the read and updated as the read
 104         * proceeds; it represents how many bytes have been copied so far and
 105         * the buffer offset for copying the next record.
 106         *
 107         * Copy as many buffered i915 perf samples and records for this stream
 108         * to userspace as will fit in the given buffer.
 109         *
 110         * Only write complete records, returning -%ENOSPC if there isn't room
 111         * for a complete record.
 112         *
 113         * Return any error condition that results in a short read such as
 114         * -%ENOSPC or -%EFAULT, even though these may be squashed before
 115         * returning to userspace.
 116         */
 117        int (*read)(struct i915_perf_stream *stream,
 118                    char __user *buf,
 119                    size_t count,
 120                    size_t *offset);
 121
 122        /**
 123         * @destroy: Cleanup any stream specific resources.
 124         *
 125         * The stream will always be disabled before this is called.
 126         */
 127        void (*destroy)(struct i915_perf_stream *stream);
 128};
 129
 130/**
 131 * struct i915_perf_stream - state for a single open stream FD
 132 */
 133struct i915_perf_stream {
 134        /**
 135         * @perf: i915_perf backpointer
 136         */
 137        struct i915_perf *perf;
 138
 139        /**
 140         * @uncore: mmio access path
 141         */
 142        struct intel_uncore *uncore;
 143
 144        /**
 145         * @engine: Engine associated with this performance stream.
 146         */
 147        struct intel_engine_cs *engine;
 148
 149        /**
 150         * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
 151         * properties given when opening a stream, representing the contents
 152         * of a single sample as read() by userspace.
 153         */
 154        u32 sample_flags;
 155
 156        /**
 157         * @sample_size: Considering the configured contents of a sample
 158         * combined with the required header size, this is the total size
 159         * of a single sample record.
 160         */
 161        int sample_size;
 162
 163        /**
 164         * @ctx: %NULL if measuring system-wide across all contexts, otherwise
 165         * the specific context that is being monitored.
 166         */
 167        struct i915_gem_context *ctx;
 168
 169        /**
 170         * @enabled: Whether the stream is currently enabled, considering
 171         * whether the stream was opened in a disabled state and based
 172         * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
 173         */
 174        bool enabled;
 175
 176        /**
 177         * @hold_preemption: Whether preemption is put on hold for command
 178         * submissions done on the @ctx. This is useful for some drivers that
 179         * cannot easily post process the OA buffer context to subtract delta
 180         * of performance counters not associated with @ctx.
 181         */
 182        bool hold_preemption;
 183
 184        /**
 185         * @ops: The callbacks providing the implementation of this specific
 186         * type of configured stream.
 187         */
 188        const struct i915_perf_stream_ops *ops;
 189
 190        /**
 191         * @oa_config: The OA configuration used by the stream.
 192         */
 193        struct i915_oa_config *oa_config;
 194
 195        /**
 196         * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
 197         * each time @oa_config changes.
 198         */
 199        struct llist_head oa_config_bos;
 200
 201        /**
 202         * @pinned_ctx: The OA context specific information.
 203         */
 204        struct intel_context *pinned_ctx;
 205
 206        /**
 207         * @specific_ctx_id: The id of the specific context.
 208         */
 209        u32 specific_ctx_id;
 210
 211        /**
 212         * @specific_ctx_id_mask: The mask used to mask the @specific_ctx_id bits.
 213         */
 214        u32 specific_ctx_id_mask;
 215
 216        /**
 217         * @poll_check_timer: High resolution timer that will periodically
 218         * check for data in the circular OA buffer for notifying userspace
 219         * (e.g. during a read() or poll()).
 220         */
 221        struct hrtimer poll_check_timer;
 222
 223        /**
 224         * @poll_wq: The wait queue that the hrtimer callback wakes when it
 225         * sees data ready to read in the circular OA buffer.
 226         */
 227        wait_queue_head_t poll_wq;
 228
 229        /**
 230         * @pollin: Whether there is data available to read.
 231         */
 232        bool pollin;
 233
 234        /**
 235         * @periodic: Whether periodic sampling is currently enabled.
 236         */
 237        bool periodic;
 238
 239        /**
 240         * @period_exponent: The OA unit sampling frequency is derived from this.
 241         */
 242        int period_exponent;
 243
 244        /**
 245         * @oa_buffer: State of the OA buffer.
 246         */
 247        struct {
 248                struct i915_vma *vma; /* presumably the vma backing the OA buffer -- TODO confirm */
 249                u8 *vaddr; /* presumably a CPU-visible mapping of the buffer -- TODO confirm */
 250                u32 last_ctx_id;
 251                int format; /* presumably mirrors i915_oa_format.format -- TODO confirm */
 252                int format_size; /* presumably mirrors i915_oa_format.size -- TODO confirm */
 253                int size_exponent;
 254
 255                /**
 256                 * @ptr_lock: Locks reads and writes to all head/tail state
 257                 *
 258                 * Consider: the head and tail pointer state needs to be read
 259                 * consistently from a hrtimer callback (atomic context) and
 260                 * read() fop (user context) with tail pointer updates happening
 261                 * in atomic context and head updates in user context and the
 262                 * (unlikely) possibility of read() errors needing to reset all
 263                 * head/tail state.
 264                 *
 265                 * Note: Contention/performance aren't currently a significant
 266                 * concern here considering the relatively low frequency of
 267                 * hrtimer callbacks (5ms period) and that reads typically only
 268                 * happen in response to a hrtimer event and likely complete
 269                 * before the next callback.
 270                 *
 271                 * Note: This lock is not held *while* reading and copying data
 272                 * to userspace so the value of head observed in hrtimer
 273                 * callbacks won't represent any partial consumption of data.
 274                 */
 275                spinlock_t ptr_lock;
 276
 277                /**
 278                 * @aging_tail: The last HW tail reported by HW. The data
 279                 * might not have made it to memory yet though.
 280                 */
 281                u32 aging_tail;
 282
 283                /**
 284                 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
 285                 * was read; used to determine when it is old enough to trust.
 286                 */
 287                u64 aging_timestamp;
 288
 289                /**
 290                 * @head: Although we can always read back the head pointer register,
 291                 * we prefer to avoid trusting the HW state, just to avoid any
 292                 * risk that some hardware condition could somehow bump the
 293                 * head pointer unpredictably and cause us to forward the wrong
 294                 * OA buffer data to userspace.
 295                 */
 296                u32 head;
 297
 298                /**
 299                 * @tail: The last verified tail that can be read by userspace.
 300                 */
 301                u32 tail;
 302        } oa_buffer;
 303
 304        /**
 305         * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
 306         * reprogrammed.
 307         */
 308        struct i915_vma *noa_wait;
 309
 310        /**
 311         * @poll_oa_period: The period in nanoseconds at which the OA
 312         * buffer should be checked for available data.
 313         */
 314        u64 poll_oa_period;
 315};
 316
 317/**
 318 * struct i915_oa_ops - Gen specific implementation of an OA unit stream
 319 */
 320struct i915_oa_ops {
 321        /**
 322         * @is_valid_b_counter_reg: Validates a register's address for
 323         * programming boolean counters for a particular platform.
 324         */
 325        bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
 326
 327        /**
 328         * @is_valid_mux_reg: Validates a register's address for programming
 329         * the mux for a particular platform.
 330         */
 331        bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
 332
 333        /**
 334         * @is_valid_flex_reg: Validates a register's address for programming
 335         * flex EU filtering for a particular platform.
 336         */
 337        bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
 338
 339        /**
 340         * @enable_metric_set: Selects and applies any MUX configuration to set
 341         * up the Boolean and Custom (B/C) counters that are part of the
 342         * counter reports being sampled. May apply system constraints such as
 343         * disabling EU clock gating as required.
 344         */
 345        int (*enable_metric_set)(struct i915_perf_stream *stream,
 346                                 struct i915_active *active);
 347
 348        /**
 349         * @disable_metric_set: Remove system constraints associated with using
 350         * the OA unit.
 351         */
 352        void (*disable_metric_set)(struct i915_perf_stream *stream);
 353
 354        /**
 355         * @oa_enable: Enable periodic sampling.
 356         */
 357        void (*oa_enable)(struct i915_perf_stream *stream);
 358
 359        /**
 360         * @oa_disable: Disable periodic sampling.
 361         */
 362        void (*oa_disable)(struct i915_perf_stream *stream);
 363
 364        /**
 365         * @read: Copy data from the circular OA buffer into a given userspace
 366         * buffer.
 367         */
 368        int (*read)(struct i915_perf_stream *stream,
 369                    char __user *buf,
 370                    size_t count,
 371                    size_t *offset);
 372
 373        /**
 374         * @oa_hw_tail_read: Read the OA tail pointer register.
 375         *
 376         * In particular this enables us to share all the fiddly code for
 377         * handling the OA unit tail pointer race that affects multiple
 378         * generations.
 379         */
 380        u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
 381};
 382
 383struct i915_perf { /* top-level i915 perf state: OA configs, the active stream and platform hooks */
 384        struct drm_i915_private *i915;
 385
 386        struct kobject *metrics_kobj;
 387
 388        /*
 389         * Lock associated with adding/modifying/removing OA configs
 390         * in perf->metrics_idr.
 391         */
 392        struct mutex metrics_lock;
 393
 394        /*
 395         * List of dynamic configurations (struct i915_oa_config); you
 396         * need to hold perf->metrics_lock to access it.
 397         */
 398        struct idr metrics_idr;
 399
 400        /*
 401         * Lock associated with anything below within this structure
 402         * except exclusive_stream.
 403         */
 404        struct mutex lock;
 405
 406        /*
 407         * The stream currently using the OA unit. NOTE(review): the rule for
 408         * access outside a syscall associated to its file descriptor was left
 409         * unstated in the original comment -- TODO confirm required locking.
 410         */
 411        struct i915_perf_stream *exclusive_stream;
 412
 413        /**
 414         * @sseu: sseu configuration selected to run while perf is active,
 415         * applies to all contexts.
 416         */
 417        struct intel_sseu sseu;
 418
 419        /**
 420         * @spurious_report_rs: For rate limiting any notifications of
 421         * spurious invalid OA reports.
 422         */
 423        struct ratelimit_state spurious_report_rs;
 424
 425        /**
 426         * @tail_pointer_race: For rate limiting any notifications of tail
 427         * pointer race.
 428         */
 429        struct ratelimit_state tail_pointer_race;
 430
 431        u32 gen7_latched_oastatus1;
 432        u32 ctx_oactxctrl_offset;
 433        u32 ctx_flexeu0_offset;
 434
 435        /**
 436         * @gen8_valid_ctx_bit: The RPT_ID/reason field for Gen8+ includes a
 437         * bit to determine if the CTX ID in the report is valid, but the
 438         * specific bit differs between Gen 8 and 9.
 439         */
 440        u32 gen8_valid_ctx_bit;
 441
 442        struct i915_oa_ops ops; /* the Gen specific OA unit callbacks */
 443        const struct i915_oa_format *oa_formats;
 444
 445        /**
 446         * @format_mask: Use a format mask to store the supported formats for a
 447         * platform; sized to hold at least I915_OA_FORMAT_MAX - 1 bits.
 448         */
 449#define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG)
 450        unsigned long format_mask[FORMAT_MASK_SIZE];
 451
 452        atomic64_t noa_programming_delay;
 453};
 454
 455#endif /* _I915_PERF_TYPES_H_ */
 456