linux/drivers/gpu/drm/i915/intel_ringbuffer.h
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes; using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

/*
 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
 * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
 * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
 *
 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
 * cacheline, the Head Pointer must not be greater than the Tail
 * Pointer."
 */
#define I915_RING_FREE_SPACE 64
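
/*
 * A minimal sketch (not necessarily the exact body of __intel_ring_space()
 * in intel_ringbuffer.c) of how a free-space calculation honours the
 * restriction quoted above: wrap the head/tail difference around the ring
 * size, then keep I915_RING_FREE_SPACE bytes of slack so head and tail can
 * never meet within one cacheline:
 *
 *     int space = head - tail;
 *     if (space <= 0)
 *             space += size;
 *     return space - I915_RING_FREE_SPACE;
 */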

struct  intel_hw_status_page {
        u32             *page_addr;
        unsigned int    gfx_addr;
        struct          drm_i915_gem_object *obj;
};

#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)

#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base))
#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val)

#define I915_READ_HEAD(ring)  I915_READ(RING_HEAD((ring)->mmio_base))
#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val)

#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base))
#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val)

#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)

#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base))
#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
#define i915_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SIGNAL_OFFSET(__ring, to)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((__ring)->id * I915_NUM_RINGS * i915_semaphore_seqno_size) +   \
        (i915_semaphore_seqno_size * (to)))

#define GEN8_WAIT_OFFSET(__ring, from)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
        (i915_semaphore_seqno_size * (__ring)->id))

#define GEN8_RING_SEMAPHORE_INIT do { \
        if (!dev_priv->semaphore_obj) { \
                break; \
        } \
        ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \
        ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \
        ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \
        ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \
        ring->semaphore.signal_ggtt[VCS2] = GEN8_SIGNAL_OFFSET(ring, VCS2); \
        ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
        } while(0)
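
/*
 * Hedged usage sketch only: the macro above expands to a statement and
 * guards itself against a missing dev_priv->semaphore_obj, so a gen8
 * engine-init path would typically just invoke it in place:
 *
 *     ring->semaphore.signal = gen8_xcs_signal;
 *     GEN8_RING_SEMAPHORE_INIT;
 *
 * (gen8_xcs_signal names the era's signalling hook and is illustrative;
 * see intel_ringbuffer.c for the authoritative call sites.)
 */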

enum intel_ring_hangcheck_action {
        HANGCHECK_IDLE = 0,
        HANGCHECK_WAIT,
        HANGCHECK_ACTIVE,
        HANGCHECK_ACTIVE_LOOP,
        HANGCHECK_KICK,
        HANGCHECK_HUNG,
};

#define HANGCHECK_SCORE_RING_HUNG 31

struct intel_ring_hangcheck {
        u64 acthd;
        u64 max_acthd;
        u32 seqno;
        int score;
        enum intel_ring_hangcheck_action action;
        int deadlock;
        u32 instdone[I915_NUM_INSTDONE_REG];
};

struct intel_ringbuffer {
        struct drm_i915_gem_object *obj;
        void __iomem *virtual_start;
        struct i915_vma *vma;

        struct intel_engine_cs *ring;
        struct list_head link;

        u32 head;
        u32 tail;
        int space;
        int size;
        int effective_size;
        int reserved_size;
        int reserved_tail;
        bool reserved_in_use;

        /** We track the position of the requests in the ring buffer, and
         * when each is retired we advance last_retired_head, as the GPU
         * must have finished processing the request and so we know we
         * can advance the ringbuffer up to that position.
         *
         * last_retired_head is set to -1 after the value is consumed so
         * we can detect new retirements.
         */
        u32 last_retired_head;
};
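
/*
 * Hedged sketch of how last_retired_head is consumed; the authoritative
 * logic lives in intel_ring_update_space() in intel_ringbuffer.c and may
 * differ in detail. Before recomputing the free space, any pending
 * retirement is folded into head and the marker is reset:
 *
 *     if (ringbuf->last_retired_head != -1) {
 *             ringbuf->head = ringbuf->last_retired_head;
 *             ringbuf->last_retired_head = -1;
 *     }
 *     ringbuf->space = __intel_ring_space(ringbuf->head, ringbuf->tail,
 *                                         ringbuf->size);
 */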

struct  intel_context;
struct drm_i915_reg_descriptor;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are expressed in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position; also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct  i915_ctx_workarounds {
        struct i915_wa_ctx_bb {
                u32 offset;
                u32 size;
        } indirect_ctx, per_ctx;
        struct drm_i915_gem_object *obj;
};

struct  intel_engine_cs {
        const char      *name;
        enum intel_ring_id {
                RCS = 0,
                BCS,
                VCS,
                VCS2,   /* Keep instances of the same type engine together. */
                VECS
        } id;
#define I915_NUM_RINGS 5
#define _VCS(n) (VCS + (n))
        unsigned int exec_id;
        unsigned int guc_id;
        u32             mmio_base;
        struct          drm_device *dev;
        struct intel_ringbuffer *buffer;
        struct list_head buffers;

        /*
         * A pool of objects to use as shadow copies of client batch buffers
         * when the command parser is enabled. Prevents the client from
         * modifying the batch contents after software parsing.
         */
        struct i915_gem_batch_pool batch_pool;

        struct intel_hw_status_page status_page;
        struct i915_ctx_workarounds wa_ctx;

        unsigned irq_refcount; /* protected by dev_priv->irq_lock */
        u32             irq_enable_mask;        /* bitmask to enable ring interrupt */
        struct drm_i915_gem_request *trace_irq_req;
        bool __must_check (*irq_get)(struct intel_engine_cs *ring);
        void            (*irq_put)(struct intel_engine_cs *ring);

        int             (*init_hw)(struct intel_engine_cs *ring);

        int             (*init_context)(struct drm_i915_gem_request *req);

        void            (*write_tail)(struct intel_engine_cs *ring,
                                      u32 value);
        int __must_check (*flush)(struct drm_i915_gem_request *req,
                                  u32   invalidate_domains,
                                  u32   flush_domains);
        int             (*add_request)(struct drm_i915_gem_request *req);
        /* Some chipsets are not quite as coherent as advertised and need
         * an expensive kick to force a true read of the up-to-date seqno.
         * However, the up-to-date seqno is not always required and the last
         * seen value is good enough. Note that the seqno will always be
         * monotonic, even if not coherent.
         */
        u32             (*get_seqno)(struct intel_engine_cs *ring,
                                     bool lazy_coherency);
        void            (*set_seqno)(struct intel_engine_cs *ring,
                                     u32 seqno);
        int             (*dispatch_execbuffer)(struct drm_i915_gem_request *req,
                                               u64 offset, u32 length,
                                               unsigned dispatch_flags);
#define I915_DISPATCH_SECURE 0x1
#define I915_DISPATCH_PINNED 0x2
#define I915_DISPATCH_RS     0x4
        void            (*cleanup)(struct intel_engine_cs *ring);

        /* GEN8 signal/wait table - never trust comments!
         *        signal to     signal to    signal to   signal to      signal to
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
         *  ie. transpose of g(x, y)
         *
         *       sync from      sync from    sync from    sync from     sync from
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
         *  ie. transpose of f(x, y)
         */
        struct {
                u32     sync_seqno[I915_NUM_RINGS-1];

                union {
                        struct {
                                /* our mbox written by others */
                                u32             wait[I915_NUM_RINGS];
                                /* mboxes this ring signals to */
                                i915_reg_t      signal[I915_NUM_RINGS];
                        } mbox;
                        u64             signal_ggtt[I915_NUM_RINGS];
                };

                /* AKA wait() */
                int     (*sync_to)(struct drm_i915_gem_request *to_req,
                                   struct intel_engine_cs *from,
                                   u32 seqno);
                int     (*signal)(struct drm_i915_gem_request *signaller_req,
                                  /* num_dwords needed by caller */
                                  unsigned int num_dwords);
        } semaphore;

        /* Execlists */
        spinlock_t execlist_lock;
        struct list_head execlist_queue;
        struct list_head execlist_retired_req_list;
        u8 next_context_status_buffer;
        bool disable_lite_restore_wa;
        u32 ctx_desc_template;
        u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
        int             (*emit_request)(struct drm_i915_gem_request *request);
        int             (*emit_flush)(struct drm_i915_gem_request *request,
                                      u32 invalidate_domains,
                                      u32 flush_domains);
        int             (*emit_bb_start)(struct drm_i915_gem_request *req,
                                         u64 offset, unsigned dispatch_flags);

        /**
         * List of objects currently involved in rendering from the
         * ringbuffer.
         *
         * Includes buffers having the contents of their GPU caches
         * flushed, not necessarily primitives.  last_read_req
         * represents when the rendering involved will be completed.
         *
         * A reference is held on the buffer while on this list.
         */
        struct list_head active_list;

        /**
         * List of breadcrumbs associated with GPU requests currently
         * outstanding.
         */
        struct list_head request_list;

        /**
         * Seqno of request most recently submitted to request_list.
         * Used exclusively by hang checker to avoid grabbing lock while
         * inspecting request list.
         */
        u32 last_submitted_seqno;

        bool gpu_caches_dirty;

        wait_queue_head_t irq_queue;

        struct intel_context *last_context;

        struct intel_ring_hangcheck hangcheck;

        struct {
                struct drm_i915_gem_object *obj;
                u32 gtt_offset;
                volatile u32 *cpu_page;
        } scratch;

        bool needs_cmd_parser;

        /*
         * Table of commands the command parser needs to know about
         * for this ring.
         */
        DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

        /*
         * Table of registers allowed in commands that read/write registers.
         */
        const struct drm_i915_reg_descriptor *reg_table;
        int reg_count;

        /*
         * Table of registers allowed in commands that read/write registers, but
         * only from the DRM master.
         */
        const struct drm_i915_reg_descriptor *master_reg_table;
        int master_reg_count;

        /*
         * Returns the bitmask for the length field of the specified command.
         * Return 0 for an unrecognized/invalid command.
         *
         * If the command parser finds an entry for a command in the ring's
         * cmd_tables, it gets the command's length based on the table entry.
         * If not, it calls this function to determine the per-ring length field
         * encoding for the command (i.e. certain opcode ranges use certain bits
         * to encode the command length in the header).
         */
        u32 (*get_cmd_length_mask)(u32 cmd_header);
};

static inline bool
intel_ring_initialized(struct intel_engine_cs *ring)
{
        return ring->dev != NULL;
}

static inline unsigned
intel_ring_flag(struct intel_engine_cs *ring)
{
        return 1 << ring->id;
}

static inline u32
intel_ring_sync_index(struct intel_engine_cs *ring,
                      struct intel_engine_cs *other)
{
        int idx;

        /*
         * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
         * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
         * bcs -> 0 = vecs, 1 = vcs2, 2 = rcs, 3 = vcs;
         * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
         * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
         */

        idx = (other - ring) - 1;
        if (idx < 0)
                idx += I915_NUM_RINGS;

        return idx;
}
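
/*
 * Hedged example of how the index above is used: the object-sync path
 * caches the last seqno already waited upon per source engine, roughly
 *
 *     idx = intel_ring_sync_index(from, to);
 *     if (seqno <= from->semaphore.sync_seqno[idx])
 *             return 0;    /* already ordered against a later seqno *\/
 *
 * (illustrative only; see i915_gem_object_sync() and the semaphore
 * sync_to() hooks for the authoritative version).
 */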

static inline void
intel_flush_status_page(struct intel_engine_cs *ring, int reg)
{
        drm_clflush_virt_range(&ring->status_page.page_addr[reg],
                               sizeof(uint32_t));
}

static inline u32
intel_read_status_page(struct intel_engine_cs *ring,
                       int reg)
{
        /* Ensure that the compiler doesn't optimize away the load. */
        barrier();
        return ring->status_page.page_addr[reg];
}

static inline void
intel_write_status_page(struct intel_engine_cs *ring,
                        int reg, u32 value)
{
        ring->status_page.page_addr[reg] = value;
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX              0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX      0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
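
/*
 * Hedged example: with the layout above, the seqno that add_request()
 * writes via MI_STORE_DWORD_INDEX can be read back on the CPU side with
 *
 *     seqno = intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 *
 * which is essentially what the get_seqno() hooks do, modulo the
 * lazy_coherency handling described in struct intel_engine_cs.
 */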

struct intel_ringbuffer *
intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size);
int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
                                     struct intel_ringbuffer *ringbuf);
void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
void intel_ringbuffer_free(struct intel_ringbuffer *ring);

void intel_stop_ring_buffer(struct intel_engine_cs *ring);
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);

int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);

int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
static inline void intel_ring_emit(struct intel_engine_cs *ring,
                                   u32 data)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
        ringbuf->tail += 4;
}
static inline void intel_ring_emit_reg(struct intel_engine_cs *ring,
                                       i915_reg_t reg)
{
        intel_ring_emit(ring, i915_mmio_reg_offset(reg));
}
static inline void intel_ring_advance(struct intel_engine_cs *ring)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        ringbuf->tail &= ringbuf->size - 1;
}
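
/*
 * Hedged sketch of the usual emit pattern built from the helpers above;
 * the dword count and payload are illustrative, not prescriptive:
 *
 *     ret = intel_ring_begin(req, 4);
 *     if (ret)
 *             return ret;
 *
 *     intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 *     intel_ring_emit(ring, I915_GEM_HWS_INDEX_ADDR);
 *     intel_ring_emit(ring, seqno);
 *     intel_ring_emit(ring, MI_USER_INTERRUPT);
 *     intel_ring_advance(ring);
 *
 * intel_ring_begin() guarantees the requested number of dwords is free,
 * intel_ring_emit() writes them, and intel_ring_advance() wraps the
 * software tail; the hardware tail is written when the request is
 * submitted.
 */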

int __intel_ring_space(int head, int tail, int size);
void intel_ring_update_space(struct intel_ringbuffer *ringbuf);
int intel_ring_space(struct intel_ringbuffer *ringbuf);
bool intel_ring_stopped(struct intel_engine_cs *ring);

int __must_check intel_ring_idle(struct intel_engine_cs *ring);
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
int intel_ring_flush_all_caches(struct drm_i915_gem_request *req);
int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);

void intel_fini_pipe_control(struct intel_engine_cs *ring);
int intel_init_pipe_control(struct intel_engine_cs *ring);

int intel_init_render_ring_buffer(struct drm_device *dev);
int intel_init_bsd_ring_buffer(struct drm_device *dev);
int intel_init_bsd2_ring_buffer(struct drm_device *dev);
int intel_init_blt_ring_buffer(struct drm_device *dev);
int intel_init_vebox_ring_buffer(struct drm_device *dev);

u64 intel_ring_get_active_head(struct intel_engine_cs *ring);

int init_workarounds_ring(struct intel_engine_cs *ring);

static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
{
        return ringbuf->tail;
}

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is ILK at 136 words. Reserving too much is better than reserving too little
 * as that allows for corner cases that might have been missed. So the figure
 * has been rounded up to 160 words.
 */
#define MIN_SPACE_FOR_ADD_REQUEST       160

/*
 * Reserve space in the ring to guarantee that the i915_add_request() call
 * will always have sufficient room to do its stuff. The request creation
 * code calls this automatically.
 */
void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
/* Cancel the reservation, e.g. because the request is being discarded. */
void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
/* Use the reserved space - for use by i915_add_request() only. */
void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
/* Finish with the reserved space - for use by i915_add_request() only. */
void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
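
/*
 * Hedged sketch of the reservation lifecycle; the authoritative flow is in
 * the request-alloc and i915_add_request() paths and may differ in detail:
 *
 *     intel_ring_reserved_space_reserve(ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
 *     ... caller builds the request, emitting its own commands ...
 *     intel_ring_reserved_space_use(ringbuf);
 *     ... i915_add_request() emits the breadcrumb from the reservation ...
 *     intel_ring_reserved_space_end(ringbuf);
 *
 * If the request is abandoned before submission,
 * intel_ring_reserved_space_cancel() releases the reservation instead.
 */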

/* Legacy ringbuffer specific portion of reservation code: */
int intel_ring_reserve_space(struct drm_i915_gem_request *request);

#endif /* _INTEL_RINGBUFFER_H_ */