linux/drivers/gpu/drm/i915/intel_ringbuffer.h
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
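
/*
 * Worked example (editor's note): intel_ring_cacheline_align(), declared
 * below, pads the ring with MI_NOOPs up to the next CACHELINE_BYTES
 * boundary. With the tail at byte offset 0x134, it is 0x34 bytes into its
 * cacheline, so 0x40 - 0x34 = 0xc bytes (three MI_NOOP dwords) of padding
 * bring the next command up to the 0x140 boundary.
 */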

/*
 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
 * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
 * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
 *
 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
 * cacheline, the Head Pointer must not be greater than the Tail
 * Pointer."
 */
#define I915_RING_FREE_SPACE 64
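
/*
 * A minimal sketch (editor's illustration, not part of the driver; the real
 * helper is __intel_ring_space(), declared at the bottom of this file) of
 * the arithmetic that keeps I915_RING_FREE_SPACE bytes in reserve so the
 * head and tail pointers always stay clear of the constraint quoted above:
 */
static inline int example_ring_free_space(int head, int tail, int size)
{
        int space = head - tail;        /* bytes from tail up to head */

        if (space <= 0)                 /* free region wraps past the end */
                space += size;

        return space - I915_RING_FREE_SPACE; /* never hand out the reserve */
}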

struct intel_hw_status_page {
        u32             *page_addr;
        unsigned int    gfx_addr;
        struct          drm_i915_gem_object *obj;
};

#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)

#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base))
#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val)

#define I915_READ_HEAD(ring) I915_READ(RING_HEAD((ring)->mmio_base))
#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val)

#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base))
#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val)

#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)

#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base))
#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val)

/* The seqno is actually only a u32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and MI_FLUSH_DW requires qword-aligned offsets, we simply
 * pretend the seqno is 8 bytes wide.
 */
#define i915_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SIGNAL_OFFSET(__ring, to)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((__ring)->id * I915_NUM_RINGS * i915_semaphore_seqno_size) +   \
        (i915_semaphore_seqno_size * (to)))

#define GEN8_WAIT_OFFSET(__ring, from)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
        (i915_semaphore_seqno_size * (__ring)->id))
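
/*
 * Worked example (editor's note, values taken from the signal/wait tables
 * documented in struct intel_engine_cs below): for the VCS ring signalling
 * VECS, GEN8_SIGNAL_OFFSET expands to
 *
 *   semaphore page base
 *     + (VCS * I915_NUM_RINGS * i915_semaphore_seqno_size)   (1 * 5 * 8 = 0x28)
 *     + (i915_semaphore_seqno_size * VECS)                   (8 * 3     = 0x18)
 *   = base + 0x40,
 *
 * which matches the VCS row / "signal to VECS" column of the signal table.
 * GEN8_WAIT_OFFSET(VECS ring, from = VCS) yields the same address, so the
 * waiter polls exactly the slot the signaller writes.
 */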

#define GEN8_RING_SEMAPHORE_INIT do { \
        if (!dev_priv->semaphore_obj) { \
                break; \
        } \
        ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \
        ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \
        ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \
        ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \
        ring->semaphore.signal_ggtt[VCS2] = GEN8_SIGNAL_OFFSET(ring, VCS2); \
        ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
        } while (0)

enum intel_ring_hangcheck_action {
        HANGCHECK_IDLE = 0,
        HANGCHECK_WAIT,
        HANGCHECK_ACTIVE,
        HANGCHECK_ACTIVE_LOOP,
        HANGCHECK_KICK,
        HANGCHECK_HUNG,
};

#define HANGCHECK_SCORE_RING_HUNG 31

struct intel_ring_hangcheck {
        u64 acthd;
        u64 max_acthd;
        u32 seqno;
        int score;
        enum intel_ring_hangcheck_action action;
        int deadlock;
};

struct intel_ringbuffer {
        struct drm_i915_gem_object *obj;
        void __iomem *virtual_start;

        struct intel_engine_cs *ring;

        u32 head;
        u32 tail;
        int space;
        int size;
        int effective_size;

        /**
         * We track the position of the requests in the ring buffer, and
         * when each is retired we advance last_retired_head, as the GPU
         * must have finished processing the request and so we know we
         * can advance the ringbuffer up to that position.
         *
         * last_retired_head is set to -1 after the value is consumed so
         * we can detect new retirements.
         */
        u32 last_retired_head;
};
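
/*
 * Editor's sketch of how last_retired_head is typically consumed when
 * recomputing free space (illustrative only; see intel_ring_update_space()
 * and the request-retirement code for the driver's actual logic):
 *
 *      if (ringbuf->last_retired_head != -1) {
 *              ringbuf->head = ringbuf->last_retired_head;
 *              ringbuf->last_retired_head = -1;
 *      }
 *      ringbuf->space = __intel_ring_space(ringbuf->head, ringbuf->tail,
 *                                          ringbuf->size);
 */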

struct intel_context;

struct intel_engine_cs {
        const char      *name;
        enum intel_ring_id {
                RCS = 0x0,
                VCS,
                BCS,
                VECS,
                VCS2
        } id;
#define I915_NUM_RINGS 5
#define LAST_USER_RING (VECS + 1)
        u32             mmio_base;
        struct          drm_device *dev;
        struct intel_ringbuffer *buffer;

        struct intel_hw_status_page status_page;

        unsigned irq_refcount; /* protected by dev_priv->irq_lock */
        u32             irq_enable_mask;        /* bitmask to enable ring interrupt */
        struct drm_i915_gem_request *trace_irq_req;
        bool __must_check (*irq_get)(struct intel_engine_cs *ring);
        void            (*irq_put)(struct intel_engine_cs *ring);

        int             (*init_hw)(struct intel_engine_cs *ring);

        int             (*init_context)(struct intel_engine_cs *ring,
                                        struct intel_context *ctx);

        void            (*write_tail)(struct intel_engine_cs *ring,
                                      u32 value);
        int __must_check (*flush)(struct intel_engine_cs *ring,
                                  u32   invalidate_domains,
                                  u32   flush_domains);
        int             (*add_request)(struct intel_engine_cs *ring);
        /* Some chipsets are not quite as coherent as advertised and need
         * an expensive kick to force a true read of the up-to-date seqno.
         * However, the up-to-date seqno is not always required and the last
         * seen value is good enough. Note that the seqno will always be
         * monotonic, even if not coherent.
         */
        u32             (*get_seqno)(struct intel_engine_cs *ring,
                                     bool lazy_coherency);
        void            (*set_seqno)(struct intel_engine_cs *ring,
                                     u32 seqno);
        int             (*dispatch_execbuffer)(struct intel_engine_cs *ring,
                                               u64 offset, u32 length,
                                               unsigned dispatch_flags);
#define I915_DISPATCH_SECURE 0x1
#define I915_DISPATCH_PINNED 0x2
        void            (*cleanup)(struct intel_engine_cs *ring);

        /* GEN8 signal/wait table - never trust comments!
         *        signal to     signal to    signal to   signal to      signal to
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
         *  ie. transpose of g(x, y)
         *
         *       sync from      sync from    sync from    sync from     sync from
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
         *  ie. transpose of f(x, y)
         */
        struct {
                u32     sync_seqno[I915_NUM_RINGS-1];

                union {
                        struct {
                                /* our mbox written by others */
                                u32             wait[I915_NUM_RINGS];
                                /* mboxes this ring signals to */
                                u32             signal[I915_NUM_RINGS];
                        } mbox;
                        u64             signal_ggtt[I915_NUM_RINGS];
                };

                /* AKA wait() */
                int     (*sync_to)(struct intel_engine_cs *ring,
                                   struct intel_engine_cs *to,
                                   u32 seqno);
                int     (*signal)(struct intel_engine_cs *signaller,
                                  /* num_dwords needed by caller */
                                  unsigned int num_dwords);
        } semaphore;

        /* Execlists */
        spinlock_t execlist_lock;
        struct list_head execlist_queue;
        struct list_head execlist_retired_req_list;
        u8 next_context_status_buffer;
        u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
        int             (*emit_request)(struct intel_ringbuffer *ringbuf,
                                        struct drm_i915_gem_request *request);
        int             (*emit_flush)(struct intel_ringbuffer *ringbuf,
                                      struct intel_context *ctx,
                                      u32 invalidate_domains,
                                      u32 flush_domains);
        int             (*emit_bb_start)(struct intel_ringbuffer *ringbuf,
                                         struct intel_context *ctx,
                                         u64 offset, unsigned dispatch_flags);

        /**
         * List of objects currently involved in rendering from the
         * ringbuffer.
         *
         * Includes buffers having the contents of their GPU caches
         * flushed, not necessarily primitives. last_read_req
         * represents when the rendering involved will be completed.
         *
         * A reference is held on the buffer while on this list.
         */
        struct list_head active_list;

        /**
         * List of breadcrumbs associated with GPU requests currently
         * outstanding.
         */
        struct list_head request_list;

        /**
         * Is there an outstanding request that has not yet been
         * emitted to the ring?
         */
        struct drm_i915_gem_request *outstanding_lazy_request;
        bool gpu_caches_dirty;

        wait_queue_head_t irq_queue;

        struct intel_context *default_context;
        struct intel_context *last_context;

        struct intel_ring_hangcheck hangcheck;

        struct {
                struct drm_i915_gem_object *obj;
                u32 gtt_offset;
                volatile u32 *cpu_page;
        } scratch;

        bool needs_cmd_parser;

        /*
         * Table of commands the command parser needs to know about
         * for this ring.
         */
        DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

        /*
         * Table of registers allowed in commands that read/write registers.
         */
        const u32 *reg_table;
        int reg_count;

        /*
         * Table of registers allowed in commands that read/write registers, but
         * only from the DRM master.
         */
        const u32 *master_reg_table;
        int master_reg_count;

        /*
         * Returns the bitmask for the length field of the specified command.
         * Returns 0 for an unrecognized/invalid command.
         *
         * If the command parser finds an entry for a command in the ring's
         * cmd_hash table, it gets the command's length from that table entry.
         * If not, it calls this function to determine the per-ring length field
         * encoding for the command (i.e. certain opcode ranges use certain bits
         * to encode the command length in the header).
         */
        u32 (*get_cmd_length_mask)(u32 cmd_header);
};
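
/*
 * Editor's sketch of the shape of a get_cmd_length_mask() callback
 * (illustrative only; the client values and masks below are hypothetical,
 * not taken from any real hardware generation):
 *
 *      static u32 example_get_cmd_length_mask(u32 cmd_header)
 *      {
 *              u32 client = cmd_header >> 29;  // client field in the top bits
 *
 *              if (client == EXAMPLE_MI_CLIENT)        // length in bits 5:0
 *                      return 0x3F;
 *              if (client == EXAMPLE_3D_CLIENT)        // length in bits 7:0
 *                      return 0xFF;
 *              return 0;                               // unrecognized command
 *      }
 */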

bool intel_ring_initialized(struct intel_engine_cs *ring);

static inline unsigned
intel_ring_flag(struct intel_engine_cs *ring)
{
        return 1 << ring->id;
}

static inline u32
intel_ring_sync_index(struct intel_engine_cs *ring,
                      struct intel_engine_cs *other)
{
        int idx;

        /*
         * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
         * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
         * bcs -> 0 = vecs, 1 = vcs2, 2 = rcs, 3 = vcs;
         * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
         * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
         */

        idx = (other - ring) - 1;
        if (idx < 0)
                idx += I915_NUM_RINGS;

        return idx;
}
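
/*
 * Worked example (editor's note): with ring pointing at the VCS engine and
 * other at the RCS engine, (other - ring) - 1 = (0 - 1) - 1 = -2, which
 * wraps to -2 + I915_NUM_RINGS = 3, matching the "vcs -> ... 3 = rcs" line
 * in the comment above.
 */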

static inline u32
intel_read_status_page(struct intel_engine_cs *ring,
                       int reg)
{
        /* Ensure that the compiler doesn't optimize away the load. */
        barrier();
        return ring->status_page.page_addr[reg];
}

static inline void
intel_write_status_page(struct intel_engine_cs *ring,
                        int reg, u32 value)
{
        ring->status_page.page_addr[reg] = value;
}
 362
 363/**
 364 * Reads a dword out of the status page, which is written to from the command
 365 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 366 * MI_STORE_DATA_IMM.
 367 *
 368 * The following dwords have a reserved meaning:
 369 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 370 * 0x04: ring 0 head pointer
 371 * 0x05: ring 1 head pointer (915-class)
 372 * 0x06: ring 2 head pointer (915-class)
 373 * 0x10-0x1b: Context status DWords (GM45)
 374 * 0x1f: Last written status offset. (GM45)
 375 * 0x20-0x2f: Reserved (Gen6+)
 376 *
 377 * The area from dword 0x30 to 0x3ff is available for driver usage.
 378 */
 379#define I915_GEM_HWS_INDEX              0x30
 380#define I915_GEM_HWS_SCRATCH_INDEX      0x40
 381#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
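
/*
 * Illustrative helper (editor's sketch, not part of the driver API): the
 * canonical use of the reserved dwords above is reading the breadcrumb
 * that the add_request() path stores at I915_GEM_HWS_INDEX:
 */
static inline u32 example_read_hws_seqno(struct intel_engine_cs *ring)
{
        /* status page dword 0x30 holds the last completed seqno */
        return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}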

void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
                                     struct intel_ringbuffer *ringbuf);
void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
int intel_alloc_ringbuffer_obj(struct drm_device *dev,
                               struct intel_ringbuffer *ringbuf);

void intel_stop_ring_buffer(struct intel_engine_cs *ring);
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);

int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
static inline void intel_ring_emit(struct intel_engine_cs *ring,
                                   u32 data)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
        ringbuf->tail += 4;
}

static inline void intel_ring_advance(struct intel_engine_cs *ring)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        ringbuf->tail &= ringbuf->size - 1;
}
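
/*
 * Typical emission sequence (editor's illustration of how the helpers above
 * fit together; MI_STORE_DWORD_INDEX, MI_USER_INTERRUPT and friends come
 * from i915_reg.h, and seqno is the value to store in the status page):
 *
 *      ret = intel_ring_begin(ring, 4);        // reserve 4 dwords
 *      if (ret)
 *              return ret;
 *      intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 *      intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 *      intel_ring_emit(ring, seqno);
 *      intel_ring_emit(ring, MI_USER_INTERRUPT);
 *      intel_ring_advance(ring);               // wrap the software tail
 *
 * (__intel_ring_advance(), declared below, additionally writes the new tail
 * to the hardware register via ring->write_tail().)
 */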
int __intel_ring_space(int head, int tail, int size);
void intel_ring_update_space(struct intel_ringbuffer *ringbuf);
int intel_ring_space(struct intel_ringbuffer *ringbuf);
bool intel_ring_stopped(struct intel_engine_cs *ring);
void __intel_ring_advance(struct intel_engine_cs *ring);

int __must_check intel_ring_idle(struct intel_engine_cs *ring);
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);

void intel_fini_pipe_control(struct intel_engine_cs *ring);
int intel_init_pipe_control(struct intel_engine_cs *ring);

int intel_init_render_ring_buffer(struct drm_device *dev);
int intel_init_bsd_ring_buffer(struct drm_device *dev);
int intel_init_bsd2_ring_buffer(struct drm_device *dev);
int intel_init_blt_ring_buffer(struct drm_device *dev);
int intel_init_vebox_ring_buffer(struct drm_device *dev);

u64 intel_ring_get_active_head(struct intel_engine_cs *ring);

int init_workarounds_ring(struct intel_engine_cs *ring);

static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
{
        return ringbuf->tail;
}

static inline struct drm_i915_gem_request *
intel_ring_get_request(struct intel_engine_cs *ring)
{
        BUG_ON(ring->outstanding_lazy_request == NULL);
        return ring->outstanding_lazy_request;
}

#endif /* _INTEL_RINGBUFFER_H_ */