linux/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_lrc.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"

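/*
 * Emit a flush/invalidate for the gen8/gen9 render engine as a single
 * PIPE_CONTROL, applying the gen9 VF-cache and KBL GAM workarounds where
 * needed.
 */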
int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        bool vf_flush_wa = false, dc_flush_wa = false;
        u32 *cs, flags = 0;
        int len;

        flags |= PIPE_CONTROL_CS_STALL;

        if (mode & EMIT_FLUSH) {
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
        }

        if (mode & EMIT_INVALIDATE) {
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                /*
                 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
                 * pipe control.
                 */
                if (GRAPHICS_VER(rq->engine->i915) == 9)
                        vf_flush_wa = true;

                /* WaForGAMHang:kbl */
                if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0))
                        dc_flush_wa = true;
        }

        len = 6;

        if (vf_flush_wa)
                len += 6;

        if (dc_flush_wa)
                len += 12;

        cs = intel_ring_begin(rq, len);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (vf_flush_wa)
                cs = gen8_emit_pipe_control(cs, 0, 0);

        if (dc_flush_wa)
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
                                            0);

        cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

        if (dc_flush_wa)
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

        intel_ring_advance(rq, cs);

        return 0;
}

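/*
 * Emit a flush for the non-render engines using MI_FLUSH_DW, optionally
 * invalidating the TLB (and the BSD caches on video decode engines).
 */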
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
        u32 cmd, *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        cmd = MI_FLUSH_DW + 1;

        /*
         * We always require a command barrier so that subsequent
         * commands, such as breadcrumb interrupts, are strictly ordered
         * wrt the contents of the write cache being flushed to memory
         * (and thus being coherent from the CPU).
         */
        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_INVALIDATE_TLB;
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cmd |= MI_INVALIDATE_BSD;
        }

        *cs++ = cmd;
        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
        *cs++ = 0; /* upper addr */
        *cs++ = 0; /* value */
        intel_ring_advance(rq, cs);

        return 0;
}

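/*
 * Gen11 render flush: the flush and the invalidation are emitted as two
 * separate PIPE_CONTROLs rather than combined into one.
 */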
int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        if (mode & EMIT_FLUSH) {
                u32 *cs;
                u32 flags = 0;

                flags |= PIPE_CONTROL_CS_STALL;

                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        if (mode & EMIT_INVALIDATE) {
                u32 *cs;
                u32 flags = 0;

                flags |= PIPE_CONTROL_CS_STALL;

                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_QW_WRITE;
                flags |= PIPE_CONTROL_STORE_DATA_INDEX;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        return 0;
}

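/*
 * MI_ARB_CHECK doubles as the gen12 pre-parser control; the extra bits
 * toggle pre-fetching so it can be disabled around a TLB invalidation
 * (see gen12_emit_flush_rcs/xcs below).
 */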
static u32 preparser_disable(bool state)
{
        return MI_ARB_CHECK | 1 << 8 | state;
}

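/*
 * Look up the per-instance AUX invalidation register for a video decode
 * or video enhancement engine.
 */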
static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
{
        static const i915_reg_t vd[] = {
                GEN12_VD0_AUX_NV,
                GEN12_VD1_AUX_NV,
                GEN12_VD2_AUX_NV,
                GEN12_VD3_AUX_NV,
        };

        static const i915_reg_t ve[] = {
                GEN12_VE0_AUX_NV,
                GEN12_VE1_AUX_NV,
        };

        if (engine->class == VIDEO_DECODE_CLASS)
                return vd[engine->instance];

        if (engine->class == VIDEO_ENHANCEMENT_CLASS)
                return ve[engine->instance];

        GEM_BUG_ON("unknown aux_inv reg\n");
        return INVALID_MMIO_REG;
}

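/*
 * Invalidate the engine's AUX table by writing AUX_INV to its
 * invalidation register with a single MI_LOAD_REGISTER_IMM.
 */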
static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(inv_reg);
        *cs++ = AUX_INV;
        *cs++ = MI_NOOP;

        return cs;
}

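/*
 * Gen12 render flush: separate PIPE_CONTROLs for flush and invalidate,
 * with the pre-parser disabled and the AUX table invalidated around the
 * invalidation (hsdes: 1809175790).
 */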
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
        if (mode & EMIT_FLUSH) {
                u32 flags = 0;
                u32 *cs;

                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
                flags |= PIPE_CONTROL_FLUSH_L3;
                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                /* Wa_1409600907:tgl,adl-p */
                flags |= PIPE_CONTROL_DEPTH_STALL;
                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
                flags |= PIPE_CONTROL_FLUSH_ENABLE;

                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                flags |= PIPE_CONTROL_QW_WRITE;

                flags |= PIPE_CONTROL_CS_STALL;

                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                cs = gen12_emit_pipe_control(cs,
                                             PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
                                             flags, LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }

        if (mode & EMIT_INVALIDATE) {
                u32 flags = 0;
                u32 *cs;

                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
                flags |= PIPE_CONTROL_QW_WRITE;

                flags |= PIPE_CONTROL_CS_STALL;

                cs = intel_ring_begin(rq, 8 + 4);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);

                /*
                 * Prevent the pre-parser from skipping past the TLB
                 * invalidate and loading a stale page for the batch
                 * buffer / request payload.
                 */
                *cs++ = preparser_disable(true);

                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

                /* hsdes: 1809175790 */
                cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);

                *cs++ = preparser_disable(false);
                intel_ring_advance(rq, cs);
        }

        return 0;
}

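/*
 * Gen12 flush for the non-render engines: MI_FLUSH_DW, plus, when
 * invalidating, a pre-parser toggle and an AUX table invalidation for
 * every engine in the request's mask except the blitter.
 */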
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
        intel_engine_mask_t aux_inv = 0;
        u32 cmd, *cs;

        cmd = 4;
        if (mode & EMIT_INVALIDATE)
                cmd += 2;
        if (mode & EMIT_INVALIDATE)
                aux_inv = rq->engine->mask & ~BIT(BCS0);
        if (aux_inv)
                cmd += 2 * hweight32(aux_inv) + 2;

        cs = intel_ring_begin(rq, cmd);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (mode & EMIT_INVALIDATE)
                *cs++ = preparser_disable(true);

        cmd = MI_FLUSH_DW + 1;

        /*
         * We always require a command barrier so that subsequent
         * commands, such as breadcrumb interrupts, are strictly ordered
         * wrt the contents of the write cache being flushed to memory
         * (and thus being coherent from the CPU).
         */
        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

        if (mode & EMIT_INVALIDATE) {
                cmd |= MI_INVALIDATE_TLB;
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cmd |= MI_INVALIDATE_BSD;
        }

        *cs++ = cmd;
        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
        *cs++ = 0; /* upper addr */
        *cs++ = 0; /* value */

        if (aux_inv) { /* hsdes: 1809175790 */
                struct intel_engine_cs *engine;
                unsigned int tmp;

                *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv));
                for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) {
                        *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
                        *cs++ = AUX_INV;
                }
                *cs++ = MI_NOOP;
        }

        if (mode & EMIT_INVALIDATE)
                *cs++ = preparser_disable(false);

        intel_ring_advance(rq, cs);

        return 0;
}

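/* GGTT address of the preemption semaphore in the engine's status page. */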
static u32 preempt_address(struct intel_engine_cs *engine)
{
        return (i915_ggtt_offset(engine->status_page.vma) +
                I915_GEM_HWS_PREEMPT_ADDR);
}

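/* GGTT offset of this request's seqno slot within its timeline's HWSP. */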
static u32 hwsp_offset(const struct i915_request *rq)
{
        const struct intel_timeline *tl;

        /* Before the request is executed, the timeline is fixed */
        tl = rcu_dereference_protected(rq->timeline,
                                       !i915_request_signaled(rq));

        /* See the comment in i915_request_active_seqno(). */
        return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

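/*
 * Write the initial breadcrumb: mark the request as started by writing
 * seqno - 1 to the timeline's HWSP, then insert an arbitration point so
 * the request can still be preempted before the user payload begins.
 */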
int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
        u32 *cs;

        GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
        if (!i915_request_timeline(rq)->has_initial_breadcrumb)
                return 0;

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = hwsp_offset(rq);
        *cs++ = 0;
        *cs++ = rq->fence.seqno - 1;

        /*
         * Check if we have been preempted before we even get started.
         *
         * After this point i915_request_started() reports true, even if
         * we get preempted and so are no longer running.
         *
         * i915_request_started() is used during preemption processing
         * to decide if the request is currently inside the user payload
         * or spinning on a kernel semaphore (or earlier). For no-preemption
         * requests, we do allow preemption on the semaphore before the user
         * payload, but do not allow preemption once the request is started.
         *
         * i915_request_started() is similarly used during GPU hangs to
         * determine if the user's payload was guilty, and if so, the
         * request is banned. Before the request is started, it is assumed
         * to be unharmed and an innocent victim of another's hang.
         */
        *cs++ = MI_NOOP;
        *cs++ = MI_ARB_CHECK;

        intel_ring_advance(rq, cs);

        /* Record the updated position of the request's payload */
        rq->infix = intel_ring_offset(rq, cs);

        __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

        return 0;
}

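/*
 * Start the batch with arbitration left disabled, so the batch itself
 * cannot be preempted (see the workaround note below).
 */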
int gen8_emit_bb_start_noarb(struct i915_request *rq,
                             u64 offset, u32 len,
                             const unsigned int flags)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /*
         * WaDisableCtxRestoreArbitration:bdw,chv
         *
         * We wouldn't need to perform MI_ARB_ENABLE as often as we do (in
         * particular on all the gens that do not need the w/a at all!) if
         * we took care to make sure that on every switch into this context
         * (both ordinary and for preemption) arbitration was enabled.
         * However, for gen8 there is another w/a that requires us not to
         * preempt inside GPGPU execution, so we keep arbitration disabled
         * for gen8 batches. Arbitration will be re-enabled before we close
         * the request (engine->emit_fini_breadcrumb).
         */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* FIXME(BDW+): Address space and security selectors. */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        intel_ring_advance(rq, cs);

        return 0;
}

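/*
 * Start the batch with arbitration enabled, so it may be preempted, and
 * disable it again once the batch returns; no-preempt requests fall back
 * to the noarb variant above.
 */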
int gen8_emit_bb_start(struct i915_request *rq,
                       u64 offset, u32 len,
                       const unsigned int flags)
{
        u32 *cs;

        if (unlikely(i915_request_has_nopreempt(rq)))
                return gen8_emit_bb_start_noarb(rq, offset, len, flags);

        cs = intel_ring_begin(rq, 6);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        return 0;
}

static void assert_request_valid(struct i915_request *rq)
{
        struct intel_ring *ring __maybe_unused = rq->ring;

        /* Can we unwind this request without appearing to go forwards? */
        GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}

/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
        /* Ensure there's always at least one preemption point per-request. */
        *cs++ = MI_ARB_CHECK;
        *cs++ = MI_NOOP;
        rq->wa_tail = intel_ring_offset(rq, cs);

        /* Check that entire request is less than half the ring */
        assert_request_valid(rq);

        return cs;
}

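/*
 * Poll the preemption semaphore in the HWSP after the breadcrumb; the
 * execlists backend raises it to hold the engine between requests while
 * it rearranges the submission queue (preempt-to-busy).
 */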
static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = preempt_address(rq->engine);
        *cs++ = 0;
        *cs++ = MI_NOOP;

        return cs;
}

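/*
 * Common tail of the final breadcrumb: user interrupt, arbitration
 * re-enable, optional preempt busywait, then the WaIdleLiteRestore tail.
 */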
static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_USER_INTERRUPT;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        if (intel_engine_has_semaphores(rq->engine) &&
            !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
                cs = emit_preempt_busywait(rq, cs);

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return gen8_emit_wa_tail(rq, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
        return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
        return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}

u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        cs = gen8_emit_pipe_control(cs,
                                    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                    PIPE_CONTROL_DC_FLUSH_ENABLE,
                                    0);

        /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      rq->fence.seqno,
                                      hwsp_offset(rq),
                                      PIPE_CONTROL_FLUSH_ENABLE |
                                      PIPE_CONTROL_CS_STALL);

        return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        cs = gen8_emit_ggtt_write_rcs(cs,
                                      rq->fence.seqno,
                                      hwsp_offset(rq),
                                      PIPE_CONTROL_CS_STALL |
                                      PIPE_CONTROL_TILE_CACHE_FLUSH |
                                      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                      PIPE_CONTROL_DC_FLUSH_ENABLE |
                                      PIPE_CONTROL_FLUSH_ENABLE);

        return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
 * of the next request before the memory has been flushed, we're guaranteed that
 * we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
 * so, if the current request is modifying an instruction in the next request on
 * the same intel_context, we might pre-fetch and then execute the pre-update
 * instruction. To avoid this, the users of self-modifying code should either
 * disable the parser around the code emitting the memory writes, via a new flag
 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
 * the in-kernel use-cases we've opted to use a separate context, see
 * reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */

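/* Gen12 variant of the preempt busywait, using the tokenized semaphore wait. */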
static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
        *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
                MI_SEMAPHORE_SAD_EQ_SDD;
        *cs++ = 0;
        *cs++ = preempt_address(rq->engine);
        *cs++ = 0;
        *cs++ = 0;

        return cs;
}

static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
        *cs++ = MI_USER_INTERRUPT;

        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        if (intel_engine_has_semaphores(rq->engine) &&
            !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
                cs = gen12_emit_preempt_busywait(rq, cs);

        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);

        return gen8_emit_wa_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
        /* XXX Stalling flush before seqno write; post-sync not */
        cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
        return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
        cs = gen12_emit_ggtt_write_rcs(cs,
                                       rq->fence.seqno,
                                       hwsp_offset(rq),
                                       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                       /* Wa_1409600907:tgl */
                                       PIPE_CONTROL_DEPTH_STALL |
                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
                                       PIPE_CONTROL_FLUSH_ENABLE);

        return gen12_emit_fini_breadcrumb_tail(rq, cs);
}