linux/drivers/gpu/drm/i915/gt/intel_ring_submission.c
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2008-2021 Intel Corporation
   4 */
   5
   6#include "gen2_engine_cs.h"
   7#include "gen6_engine_cs.h"
   8#include "gen6_ppgtt.h"
   9#include "gen7_renderclear.h"
  10#include "i915_drv.h"
  11#include "i915_mitigations.h"
  12#include "intel_breadcrumbs.h"
  13#include "intel_context.h"
  14#include "intel_gt.h"
  15#include "intel_reset.h"
  16#include "intel_ring.h"
  17#include "shmem_utils.h"
  18
  19/* Rough estimate of the typical request size, performing a flush,
  20 * set-context and then emitting the batch.
  21 */
  22#define LEGACY_REQUEST_SIZE 200
  23
  24static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
  25{
  26        /*
  27         * Keep the render interrupt unmasked as this papers over
  28         * lost interrupts following a reset.
  29         */
  30        if (engine->class == RENDER_CLASS) {
  31                if (INTEL_GEN(engine->i915) >= 6)
  32                        mask &= ~BIT(0);
  33                else
  34                        mask &= ~I915_USER_INTERRUPT;
  35        }
  36
  37        intel_engine_set_hwsp_writemask(engine, mask);
  38}
  39
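/*
 * Program the physical address of the hardware status page: the low 32 bits
 * of the page-aligned address are written directly, and on gen4+ what appear
 * to be bits 35:32 of a 36-bit physical address are packed into bits 7:4 of
 * HWS_PGA (a reading of the shift below, rather than chapter and verse from
 * bspec).
 */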
  40static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
  41{
  42        u32 addr;
  43
  44        addr = lower_32_bits(phys);
  45        if (INTEL_GEN(engine->i915) >= 4)
  46                addr |= (phys >> 28) & 0xf0;
  47
  48        intel_uncore_write(engine->uncore, HWS_PGA, addr);
  49}
  50
  51static struct page *status_page(struct intel_engine_cs *engine)
  52{
  53        struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
  54
  55        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
  56        return sg_page(obj->mm.pages->sgl);
  57}
  58
  59static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
  60{
  61        set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
  62        set_hwstam(engine, ~0u);
  63}
  64
  65static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
  66{
  67        i915_reg_t hwsp;
  68
  69        /*
  70         * The ring status page addresses are no longer next to the rest of
  71         * the ring registers as of gen7.
  72         */
  73        if (IS_GEN(engine->i915, 7)) {
  74                switch (engine->id) {
  75                /*
   76                 * No other rings exist on Gen7. The default case exists
   77                 * only to silence gcc's switch-check warning.
  78                 */
  79                default:
  80                        GEM_BUG_ON(engine->id);
  81                        fallthrough;
  82                case RCS0:
  83                        hwsp = RENDER_HWS_PGA_GEN7;
  84                        break;
  85                case BCS0:
  86                        hwsp = BLT_HWS_PGA_GEN7;
  87                        break;
  88                case VCS0:
  89                        hwsp = BSD_HWS_PGA_GEN7;
  90                        break;
  91                case VECS0:
  92                        hwsp = VEBOX_HWS_PGA_GEN7;
  93                        break;
  94                }
  95        } else if (IS_GEN(engine->i915, 6)) {
  96                hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
  97        } else {
  98                hwsp = RING_HWS_PGA(engine->mmio_base);
  99        }
 100
 101        intel_uncore_write_fw(engine->uncore, hwsp, offset);
 102        intel_uncore_posting_read_fw(engine->uncore, hwsp);
 103}
 104
 105static void flush_cs_tlb(struct intel_engine_cs *engine)
 106{
 107        if (!IS_GEN_RANGE(engine->i915, 6, 7))
 108                return;
 109
  110        /* Ring should be idle before issuing a sync flush */
 111        GEM_DEBUG_WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
 112
 113        ENGINE_WRITE_FW(engine, RING_INSTPM,
 114                        _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
 115                                           INSTPM_SYNC_FLUSH));
 116        if (__intel_wait_for_register_fw(engine->uncore,
 117                                         RING_INSTPM(engine->mmio_base),
 118                                         INSTPM_SYNC_FLUSH, 0,
 119                                         2000, 0, NULL))
 120                ENGINE_TRACE(engine,
 121                             "wait for SyncFlush to complete for TLB invalidation timed out\n");
 122}
 123
 124static void ring_setup_status_page(struct intel_engine_cs *engine)
 125{
 126        set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
 127        set_hwstam(engine, ~0u);
 128
 129        flush_cs_tlb(engine);
 130}
 131
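/*
 * For legacy ring submission the interesting ppgtt is the aliasing ppgtt
 * hanging off the GGTT: vm_alias() swaps a GGTT vm for that aliasing ppgtt
 * (if one exists), and pp_dir() below fetches a gen6 ppgtt's page-directory
 * offset for programming RING_PP_DIR_BASE.
 */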
 132static struct i915_address_space *vm_alias(struct i915_address_space *vm)
 133{
 134        if (i915_is_ggtt(vm))
 135                vm = &i915_vm_to_ggtt(vm)->alias->vm;
 136
 137        return vm;
 138}
 139
 140static u32 pp_dir(struct i915_address_space *vm)
 141{
 142        return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
 143}
 144
 145static void set_pp_dir(struct intel_engine_cs *engine)
 146{
 147        struct i915_address_space *vm = vm_alias(engine->gt->vm);
 148
 149        if (!vm)
 150                return;
 151
 152        ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
 153        ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));
 154
 155        if (INTEL_GEN(engine->i915) >= 7) {
 156                ENGINE_WRITE_FW(engine,
 157                                RING_MODE_GEN7,
 158                                _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 159        }
 160}
 161
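/*
 * Quiesce the ring: drain it by jumping HEAD to TAIL, clear RING_CTL to
 * disable it, and then zero HEAD and TAIL. Returns true only if the
 * hardware accepted the zeroed HEAD; some parts (G45-era, see
 * reset_prepare()) are known to need a second attempt.
 */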
 162static bool stop_ring(struct intel_engine_cs *engine)
 163{
 164        /* Empty the ring by skipping to the end */
 165        ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
 166        ENGINE_POSTING_READ(engine, RING_HEAD);
 167
 168        /* The ring must be empty before it is disabled */
 169        ENGINE_WRITE_FW(engine, RING_CTL, 0);
 170        ENGINE_POSTING_READ(engine, RING_CTL);
 171
 172        /* Then reset the disabled ring */
 173        ENGINE_WRITE_FW(engine, RING_HEAD, 0);
 174        ENGINE_WRITE_FW(engine, RING_TAIL, 0);
 175
 176        return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
 177}
 178
 179static int xcs_resume(struct intel_engine_cs *engine)
 180{
 181        struct intel_ring *ring = engine->legacy.ring;
 182
 183        ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
 184                     ring->head, ring->tail);
 185
 186        /* Double check the ring is empty & disabled before we resume */
 187        synchronize_hardirq(engine->i915->drm.irq);
 188        if (!stop_ring(engine))
 189                goto err;
 190
 191        if (HWS_NEEDS_PHYSICAL(engine->i915))
 192                ring_setup_phys_status_page(engine);
 193        else
 194                ring_setup_status_page(engine);
 195
 196        intel_breadcrumbs_reset(engine->breadcrumbs);
 197
 198        /* Enforce ordering by reading HEAD register back */
 199        ENGINE_POSTING_READ(engine, RING_HEAD);
 200
 201        /*
 202         * Initialize the ring. This must happen _after_ we've cleared the ring
  203         * registers with the above sequence (the readback of the HEAD register
 204         * also enforces ordering), otherwise the hw might lose the new ring
 205         * register values.
 206         */
 207        ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));
 208
 209        /* Check that the ring offsets point within the ring! */
 210        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
 211        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
 212        intel_ring_update_space(ring);
 213
 214        set_pp_dir(engine);
 215
  216        /* First wake the ring up to an empty/idle ring (TAIL == HEAD) */
 217        ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
 218        ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
 219        ENGINE_POSTING_READ(engine, RING_TAIL);
 220
 221        ENGINE_WRITE_FW(engine, RING_CTL,
 222                        RING_CTL_SIZE(ring->size) | RING_VALID);
 223
  224        /* If the ring never reports RING_VALID, it is dead */
 225        if (__intel_wait_for_register_fw(engine->uncore,
 226                                         RING_CTL(engine->mmio_base),
 227                                         RING_VALID, RING_VALID,
 228                                         5000, 0, NULL))
 229                goto err;
 230
 231        if (INTEL_GEN(engine->i915) > 2)
 232                ENGINE_WRITE_FW(engine,
 233                                RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 234
 235        /* Now awake, let it get started */
 236        if (ring->tail != ring->head) {
 237                ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
 238                ENGINE_POSTING_READ(engine, RING_TAIL);
 239        }
 240
 241        /* Papering over lost _interrupts_ immediately following the restart */
 242        intel_engine_signal_breadcrumbs(engine);
 243        return 0;
 244
 245err:
 246        drm_err(&engine->i915->drm,
 247                "%s initialization failed; "
 248                "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
 249                engine->name,
 250                ENGINE_READ(engine, RING_CTL),
 251                ENGINE_READ(engine, RING_CTL) & RING_VALID,
 252                ENGINE_READ(engine, RING_HEAD), ring->head,
 253                ENGINE_READ(engine, RING_TAIL), ring->tail,
 254                ENGINE_READ(engine, RING_START),
 255                i915_ggtt_offset(ring->vma));
 256        return -EIO;
 257}
 258
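/*
 * Every timeline using this engine's status page keeps its seqno there,
 * and that memory may have been lost across suspend (or deliberately
 * poisoned, see xcs_sanitize()), so rewrite each seqno before the HWSP is
 * trusted again.
 */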
 259static void sanitize_hwsp(struct intel_engine_cs *engine)
 260{
 261        struct intel_timeline *tl;
 262
 263        list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
 264                intel_timeline_reset_seqno(tl);
 265}
 266
 267static void xcs_sanitize(struct intel_engine_cs *engine)
 268{
 269        /*
 270         * Poison residual state on resume, in case the suspend didn't!
 271         *
 272         * We have to assume that across suspend/resume (or other loss
 273         * of control) that the contents of our pinned buffers has been
 274         * lost, replaced by garbage. Since this doesn't always happen,
 275         * let's poison such state so that we more quickly spot when
 276         * we falsely assume it has been preserved.
 277         */
 278        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 279                memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
 280
 281        /*
 282         * The kernel_context HWSP is stored in the status_page. As above,
 283         * that may be lost on resume/initialisation, and so we need to
 284         * reset the value in the HWSP.
 285         */
 286        sanitize_hwsp(engine);
 287
 288        /* And scrub the dirty cachelines for the HWSP */
 289        clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
 290}
 291
 292static void reset_prepare(struct intel_engine_cs *engine)
 293{
 294        /*
  295         * We stop the engines, otherwise we might get a failed reset and
  296         * a dead gpu (on elk). Even a gpu as modern as kbl can suffer a
  297         * system hang if a batchbuffer is still progressing when the
  298         * reset is issued, regardless of the READY_TO_RESET ack.
  299         * Thus we assume it is best to stop the engines on all gens
  300         * where we have a gpu reset.
 301         *
 302         * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
 303         *
 304         * WaMediaResetMainRingCleanup:ctg,elk (presumably)
 305         * WaClearRingBufHeadRegAtInit:ctg,elk
 306         *
 307         * FIXME: Wa for more modern gens needs to be validated
 308         */
 309        ENGINE_TRACE(engine, "\n");
 310        intel_engine_stop_cs(engine);
 311
 312        if (!stop_ring(engine)) {
 313                /* G45 ring initialization often fails to reset head to zero */
 314                ENGINE_TRACE(engine,
 315                             "HEAD not reset to zero, "
 316                             "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
 317                             ENGINE_READ_FW(engine, RING_CTL),
 318                             ENGINE_READ_FW(engine, RING_HEAD),
 319                             ENGINE_READ_FW(engine, RING_TAIL),
 320                             ENGINE_READ_FW(engine, RING_START));
 321                if (!stop_ring(engine)) {
 322                        drm_err(&engine->i915->drm,
 323                                "failed to set %s head to zero "
 324                                "ctl %08x head %08x tail %08x start %08x\n",
 325                                engine->name,
 326                                ENGINE_READ_FW(engine, RING_CTL),
 327                                ENGINE_READ_FW(engine, RING_HEAD),
 328                                ENGINE_READ_FW(engine, RING_TAIL),
 329                                ENGINE_READ_FW(engine, RING_START));
 330                }
 331        }
 332}
 333
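/*
 * After a reset, rewind the ring to the first incomplete request (which
 * __i915_request_reset() will either skip or replay depending on guilt),
 * or to the current tail if nothing was pending, so execution resumes from
 * a consistent point.
 */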
 334static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 335{
 336        struct i915_request *pos, *rq;
 337        unsigned long flags;
 338        u32 head;
 339
 340        rq = NULL;
 341        spin_lock_irqsave(&engine->active.lock, flags);
 342        rcu_read_lock();
 343        list_for_each_entry(pos, &engine->active.requests, sched.link) {
 344                if (!__i915_request_is_complete(pos)) {
 345                        rq = pos;
 346                        break;
 347                }
 348        }
 349        rcu_read_unlock();
 350
 351        /*
 352         * The guilty request will get skipped on a hung engine.
 353         *
 354         * Users of client default contexts do not rely on logical
 355         * state preserved between batches so it is safe to execute
 356         * queued requests following the hang. Non default contexts
 357         * rely on preserved state, so skipping a batch loses the
 358         * evolution of the state and it needs to be considered corrupted.
 359         * Executing more queued batches on top of corrupted state is
 360         * risky. But we take the risk by trying to advance through
 361         * the queued requests in order to make the client behaviour
  362         * more predictable around resets, by not throwing away a random
  363         * amount of the batches it has prepared for execution. Sophisticated
  364         * clients can use gem_reset_stats_ioctl and dma fence status
  365         * (exported via sync_file info ioctl on explicit fences) to observe
  366         * when they lose the context state and should rebuild accordingly.
 367         *
 368         * The context ban, and ultimately the client ban, mechanism are safety
 369         * valves if client submission ends up resulting in nothing more than
 370         * subsequent hangs.
 371         */
 372
 373        if (rq) {
 374                /*
 375                 * Try to restore the logical GPU state to match the
 376                 * continuation of the request queue. If we skip the
 377                 * context/PD restore, then the next request may try to execute
 378                 * assuming that its context is valid and loaded on the GPU and
 379                 * so may try to access invalid memory, prompting repeated GPU
 380                 * hangs.
 381                 *
 382                 * If the request was guilty, we still restore the logical
 383                 * state in case the next request requires it (e.g. the
 384                 * aliasing ppgtt), but skip over the hung batch.
 385                 *
 386                 * If the request was innocent, we try to replay the request
 387                 * with the restored context.
 388                 */
 389                __i915_request_reset(rq, stalled);
 390
 391                GEM_BUG_ON(rq->ring != engine->legacy.ring);
 392                head = rq->head;
 393        } else {
 394                head = engine->legacy.ring->tail;
 395        }
 396        engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
 397
 398        spin_unlock_irqrestore(&engine->active.lock, flags);
 399}
 400
 401static void reset_finish(struct intel_engine_cs *engine)
 402{
 403}
 404
 405static void reset_cancel(struct intel_engine_cs *engine)
 406{
 407        struct i915_request *request;
 408        unsigned long flags;
 409
 410        spin_lock_irqsave(&engine->active.lock, flags);
 411
 412        /* Mark all submitted requests as skipped. */
 413        list_for_each_entry(request, &engine->active.requests, sched.link)
 414                i915_request_put(i915_request_mark_eio(request));
 415        intel_engine_signal_breadcrumbs(engine);
 416
 417        /* Remaining _unready_ requests will be nop'ed when submitted */
 418
 419        spin_unlock_irqrestore(&engine->active.lock, flags);
 420}
 421
 422static void i9xx_submit_request(struct i915_request *request)
 423{
 424        i915_request_submit(request);
 425        wmb(); /* paranoid flush writes out of the WCB before mmio */
 426
 427        ENGINE_WRITE(request->engine, RING_TAIL,
 428                     intel_ring_set_tail(request->ring, request->tail));
 429}
 430
 431static void __ring_context_fini(struct intel_context *ce)
 432{
 433        i915_vma_put(ce->state);
 434}
 435
 436static void ring_context_destroy(struct kref *ref)
 437{
 438        struct intel_context *ce = container_of(ref, typeof(*ce), ref);
 439
 440        GEM_BUG_ON(intel_context_is_pinned(ce));
 441
 442        if (ce->state)
 443                __ring_context_fini(ce);
 444
 445        intel_context_fini(ce);
 446        intel_context_free(ce);
 447}
 448
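/*
 * Populate a freshly allocated context image with the engine's recorded
 * default ("golden") state so that the first MI_SET_CONTEXT restore loads
 * sane register values rather than whatever the object happened to contain.
 */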
 449static int ring_context_init_default_state(struct intel_context *ce,
 450                                           struct i915_gem_ww_ctx *ww)
 451{
 452        struct drm_i915_gem_object *obj = ce->state->obj;
 453        void *vaddr;
 454
 455        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 456        if (IS_ERR(vaddr))
 457                return PTR_ERR(vaddr);
 458
 459        shmem_read(ce->engine->default_state, 0,
 460                   vaddr, ce->engine->context_size);
 461
 462        i915_gem_object_flush_map(obj);
 463        __i915_gem_object_release_map(obj);
 464
 465        __set_bit(CONTEXT_VALID_BIT, &ce->flags);
 466        return 0;
 467}
 468
 469static int ring_context_pre_pin(struct intel_context *ce,
 470                                struct i915_gem_ww_ctx *ww,
 471                                void **unused)
 472{
 473        struct i915_address_space *vm;
 474        int err = 0;
 475
 476        if (ce->engine->default_state &&
 477            !test_bit(CONTEXT_VALID_BIT, &ce->flags)) {
 478                err = ring_context_init_default_state(ce, ww);
 479                if (err)
 480                        return err;
 481        }
 482
 483        vm = vm_alias(ce->vm);
 484        if (vm)
  485                err = gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
 486
 487        return err;
 488}
 489
 490static void __context_unpin_ppgtt(struct intel_context *ce)
 491{
 492        struct i915_address_space *vm;
 493
 494        vm = vm_alias(ce->vm);
 495        if (vm)
 496                gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
 497}
 498
 499static void ring_context_unpin(struct intel_context *ce)
 500{
 501}
 502
 503static void ring_context_post_unpin(struct intel_context *ce)
 504{
 505        __context_unpin_ppgtt(ce);
 506}
 507
 508static struct i915_vma *
 509alloc_context_vma(struct intel_engine_cs *engine)
 510{
 511        struct drm_i915_private *i915 = engine->i915;
 512        struct drm_i915_gem_object *obj;
 513        struct i915_vma *vma;
 514        int err;
 515
 516        obj = i915_gem_object_create_shmem(i915, engine->context_size);
 517        if (IS_ERR(obj))
 518                return ERR_CAST(obj);
 519
 520        /*
 521         * Try to make the context utilize L3 as well as LLC.
 522         *
 523         * On VLV we don't have L3 controls in the PTEs so we
 524         * shouldn't touch the cache level, especially as that
 525         * would make the object snooped which might have a
 526         * negative performance impact.
 527         *
 528         * Snooping is required on non-llc platforms in execlist
 529         * mode, but since all GGTT accesses use PAT entry 0 we
 530         * get snooping anyway regardless of cache_level.
 531         *
 532         * This is only applicable for Ivy Bridge devices since
 533         * later platforms don't have L3 control bits in the PTE.
 534         */
 535        if (IS_IVYBRIDGE(i915))
 536                i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
 537
 538        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
 539        if (IS_ERR(vma)) {
 540                err = PTR_ERR(vma);
 541                goto err_obj;
 542        }
 543
 544        return vma;
 545
 546err_obj:
 547        i915_gem_object_put(obj);
 548        return ERR_PTR(err);
 549}
 550
 551static int ring_context_alloc(struct intel_context *ce)
 552{
 553        struct intel_engine_cs *engine = ce->engine;
 554
 555        /* One ringbuffer to rule them all */
 556        GEM_BUG_ON(!engine->legacy.ring);
 557        ce->ring = engine->legacy.ring;
 558        ce->timeline = intel_timeline_get(engine->legacy.timeline);
 559
 560        GEM_BUG_ON(ce->state);
 561        if (engine->context_size) {
 562                struct i915_vma *vma;
 563
 564                vma = alloc_context_vma(engine);
 565                if (IS_ERR(vma))
 566                        return PTR_ERR(vma);
 567
 568                ce->state = vma;
 569        }
 570
 571        return 0;
 572}
 573
 574static int ring_context_pin(struct intel_context *ce, void *unused)
 575{
 576        return 0;
 577}
 578
 579static void ring_context_reset(struct intel_context *ce)
 580{
 581        intel_ring_reset(ce->ring, ce->ring->emit);
 582        clear_bit(CONTEXT_VALID_BIT, &ce->flags);
 583}
 584
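/*
 * Context lifecycle for legacy ring submission, roughly:
 *
 *   alloc      - share the engine's single ring/timeline and allocate the
 *                per-context state object (only engines with a context image)
 *   pre_pin    - load the default state image, pin the aliasing ppgtt
 *   pin/unpin  - no-ops; the heavy lifting is in pre_pin/post_unpin
 *   post_unpin - unpin the aliasing ppgtt
 *   reset      - rewind the ring emit pointer, invalidate the saved image
 *   destroy    - release the state vma and free the context
 */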
 585static const struct intel_context_ops ring_context_ops = {
 586        .alloc = ring_context_alloc,
 587
 588        .pre_pin = ring_context_pre_pin,
 589        .pin = ring_context_pin,
 590        .unpin = ring_context_unpin,
 591        .post_unpin = ring_context_post_unpin,
 592
 593        .enter = intel_context_enter_engine,
 594        .exit = intel_context_exit_engine,
 595
 596        .reset = ring_context_reset,
 597        .destroy = ring_context_destroy,
 598};
 599
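/*
 * Reload the ppgtt page directory from within the ring. A sketch of the
 * 12 dwords emitted below:
 *
 *   MI_LOAD_REGISTER_IMM;  RING_PP_DIR_DCLV;  valid
 *   MI_LOAD_REGISTER_IMM;  RING_PP_DIR_BASE;  pp_dir(vm)
 *   MI_STORE_REGISTER_MEM; RING_PP_DIR_BASE;  scratch (read back to stall)
 *   MI_LOAD_REGISTER_IMM;  RING_INSTPM;       TLB_INVALIDATE
 *
 * followed by engine->emit_flush(EMIT_FLUSH).
 */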
 600static int load_pd_dir(struct i915_request *rq,
 601                       struct i915_address_space *vm,
 602                       u32 valid)
 603{
 604        const struct intel_engine_cs * const engine = rq->engine;
 605        u32 *cs;
 606
 607        cs = intel_ring_begin(rq, 12);
 608        if (IS_ERR(cs))
 609                return PTR_ERR(cs);
 610
 611        *cs++ = MI_LOAD_REGISTER_IMM(1);
 612        *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
 613        *cs++ = valid;
 614
 615        *cs++ = MI_LOAD_REGISTER_IMM(1);
 616        *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 617        *cs++ = pp_dir(vm);
 618
 619        /* Stall until the page table load is complete? */
 620        *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 621        *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
 622        *cs++ = intel_gt_scratch_offset(engine->gt,
 623                                        INTEL_GT_SCRATCH_FIELD_DEFAULT);
 624
 625        *cs++ = MI_LOAD_REGISTER_IMM(1);
 626        *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
 627        *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
 628
 629        intel_ring_advance(rq, cs);
 630
 631        return rq->engine->emit_flush(rq, EMIT_FLUSH);
 632}
 633
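/*
 * Emit an MI_SET_CONTEXT switch wrapped in the per-gen workarounds: on
 * gen7, arbitration is disabled and the other engines' PSMI sleep messages
 * are masked around the switch (WaProgramMiArbOnOffAroundMiSetContext); on
 * ilk a MI_SUSPEND_FLUSH bracket is applied; and MI_FORCE_RESTORE is
 * emulated by bouncing through the kernel context with RESTORE_INHIBIT
 * before switching to the target.
 */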
 634static int mi_set_context(struct i915_request *rq,
 635                          struct intel_context *ce,
 636                          u32 flags)
 637{
 638        struct intel_engine_cs *engine = rq->engine;
 639        struct drm_i915_private *i915 = engine->i915;
 640        enum intel_engine_id id;
 641        const int num_engines =
 642                IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
 643        bool force_restore = false;
 644        int len;
 645        u32 *cs;
 646
 647        len = 4;
 648        if (IS_GEN(i915, 7))
 649                len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
 650        else if (IS_GEN(i915, 5))
 651                len += 2;
 652        if (flags & MI_FORCE_RESTORE) {
 653                GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
 654                flags &= ~MI_FORCE_RESTORE;
 655                force_restore = true;
 656                len += 2;
 657        }
 658
 659        cs = intel_ring_begin(rq, len);
 660        if (IS_ERR(cs))
 661                return PTR_ERR(cs);
 662
 663        /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
 664        if (IS_GEN(i915, 7)) {
 665                *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 666                if (num_engines) {
 667                        struct intel_engine_cs *signaller;
 668
 669                        *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
 670                        for_each_engine(signaller, engine->gt, id) {
 671                                if (signaller == engine)
 672                                        continue;
 673
 674                                *cs++ = i915_mmio_reg_offset(
 675                                           RING_PSMI_CTL(signaller->mmio_base));
 676                                *cs++ = _MASKED_BIT_ENABLE(
 677                                                GEN6_PSMI_SLEEP_MSG_DISABLE);
 678                        }
 679                }
 680        } else if (IS_GEN(i915, 5)) {
 681                /*
 682                 * This w/a is only listed for pre-production ilk a/b steppings,
 683                 * but is also mentioned for programming the powerctx. To be
 684                 * safe, just apply the workaround; we do not use SyncFlush so
 685                 * this should never take effect and so be a no-op!
 686                 */
 687                *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
 688        }
 689
 690        if (force_restore) {
 691                /*
 692                 * The HW doesn't handle being told to restore the current
  693                 * context very well. Quite often it likes to go off and
 694                 * sulk, especially when it is meant to be reloading PP_DIR.
 695                 * A very simple fix to force the reload is to simply switch
 696                 * away from the current context and back again.
 697                 *
 698                 * Note that the kernel_context will contain random state
 699                 * following the INHIBIT_RESTORE. We accept this since we
 700                 * never use the kernel_context state; it is merely a
 701                 * placeholder we use to flush other contexts.
 702                 */
 703                *cs++ = MI_SET_CONTEXT;
 704                *cs++ = i915_ggtt_offset(engine->kernel_context->state) |
 705                        MI_MM_SPACE_GTT |
 706                        MI_RESTORE_INHIBIT;
 707        }
 708
 709        *cs++ = MI_NOOP;
 710        *cs++ = MI_SET_CONTEXT;
 711        *cs++ = i915_ggtt_offset(ce->state) | flags;
 712        /*
 713         * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 714         * WaMiSetContext_Hang:snb,ivb,vlv
 715         */
 716        *cs++ = MI_NOOP;
 717
 718        if (IS_GEN(i915, 7)) {
 719                if (num_engines) {
 720                        struct intel_engine_cs *signaller;
 721                        i915_reg_t last_reg = {}; /* keep gcc quiet */
 722
 723                        *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
 724                        for_each_engine(signaller, engine->gt, id) {
 725                                if (signaller == engine)
 726                                        continue;
 727
 728                                last_reg = RING_PSMI_CTL(signaller->mmio_base);
 729                                *cs++ = i915_mmio_reg_offset(last_reg);
 730                                *cs++ = _MASKED_BIT_DISABLE(
 731                                                GEN6_PSMI_SLEEP_MSG_DISABLE);
 732                        }
 733
 734                        /* Insert a delay before the next switch! */
 735                        *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 736                        *cs++ = i915_mmio_reg_offset(last_reg);
 737                        *cs++ = intel_gt_scratch_offset(engine->gt,
 738                                                        INTEL_GT_SCRATCH_FIELD_DEFAULT);
 739                        *cs++ = MI_NOOP;
 740                }
 741                *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 742        } else if (IS_GEN(i915, 5)) {
 743                *cs++ = MI_SUSPEND_FLUSH;
 744        }
 745
 746        intel_ring_advance(rq, cs);
 747
 748        return 0;
 749}
 750
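/*
 * Rewrite the GEN7_L3LOG remapping registers for one L3 slice from the
 * ring, using a single MI_LOAD_REGISTER_IMM carrying GEN7_L3LOG_SIZE/4
 * register/value pairs taken from the saved l3_parity remap table.
 */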
 751static int remap_l3_slice(struct i915_request *rq, int slice)
 752{
 753#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32))
 754        u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice];
 755        int i;
 756
 757        if (!remap_info)
 758                return 0;
 759
 760        cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2);
 761        if (IS_ERR(cs))
 762                return PTR_ERR(cs);
 763
 764        /*
 765         * Note: We do not worry about the concurrent register cacheline hang
 766         * here because no other code should access these registers other than
 767         * at initialization time.
 768         */
 769        *cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW);
 770        for (i = 0; i < L3LOG_DW; i++) {
 771                *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
 772                *cs++ = remap_info[i];
 773        }
 774        *cs++ = MI_NOOP;
 775        intel_ring_advance(rq, cs);
 776
 777        return 0;
 778#undef L3LOG_DW
 779}
 780
 781static int remap_l3(struct i915_request *rq)
 782{
 783        struct i915_gem_context *ctx = i915_request_gem_context(rq);
 784        int i, err;
 785
 786        if (!ctx || !ctx->remap_slice)
 787                return 0;
 788
 789        for (i = 0; i < MAX_L3_SLICES; i++) {
 790                if (!(ctx->remap_slice & BIT(i)))
 791                        continue;
 792
 793                err = remap_l3_slice(rq, i);
 794                if (err)
 795                        return err;
 796        }
 797
 798        ctx->remap_slice = 0;
 799        return 0;
 800}
 801
 802static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
 803{
 804        int ret;
 805
 806        if (!vm)
 807                return 0;
 808
 809        ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
 810        if (ret)
 811                return ret;
 812
 813        /*
 814         * Not only do we need a full barrier (post-sync write) after
 815         * invalidating the TLBs, but we need to wait a little bit
 816         * longer. Whether this is merely delaying us, or the
 817         * subsequent flush is a key part of serialising with the
 818         * post-sync op, this extra pass appears vital before a
 819         * mm switch!
 820         */
 821        ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
 822        if (ret)
 823                return ret;
 824
 825        return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 826}
 827
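/*
 * Run the gen7 workaround batch (see gen7_renderclear) under the kernel
 * context in order to scrub residual EU/GPR state before the engine is
 * handed to a different user context; only used when the clear-residuals
 * mitigation is enabled (see i915_mitigations).
 */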
 828static int clear_residuals(struct i915_request *rq)
 829{
 830        struct intel_engine_cs *engine = rq->engine;
 831        int ret;
 832
 833        ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
 834        if (ret)
 835                return ret;
 836
 837        if (engine->kernel_context->state) {
 838                ret = mi_set_context(rq,
 839                                     engine->kernel_context,
 840                                     MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
 841                if (ret)
 842                        return ret;
 843        }
 844
 845        ret = engine->emit_bb_start(rq,
 846                                    engine->wa_ctx.vma->node.start, 0,
 847                                    0);
 848        if (ret)
 849                return ret;
 850
 851        ret = engine->emit_flush(rq, EMIT_FLUSH);
 852        if (ret)
 853                return ret;
 854
 855        /* Always invalidate before the next switch_mm() */
 856        return engine->emit_flush(rq, EMIT_INVALIDATE);
 857}
 858
 859static int switch_context(struct i915_request *rq)
 860{
 861        struct intel_engine_cs *engine = rq->engine;
 862        struct intel_context *ce = rq->context;
 863        void **residuals = NULL;
 864        int ret;
 865
 866        GEM_BUG_ON(HAS_EXECLISTS(engine->i915));
 867
 868        if (engine->wa_ctx.vma && ce != engine->kernel_context) {
 869                if (engine->wa_ctx.vma->private != ce &&
 870                    i915_mitigate_clear_residuals()) {
 871                        ret = clear_residuals(rq);
 872                        if (ret)
 873                                return ret;
 874
 875                        residuals = &engine->wa_ctx.vma->private;
 876                }
 877        }
 878
 879        ret = switch_mm(rq, vm_alias(ce->vm));
 880        if (ret)
 881                return ret;
 882
 883        if (ce->state) {
 884                u32 flags;
 885
 886                GEM_BUG_ON(engine->id != RCS0);
 887
 888                /* For resource streamer on HSW+ and power context elsewhere */
 889                BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
 890                BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
 891
 892                flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
 893                if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
 894                        flags |= MI_RESTORE_EXT_STATE_EN;
 895                else
 896                        flags |= MI_RESTORE_INHIBIT;
 897
 898                ret = mi_set_context(rq, ce, flags);
 899                if (ret)
 900                        return ret;
 901        }
 902
 903        ret = remap_l3(rq);
 904        if (ret)
 905                return ret;
 906
 907        /*
 908         * Now past the point of no return, this request _will_ be emitted.
 909         *
 910         * Or at least this preamble will be emitted, the request may be
 911         * interrupted prior to submitting the user payload. If so, we
 912         * still submit the "empty" request in order to preserve global
 913         * state tracking such as this, our tracking of the current
 914         * dirty context.
 915         */
 916        if (residuals) {
 917                intel_context_put(*residuals);
 918                *residuals = intel_context_get(ce);
 919        }
 920
 921        return 0;
 922}
 923
 924static int ring_request_alloc(struct i915_request *request)
 925{
 926        int ret;
 927
 928        GEM_BUG_ON(!intel_context_is_pinned(request->context));
 929        GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
 930
 931        /*
 932         * Flush enough space to reduce the likelihood of waiting after
 933         * we start building the request - in which case we will just
 934         * have to repeat work.
 935         */
 936        request->reserved_space += LEGACY_REQUEST_SIZE;
 937
 938        /* Unconditionally invalidate GPU caches and TLBs. */
 939        ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 940        if (ret)
 941                return ret;
 942
 943        ret = switch_context(request);
 944        if (ret)
 945                return ret;
 946
 947        request->reserved_space -= LEGACY_REQUEST_SIZE;
 948        return 0;
 949}
 950
 951static void gen6_bsd_submit_request(struct i915_request *request)
 952{
 953        struct intel_uncore *uncore = request->engine->uncore;
 954
 955        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 956
  957        /* Every tail move must follow the sequence below */
 958
 959        /* Disable notification that the ring is IDLE. The GT
 960         * will then assume that it is busy and bring it out of rc6.
 961         */
 962        intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
 963                              _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 964
 965        /* Clear the context id. Here be magic! */
 966        intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
 967
 968        /* Wait for the ring not to be idle, i.e. for it to wake up. */
 969        if (__intel_wait_for_register_fw(uncore,
 970                                         GEN6_BSD_SLEEP_PSMI_CONTROL,
 971                                         GEN6_BSD_SLEEP_INDICATOR,
 972                                         0,
 973                                         1000, 0, NULL))
 974                drm_err(&uncore->i915->drm,
 975                        "timed out waiting for the BSD ring to wake up\n");
 976
 977        /* Now that the ring is fully powered up, update the tail */
 978        i9xx_submit_request(request);
 979
 980        /* Let the ring send IDLE messages to the GT again,
 981         * and so let it sleep to conserve power when idle.
 982         */
 983        intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
 984                              _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 985
 986        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
 987}
 988
 989static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 990{
 991        engine->submit_request = i9xx_submit_request;
 992
 993        engine->park = NULL;
 994        engine->unpark = NULL;
 995}
 996
 997static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 998{
 999        i9xx_set_default_submission(engine);
1000        engine->submit_request = gen6_bsd_submit_request;
1001}
1002
1003static void ring_release(struct intel_engine_cs *engine)
1004{
1005        struct drm_i915_private *dev_priv = engine->i915;
1006
1007        drm_WARN_ON(&dev_priv->drm, INTEL_GEN(dev_priv) > 2 &&
1008                    (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
1009
1010        intel_engine_cleanup_common(engine);
1011
1012        if (engine->wa_ctx.vma) {
1013                intel_context_put(engine->wa_ctx.vma->private);
1014                i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1015        }
1016
1017        intel_ring_unpin(engine->legacy.ring);
1018        intel_ring_put(engine->legacy.ring);
1019
1020        intel_timeline_unpin(engine->legacy.timeline);
1021        intel_timeline_put(engine->legacy.timeline);
1022}
1023
1024static void setup_irq(struct intel_engine_cs *engine)
1025{
1026        struct drm_i915_private *i915 = engine->i915;
1027
1028        if (INTEL_GEN(i915) >= 6) {
1029                engine->irq_enable = gen6_irq_enable;
1030                engine->irq_disable = gen6_irq_disable;
1031        } else if (INTEL_GEN(i915) >= 5) {
1032                engine->irq_enable = gen5_irq_enable;
1033                engine->irq_disable = gen5_irq_disable;
1034        } else if (INTEL_GEN(i915) >= 3) {
1035                engine->irq_enable = gen3_irq_enable;
1036                engine->irq_disable = gen3_irq_disable;
1037        } else {
1038                engine->irq_enable = gen2_irq_enable;
1039                engine->irq_disable = gen2_irq_disable;
1040        }
1041}
1042
1043static void setup_common(struct intel_engine_cs *engine)
1044{
1045        struct drm_i915_private *i915 = engine->i915;
1046
1047        /* gen8+ are only supported with execlists */
1048        GEM_BUG_ON(INTEL_GEN(i915) >= 8);
1049
1050        setup_irq(engine);
1051
1052        engine->resume = xcs_resume;
1053        engine->sanitize = xcs_sanitize;
1054
1055        engine->reset.prepare = reset_prepare;
1056        engine->reset.rewind = reset_rewind;
1057        engine->reset.cancel = reset_cancel;
1058        engine->reset.finish = reset_finish;
1059
1060        engine->cops = &ring_context_ops;
1061        engine->request_alloc = ring_request_alloc;
1062
1063        /*
1064         * Using a global execution timeline; the previous final breadcrumb is
 1065         * equivalent to our next initial breadcrumb so we can elide
1066         * engine->emit_init_breadcrumb().
1067         */
1068        engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
1069        if (IS_GEN(i915, 5))
1070                engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
1071
1072        engine->set_default_submission = i9xx_set_default_submission;
1073
1074        if (INTEL_GEN(i915) >= 6)
1075                engine->emit_bb_start = gen6_emit_bb_start;
1076        else if (INTEL_GEN(i915) >= 4)
1077                engine->emit_bb_start = gen4_emit_bb_start;
1078        else if (IS_I830(i915) || IS_I845G(i915))
1079                engine->emit_bb_start = i830_emit_bb_start;
1080        else
1081                engine->emit_bb_start = gen3_emit_bb_start;
1082}
1083
1084static void setup_rcs(struct intel_engine_cs *engine)
1085{
1086        struct drm_i915_private *i915 = engine->i915;
1087
1088        if (HAS_L3_DPF(i915))
1089                engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
1090
1091        engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
1092
1093        if (INTEL_GEN(i915) >= 7) {
1094                engine->emit_flush = gen7_emit_flush_rcs;
1095                engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs;
1096        } else if (IS_GEN(i915, 6)) {
1097                engine->emit_flush = gen6_emit_flush_rcs;
1098                engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs;
1099        } else if (IS_GEN(i915, 5)) {
1100                engine->emit_flush = gen4_emit_flush_rcs;
1101        } else {
1102                if (INTEL_GEN(i915) < 4)
1103                        engine->emit_flush = gen2_emit_flush;
1104                else
1105                        engine->emit_flush = gen4_emit_flush_rcs;
1106                engine->irq_enable_mask = I915_USER_INTERRUPT;
1107        }
1108
1109        if (IS_HASWELL(i915))
1110                engine->emit_bb_start = hsw_emit_bb_start;
1111}
1112
1113static void setup_vcs(struct intel_engine_cs *engine)
1114{
1115        struct drm_i915_private *i915 = engine->i915;
1116
1117        if (INTEL_GEN(i915) >= 6) {
1118                /* gen6 bsd needs a special wa for tail updates */
1119                if (IS_GEN(i915, 6))
1120                        engine->set_default_submission = gen6_bsd_set_default_submission;
1121                engine->emit_flush = gen6_emit_flush_vcs;
1122                engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1123
1124                if (IS_GEN(i915, 6))
1125                        engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
1126                else
1127                        engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
1128        } else {
1129                engine->emit_flush = gen4_emit_flush_vcs;
1130                if (IS_GEN(i915, 5))
1131                        engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
1132                else
1133                        engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1134        }
1135}
1136
1137static void setup_bcs(struct intel_engine_cs *engine)
1138{
1139        struct drm_i915_private *i915 = engine->i915;
1140
1141        engine->emit_flush = gen6_emit_flush_xcs;
1142        engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
1143
1144        if (IS_GEN(i915, 6))
1145                engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs;
1146        else
1147                engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
1148}
1149
1150static void setup_vecs(struct intel_engine_cs *engine)
1151{
1152        struct drm_i915_private *i915 = engine->i915;
1153
1154        GEM_BUG_ON(INTEL_GEN(i915) < 7);
1155
1156        engine->emit_flush = gen6_emit_flush_xcs;
1157        engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
1158        engine->irq_enable = hsw_irq_enable_vecs;
1159        engine->irq_disable = hsw_irq_disable_vecs;
1160
1161        engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs;
1162}
1163
1164static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
1165                                    struct i915_vma * const vma)
1166{
1167        return gen7_setup_clear_gpr_bb(engine, vma);
1168}
1169
1170static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine,
1171                                   struct i915_gem_ww_ctx *ww,
1172                                   struct i915_vma *vma)
1173{
1174        int err;
1175
1176        err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH);
1177        if (err)
1178                return err;
1179
1180        err = i915_vma_sync(vma);
1181        if (err)
1182                goto err_unpin;
1183
1184        err = gen7_ctx_switch_bb_setup(engine, vma);
1185        if (err)
1186                goto err_unpin;
1187
1188        engine->wa_ctx.vma = vma;
1189        return 0;
1190
1191err_unpin:
1192        i915_vma_unpin(vma);
1193        return err;
1194}
1195
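/*
 * Build the vma backing the gen7 context-switch workaround batch: probe the
 * required size by calling gen7_ctx_switch_bb_setup() with a NULL vma,
 * allocate an internal object of that size in the GT vm, and attach a dummy
 * intel_context as vma->private to track which context the residuals were
 * last cleared for.
 */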
1196static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
1197{
1198        struct drm_i915_gem_object *obj;
1199        struct i915_vma *vma;
1200        int size, err;
1201
1202        if (!IS_GEN(engine->i915, 7) || engine->class != RENDER_CLASS)
 1203                return NULL;
1204
1205        err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
1206        if (err < 0)
1207                return ERR_PTR(err);
1208        if (!err)
1209                return NULL;
1210
1211        size = ALIGN(err, PAGE_SIZE);
1212
1213        obj = i915_gem_object_create_internal(engine->i915, size);
1214        if (IS_ERR(obj))
1215                return ERR_CAST(obj);
1216
1217        vma = i915_vma_instance(obj, engine->gt->vm, NULL);
1218        if (IS_ERR(vma)) {
1219                i915_gem_object_put(obj);
1220                return ERR_CAST(vma);
1221        }
1222
1223        vma->private = intel_context_create(engine); /* dummy residuals */
1224        if (IS_ERR(vma->private)) {
1225                err = PTR_ERR(vma->private);
1226                vma->private = NULL;
1227                i915_gem_object_put(obj);
1228                return ERR_PTR(err);
1229        }
1230
1231        return vma;
1232}
1233
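/*
 * Engine setup for legacy ring submission: install the common and
 * class-specific vfuncs, create the single shared ring and timeline, pin
 * them (plus the gen7 workaround batch, if any) under a ww context so that
 * lock contention is handled by the backoff/retry dance, and only then take
 * ownership of cleanup via engine->release.
 */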
1234int intel_ring_submission_setup(struct intel_engine_cs *engine)
1235{
1236        struct i915_gem_ww_ctx ww;
1237        struct intel_timeline *timeline;
1238        struct intel_ring *ring;
1239        struct i915_vma *gen7_wa_vma;
1240        int err;
1241
1242        setup_common(engine);
1243
1244        switch (engine->class) {
1245        case RENDER_CLASS:
1246                setup_rcs(engine);
1247                break;
1248        case VIDEO_DECODE_CLASS:
1249                setup_vcs(engine);
1250                break;
1251        case COPY_ENGINE_CLASS:
1252                setup_bcs(engine);
1253                break;
1254        case VIDEO_ENHANCEMENT_CLASS:
1255                setup_vecs(engine);
1256                break;
1257        default:
1258                MISSING_CASE(engine->class);
1259                return -ENODEV;
1260        }
1261
1262        timeline = intel_timeline_create_from_engine(engine,
1263                                                     I915_GEM_HWS_SEQNO_ADDR);
1264        if (IS_ERR(timeline)) {
1265                err = PTR_ERR(timeline);
1266                goto err;
1267        }
1268        GEM_BUG_ON(timeline->has_initial_breadcrumb);
1269
1270        ring = intel_engine_create_ring(engine, SZ_16K);
1271        if (IS_ERR(ring)) {
1272                err = PTR_ERR(ring);
1273                goto err_timeline;
1274        }
1275
1276        GEM_BUG_ON(engine->legacy.ring);
1277        engine->legacy.ring = ring;
1278        engine->legacy.timeline = timeline;
1279
1280        gen7_wa_vma = gen7_ctx_vma(engine);
1281        if (IS_ERR(gen7_wa_vma)) {
1282                err = PTR_ERR(gen7_wa_vma);
1283                goto err_ring;
1284        }
1285
1286        i915_gem_ww_ctx_init(&ww, false);
1287
1288retry:
1289        err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
1290        if (!err && gen7_wa_vma)
1291                err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
1292        if (!err && engine->legacy.ring->vma->obj)
1293                err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
1294        if (!err)
1295                err = intel_timeline_pin(timeline, &ww);
1296        if (!err) {
1297                err = intel_ring_pin(ring, &ww);
1298                if (err)
1299                        intel_timeline_unpin(timeline);
1300        }
1301        if (err)
1302                goto out;
1303
1304        GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
1305
1306        if (gen7_wa_vma) {
1307                err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma);
1308                if (err) {
1309                        intel_ring_unpin(ring);
1310                        intel_timeline_unpin(timeline);
1311                }
1312        }
1313
1314out:
1315        if (err == -EDEADLK) {
1316                err = i915_gem_ww_ctx_backoff(&ww);
1317                if (!err)
1318                        goto retry;
1319        }
1320        i915_gem_ww_ctx_fini(&ww);
1321        if (err)
1322                goto err_gen7_put;
1323
1324        /* Finally, take ownership and responsibility for cleanup! */
1325        engine->release = ring_release;
1326
1327        return 0;
1328
1329err_gen7_put:
1330        if (gen7_wa_vma) {
1331                intel_context_put(gen7_wa_vma->private);
1332                i915_gem_object_put(gen7_wa_vma->obj);
1333        }
1334err_ring:
1335        intel_ring_put(ring);
1336err_timeline:
1337        intel_timeline_put(timeline);
1338err:
1339        intel_engine_cleanup_common(engine);
1340        return err;
1341}
1342
1343#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1344#include "selftest_ring_submission.c"
1345#endif
1346