linux/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_lrc.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
 * firmware is moving to an updated submission interface and we plan to
 * turn submission back on when that lands. The documentation below (and the
 * related code) matches the old submission model and will be updated as part
 * of the upgrade to the new flow.
 *
 * GuC stage descriptor:
 * During initialization, the driver allocates a static pool of 1024 such
 * descriptors, and shares them with the GuC. Currently, we only use one
 * descriptor. This stage descriptor lets the GuC know about the workqueue and
 * process descriptor. Theoretically, it also lets the GuC know about our HW
 * contexts (context ID, etc...), but we actually employ a kind of submission
 * where the GuC uses the LRCA sent via the work item instead. This is called
 * a "proxy" submission.
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write
 * (0xC4C8). Firmware writes a success/fail code back to the action register
 * after processing the request. The kernel driver polls waiting for this
 * update and then proceeds. A simplified sketch of this flow follows this
 * comment.
 *
 * Work Items:
 * There are several types of work items that the host may place into a
 * workqueue, each with its own requirements and limitations. Currently only
 * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
 * represents an in-order queue. The kernel driver packs the ring tail pointer
 * and an ELSP context descriptor dword into a Work Item.
 * See guc_add_request() and the work item packing sketch that follows it.
 */

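/*
 * Purely illustrative sketch (never called) of the scratch-register handshake
 * described in the DOC comment above, reduced to its essentials: it omits
 * response decoding, locking and error handling. guc_mmio_send_sketch() is a
 * made-up name for this file; the real flow lives in intel_guc_send_mmio().
 */
static int __maybe_unused
guc_mmio_send_sketch(struct intel_guc *guc, const u32 *action, u32 len)
{
        struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
        u32 i;

        /* Write the action code and payload into SOFT_SCRATCH(0..15). */
        for (i = 0; i < len; i++)
                intel_uncore_write(uncore, SOFT_SCRATCH(i), action[i]);

        /* Ring the doorbell: trigger the GuC interrupt via 0xC4C8. */
        intel_uncore_write(uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);

        /*
         * Poll the action register until the firmware overwrites the request
         * with its success/fail code (the real code decodes that response).
         */
        return wait_for(intel_uncore_read(uncore, SOFT_SCRATCH(0)) != action[0],
                        10);
}
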
#define GUC_REQUEST_SIZE 64 /* bytes */

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
        return rb_entry(rb, struct i915_priolist, node);
}

static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
{
        struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;

        return &base[id];
}

static int guc_stage_desc_pool_create(struct intel_guc *guc)
{
        u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
                              GUC_MAX_STAGE_DESCRIPTORS);

        return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
                                              &guc->stage_desc_pool_vaddr);
}

static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
{
        i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
}

/*
 * Initialise/clear the stage descriptor shared with the GuC firmware.
 *
 * This descriptor tells the GuC where (in GGTT space) to find the important
 * data structures related to work submission (process descriptor, write queue,
 * etc).
 */
static void guc_stage_desc_init(struct intel_guc *guc)
{
        struct guc_stage_desc *desc;

        /* we only use 1 stage desc, so hardcode it to 0 */
        desc = __get_stage_desc(guc, 0);
        memset(desc, 0, sizeof(*desc));

        desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
                          GUC_STAGE_DESC_ATTR_KERNEL;

        desc->stage_id = 0;
        desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;

        desc->wq_size = GUC_WQ_SIZE;
}

static void guc_stage_desc_fini(struct intel_guc *guc)
{
        struct guc_stage_desc *desc;

        desc = __get_stage_desc(guc, 0);
        memset(desc, 0, sizeof(*desc));
}

static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
        /* Leaving stub as this function will be used in future patches */
}

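/*
 * Purely illustrative sketch (never called) of how a WQ_TYPE_INORDER work
 * item was packed under the legacy proxy-submission model described in the
 * DOC comment above. guc_wq_item_pack_sketch() is a made-up name, the
 * struct guc_wq_item layout and WQ_* flags are the legacy definitions from
 * intel_guc_fwif.h, and the real workqueue ring management (head/tail
 * updates in the process descriptor) is deliberately left out.
 */
static void __maybe_unused
guc_wq_item_pack_sketch(struct guc_wq_item *wqi, u32 target_engine,
                        u32 context_desc, u32 ring_tail, u32 fence_id)
{
        /* wqi_len is in DWords and does not include the one-word header. */
        const u32 wqi_len = sizeof(*wqi) / sizeof(u32) - 1;

        wqi->header = WQ_TYPE_INORDER |
                      (wqi_len << WQ_LEN_SHIFT) |
                      (target_engine << WQ_TARGET_SHIFT) |
                      WQ_NO_WCFLUSH_WAIT;
        wqi->context_desc = context_desc; /* ELSP context descriptor dword */
        wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
        wqi->fence_id = fence_id;
}
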
/*
 * When we're doing submissions using the regular execlists backend, writing to
 * ELSP from the CPU side is enough to make sure that writes to ringbuffer
 * pages pinned in the mappable aperture portion of GGTT are visible to the
 * command streamer. Writes done by the GuC on our behalf do not guarantee
 * such ordering; therefore, to ensure the flush, we issue a POSTING READ.
 */
static void flush_ggtt_writes(struct i915_vma *vma)
{
        if (i915_vma_is_map_and_fenceable(vma))
                intel_uncore_posting_read_fw(vma->vm->gt->uncore,
                                             GUC_STATUS);
}

static void guc_submit(struct intel_engine_cs *engine,
                       struct i915_request **out,
                       struct i915_request **end)
{
        struct intel_guc *guc = &engine->gt->uc.guc;

        do {
                struct i915_request *rq = *out++;

                flush_ggtt_writes(rq->ring->vma);
                guc_add_request(guc, rq);
        } while (out != end);
}

static inline int rq_prio(const struct i915_request *rq)
{
        return rq->sched.attr.priority;
}

static struct i915_request *schedule_in(struct i915_request *rq, int idx)
{
        trace_i915_request_in(rq, idx);

        /*
         * Currently we are not tracking the rq->context being inflight
         * (ce->inflight = rq->engine). It is only used by the execlists
         * backend at the moment, a similar counting strategy would be
         * required if we generalise the inflight tracking.
         */

        __intel_gt_pm_get(rq->engine->gt);
        return i915_request_get(rq);
}

static void schedule_out(struct i915_request *rq)
{
        trace_i915_request_out(rq);

        intel_gt_pm_put_async(rq->engine->gt);
        i915_request_put(rq);
}

static void __guc_dequeue(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request **first = execlists->inflight;
        struct i915_request ** const last_port = first + execlists->port_mask;
        struct i915_request *last = first[0];
        struct i915_request **port;
        bool submit = false;
        struct rb_node *rb;

        lockdep_assert_held(&engine->active.lock);

        if (last) {
                if (*++first)
                        return;

                last = NULL;
        }

        /*
         * We write directly into the execlists->inflight queue and don't use
         * the execlists->pending queue, as we don't have a distinct switch
         * event.
         */
        port = first;
        while ((rb = rb_first_cached(&execlists->queue))) {
                struct i915_priolist *p = to_priolist(rb);
                struct i915_request *rq, *rn;
                int i;

                priolist_for_each_request_consume(rq, rn, p, i) {
                        if (last && rq->context != last->context) {
                                if (port == last_port)
                                        goto done;

                                *port = schedule_in(last,
                                                    port - execlists->inflight);
                                port++;
                        }

                        list_del_init(&rq->sched.link);
                        __i915_request_submit(rq);
                        submit = true;
                        last = rq;
                }

                rb_erase_cached(&p->node, &execlists->queue);
                i915_priolist_free(p);
        }
done:
        execlists->queue_priority_hint =
                rb ? to_priolist(rb)->priority : INT_MIN;
        if (submit) {
                *port = schedule_in(last, port - execlists->inflight);
                *++port = NULL;
                guc_submit(engine, first, port);
        }
        execlists->active = execlists->inflight;
}

static void guc_submission_tasklet(unsigned long data)
{
        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request **port, *rq;
        unsigned long flags;

        spin_lock_irqsave(&engine->active.lock, flags);

        for (port = execlists->inflight; (rq = *port); port++) {
                if (!i915_request_completed(rq))
                        break;

                schedule_out(rq);
        }
        if (port != execlists->inflight) {
                int idx = port - execlists->inflight;
                int rem = ARRAY_SIZE(execlists->inflight) - idx;
                memmove(execlists->inflight, port, rem * sizeof(*port));
        }

        __guc_dequeue(engine);

        spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void guc_reset_prepare(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;

        ENGINE_TRACE(engine, "\n");

        /*
         * Prevent request submission to the hardware until we have
         * completed the reset in i915_gem_reset_finish(). If a request
         * is completed by one engine, it may then queue a request
         * to a second via its execlists->tasklet *just* as we are
         * calling engine->init_hw() and also writing the ELSP.
         * Turning off the execlists->tasklet until the reset is over
         * prevents the race.
         */
        __tasklet_disable_sync_once(&execlists->tasklet);
}

static void guc_reset_state(struct intel_context *ce,
                            struct intel_engine_cs *engine,
                            u32 head,
                            bool scrub)
{
        GEM_BUG_ON(!intel_context_is_pinned(ce));

        /*
         * We want a simple context + ring to execute the breadcrumb update.
         * We cannot rely on the context being intact across the GPU hang,
         * so clear it and rebuild just what we need for the breadcrumb.
         * All pending requests for this context will be zapped, and any
         * future request will be after userspace has had the opportunity
         * to recreate its own state.
         */
        if (scrub)
                lrc_init_regs(ce, engine, true);

        /* Rerun the request; its payload has been neutered (if guilty). */
        lrc_update_regs(ce, engine, head);
}

static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request *rq;
        unsigned long flags;

        spin_lock_irqsave(&engine->active.lock, flags);

        /* Push back any incomplete requests for replay after the reset. */
        rq = execlists_unwind_incomplete_requests(execlists);
        if (!rq)
                goto out_unlock;

        if (!i915_request_started(rq))
                stalled = false;

        __i915_request_reset(rq, stalled);
        guc_reset_state(rq->context, engine, rq->head, stalled);

out_unlock:
        spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void guc_reset_cancel(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_request *rq, *rn;
        struct rb_node *rb;
        unsigned long flags;

        ENGINE_TRACE(engine, "\n");

        /*
         * Before we call engine->cancel_requests(), we should have exclusive
         * access to the submission state. This is arranged for us by the
         * caller disabling the interrupt generation, the tasklet and other
         * threads that may then access the same state, giving us a free hand
         * to reset state. However, we still need to let lockdep be aware that
         * we know this state may be accessed in hardirq context, so we
         * disable the irq around this manipulation and we want to keep
         * the spinlock focused on its duties and not accidentally conflate
         * coverage to the submission's irq state. (Similarly, although we
         * shouldn't need to disable irq around the manipulation of the
         * submission's irq state, we also wish to remind ourselves that
         * it is irq state.)
         */
        spin_lock_irqsave(&engine->active.lock, flags);

        /* Mark all executing requests as skipped. */
        list_for_each_entry(rq, &engine->active.requests, sched.link) {
                i915_request_set_error_once(rq, -EIO);
                i915_request_mark_complete(rq);
        }

        /* Flush the queued requests to the timeline list (for retiring). */
        while ((rb = rb_first_cached(&execlists->queue))) {
                struct i915_priolist *p = to_priolist(rb);
                int i;

                priolist_for_each_request_consume(rq, rn, p, i) {
                        list_del_init(&rq->sched.link);
                        __i915_request_submit(rq);
                        dma_fence_set_error(&rq->fence, -EIO);
                        i915_request_mark_complete(rq);
                }

                rb_erase_cached(&p->node, &execlists->queue);
                i915_priolist_free(p);
        }

        /* Remaining _unready_ requests will be nop'ed when submitted */

        execlists->queue_priority_hint = INT_MIN;
        execlists->queue = RB_ROOT_CACHED;

        spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void guc_reset_finish(struct intel_engine_cs *engine)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;

        if (__tasklet_enable(&execlists->tasklet))
                /* And kick in case we missed a new request submission. */
                tasklet_hi_schedule(&execlists->tasklet);

        ENGINE_TRACE(engine, "depth->%d\n",
                     atomic_read(&execlists->tasklet.count));
}

/*
 * Set up the memory resources to be shared with the GuC (via the GGTT)
 * at firmware loading time.
 */
int intel_guc_submission_init(struct intel_guc *guc)
{
        int ret;

        if (guc->stage_desc_pool)
                return 0;

        ret = guc_stage_desc_pool_create(guc);
        if (ret)
                return ret;
        /*
         * Keep static analysers happy, let them know that we allocated the
         * vma after testing that it didn't exist earlier.
         */
        GEM_BUG_ON(!guc->stage_desc_pool);

        return 0;
}
void intel_guc_submission_fini(struct intel_guc *guc)
{
        if (guc->stage_desc_pool)
                guc_stage_desc_pool_destroy(guc);
}

static void guc_interrupts_capture(struct intel_gt *gt)
{
        struct intel_uncore *uncore = gt->uncore;
        u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
        u32 dmask = irqs << 16 | irqs;

        GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);

        /* Don't handle the ctx switch interrupt in GuC submission mode */
        intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
        intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
}

static void guc_interrupts_release(struct intel_gt *gt)
{
        struct intel_uncore *uncore = gt->uncore;
        u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
        u32 dmask = irqs << 16 | irqs;

        GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);

        /* Handle ctx switch interrupts again */
        intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
        intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
}

static int guc_context_alloc(struct intel_context *ce)
{
        return lrc_alloc(ce, ce->engine);
}

static int guc_context_pre_pin(struct intel_context *ce,
                               struct i915_gem_ww_ctx *ww,
                               void **vaddr)
{
        return lrc_pre_pin(ce, ce->engine, ww, vaddr);
}

static int guc_context_pin(struct intel_context *ce, void *vaddr)
{
        return lrc_pin(ce, ce->engine, vaddr);
}

static const struct intel_context_ops guc_context_ops = {
        .alloc = guc_context_alloc,

        .pre_pin = guc_context_pre_pin,
        .pin = guc_context_pin,
        .unpin = lrc_unpin,
        .post_unpin = lrc_post_unpin,

        .enter = intel_context_enter_engine,
        .exit = intel_context_exit_engine,

        .reset = lrc_reset,
        .destroy = lrc_destroy,
};

static int guc_request_alloc(struct i915_request *request)
{
        int ret;

        GEM_BUG_ON(!intel_context_is_pinned(request->context));

        /*
         * Flush enough space to reduce the likelihood of waiting after
         * we start building the request - in which case we will just
         * have to repeat work.
         */
        request->reserved_space += GUC_REQUEST_SIZE;

        /*
         * Note that after this point, we have committed to using
         * this request as it is being used to both track the
         * state of engine initialisation and liveness of the
         * golden renderstate above. Think twice before you try
         * to cancel/unwind this request now.
         */

        /* Unconditionally invalidate GPU caches and TLBs. */
        ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
        if (ret)
                return ret;

        request->reserved_space -= GUC_REQUEST_SIZE;
        return 0;
}

static inline void queue_request(struct intel_engine_cs *engine,
                                 struct i915_request *rq,
                                 int prio)
{
        GEM_BUG_ON(!list_empty(&rq->sched.link));
        list_add_tail(&rq->sched.link,
                      i915_sched_lookup_priolist(engine, prio));
        set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
}

static void guc_submit_request(struct i915_request *rq)
{
        struct intel_engine_cs *engine = rq->engine;
        unsigned long flags;

        /* Will be called from irq-context when using foreign fences. */
        spin_lock_irqsave(&engine->active.lock, flags);

        queue_request(engine, rq, rq_prio(rq));

        GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
        GEM_BUG_ON(list_empty(&rq->sched.link));

        tasklet_hi_schedule(&engine->execlists.tasklet);

        spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void sanitize_hwsp(struct intel_engine_cs *engine)
{
        struct intel_timeline *tl;

        list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
                intel_timeline_reset_seqno(tl);
}

static void guc_sanitize(struct intel_engine_cs *engine)
{
        /*
         * Poison residual state on resume, in case the suspend didn't!
         *
         * We have to assume that across suspend/resume (or other loss of
         * control) the contents of our pinned buffers have been lost,
         * replaced by garbage. Since this doesn't always happen, let's
         * poison such state so that we more quickly spot when we falsely
         * assume it has been preserved.
         */
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);

        /*
         * The kernel_context HWSP is stored in the status_page. As above,
         * that may be lost on resume/initialisation, and so we need to
         * reset the value in the HWSP.
         */
        sanitize_hwsp(engine);

        /* And scrub the dirty cachelines for the HWSP */
        clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
}

static void setup_hwsp(struct intel_engine_cs *engine)
{
        intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */

        ENGINE_WRITE_FW(engine,
                        RING_HWS_PGA,
                        i915_ggtt_offset(engine->status_page.vma));
}

static void start_engine(struct intel_engine_cs *engine)
{
        ENGINE_WRITE_FW(engine,
                        RING_MODE_GEN7,
                        _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));

        ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
        ENGINE_POSTING_READ(engine, RING_MI_MODE);
}

static int guc_resume(struct intel_engine_cs *engine)
{
        assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);

        intel_mocs_init_engine(engine);

        intel_breadcrumbs_reset(engine->breadcrumbs);

        setup_hwsp(engine);
        start_engine(engine);

        return 0;
}

static void guc_set_default_submission(struct intel_engine_cs *engine)
{
        engine->submit_request = guc_submit_request;
        engine->schedule = i915_schedule;
        engine->execlists.tasklet.func = guc_submission_tasklet;

        engine->reset.prepare = guc_reset_prepare;
        engine->reset.rewind = guc_reset_rewind;
        engine->reset.cancel = guc_reset_cancel;
        engine->reset.finish = guc_reset_finish;

        engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
        engine->flags |= I915_ENGINE_HAS_PREEMPTION;

        /*
         * TODO: GuC supports timeslicing and semaphores as well, but they're
         * handled by the firmware so some minor tweaks are required before
         * enabling.
         *
         * engine->flags |= I915_ENGINE_HAS_TIMESLICES;
         * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
         */

        engine->emit_bb_start = gen8_emit_bb_start;

        /*
         * For the breadcrumb irq to work we need the interrupts to stay
         * enabled. However, on all platforms on which we'll have support for
         * GuC submission we don't allow disabling the interrupts at runtime, so
         * we're always safe with the current flow.
         */
        GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
}

static void guc_release(struct intel_engine_cs *engine)
{
        engine->sanitize = NULL; /* no longer in control, nothing to sanitize */

        tasklet_kill(&engine->execlists.tasklet);

        intel_engine_cleanup_common(engine);
        lrc_fini_wa_ctx(engine);
}

static void guc_default_vfuncs(struct intel_engine_cs *engine)
{
        /* Default vfuncs which can be overridden by each engine. */

        engine->resume = guc_resume;

        engine->cops = &guc_context_ops;
        engine->request_alloc = guc_request_alloc;

        engine->emit_flush = gen8_emit_flush_xcs;
        engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
        engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
        if (INTEL_GEN(engine->i915) >= 12) {
                engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
                engine->emit_flush = gen12_emit_flush_xcs;
        }
        engine->set_default_submission = guc_set_default_submission;
}

static void rcs_submission_override(struct intel_engine_cs *engine)
{
        switch (INTEL_GEN(engine->i915)) {
        case 12:
                engine->emit_flush = gen12_emit_flush_rcs;
                engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
                break;
        case 11:
                engine->emit_flush = gen11_emit_flush_rcs;
                engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
                break;
        default:
                engine->emit_flush = gen8_emit_flush_rcs;
                engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
                break;
        }
}

static inline void guc_default_irqs(struct intel_engine_cs *engine)
{
        engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
}

int intel_guc_submission_setup(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;

        /*
         * The setup relies on several assumptions (e.g. irqs always enabled)
         * that are only valid on gen11+
         */
        GEM_BUG_ON(INTEL_GEN(i915) < 11);

        tasklet_init(&engine->execlists.tasklet,
                     guc_submission_tasklet, (unsigned long)engine);

        guc_default_vfuncs(engine);
        guc_default_irqs(engine);

        if (engine->class == RENDER_CLASS)
                rcs_submission_override(engine);

        lrc_init_wa_ctx(engine);

        /* Finally, take ownership and responsibility for cleanup! */
        engine->sanitize = guc_sanitize;
        engine->release = guc_release;

        return 0;
}

void intel_guc_submission_enable(struct intel_guc *guc)
{
        guc_stage_desc_init(guc);

        /* Take over from manual control of ELSP (execlists) */
        guc_interrupts_capture(guc_to_gt(guc));
}

void intel_guc_submission_disable(struct intel_guc *guc)
{
        struct intel_gt *gt = guc_to_gt(guc);

        GEM_BUG_ON(gt->awake); /* GT should be parked first */

        /* Note: By the time we're here, GuC may have already been reset */

        guc_interrupts_release(gt);

        guc_stage_desc_fini(guc);
}

static bool __guc_submission_selected(struct intel_guc *guc)
{
        struct drm_i915_private *i915 = guc_to_gt(guc)->i915;

        if (!intel_guc_submission_is_supported(guc))
                return false;

        return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
}

void intel_guc_submission_init_early(struct intel_guc *guc)
{
        guc->submission_selected = __guc_submission_selected(guc);
}

bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
{
        return engine->set_default_submission == guc_set_default_submission;
}