linux/drivers/gpu/drm/i915/gt/intel_lrc.c
   1/*
   2 * Copyright © 2014 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Ben Widawsky <ben@bwidawsk.net>
  25 *    Michel Thierry <michel.thierry@intel.com>
  26 *    Thomas Daniel <thomas.daniel@intel.com>
  27 *    Oscar Mateo <oscar.mateo@intel.com>
  28 *
  29 */
  30
  31/**
  32 * DOC: Logical Rings, Logical Ring Contexts and Execlists
  33 *
  34 * Motivation:
  35 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
  36 * These expanded contexts enable a number of new abilities, especially
  37 * "Execlists" (also implemented in this file).
  38 *
   39 * One of the main differences from the legacy HW contexts is that logical
   40 * ring contexts incorporate many more things into the context's state, like
  41 * PDPs or ringbuffer control registers:
  42 *
  43 * The reason why PDPs are included in the context is straightforward: as
  44 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
   45 * contained there means you don't need to do a ppgtt->switch_mm yourself;
   46 * instead, the GPU will do it for you on the context switch.
  47 *
   48 * But what about the ringbuffer control registers (head, tail, etc.)?
   49 * Shouldn't we just need one set of those per engine command streamer? This is
  50 * where the name "Logical Rings" starts to make sense: by virtualizing the
  51 * rings, the engine cs shifts to a new "ring buffer" with every context
  52 * switch. When you want to submit a workload to the GPU you: A) choose your
  53 * context, B) find its appropriate virtualized ring, C) write commands to it
  54 * and then, finally, D) tell the GPU to switch to that context.
  55 *
  56 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
   57 * to a context is via a context execution list, ergo "Execlists".
  58 *
  59 * LRC implementation:
  60 * Regarding the creation of contexts, we have:
  61 *
  62 * - One global default context.
  63 * - One local default context for each opened fd.
  64 * - One local extra context for each context create ioctl call.
  65 *
  66 * Now that ringbuffers belong per-context (and not per-engine, like before)
  67 * and that contexts are uniquely tied to a given engine (and not reusable,
  68 * like before) we need:
  69 *
  70 * - One ringbuffer per-engine inside each context.
  71 * - One backing object per-engine inside each context.
  72 *
  73 * The global default context starts its life with these new objects fully
  74 * allocated and populated. The local default context for each opened fd is
  75 * more complex, because we don't know at creation time which engine is going
  76 * to use them. To handle this, we have implemented a deferred creation of LR
  77 * contexts:
  78 *
  79 * The local context starts its life as a hollow or blank holder, that only
  80 * gets populated for a given engine once we receive an execbuffer. If later
  81 * on we receive another execbuffer ioctl for the same context but a different
  82 * engine, we allocate/populate a new ringbuffer and context backing object and
  83 * so on.
  84 *
  85 * Finally, regarding local contexts created using the ioctl call: as they are
  86 * only allowed with the render ring, we can allocate & populate them right
  87 * away (no need to defer anything, at least for now).
  88 *
  89 * Execlists implementation:
  90 * Execlists are the new method by which, on gen8+ hardware, workloads are
  91 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
  92 * This method works as follows:
  93 *
  94 * When a request is committed, its commands (the BB start and any leading or
  95 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
  96 * for the appropriate context. The tail pointer in the hardware context is not
   97 * updated at this time but is instead kept by the driver in the ringbuffer
  98 * structure. A structure representing this request is added to a request queue
  99 * for the appropriate engine: this structure contains a copy of the context's
 100 * tail after the request was written to the ring buffer and a pointer to the
 101 * context itself.
 102 *
 103 * If the engine's request queue was empty before the request was added, the
 104 * queue is processed immediately. Otherwise the queue will be processed during
 105 * a context switch interrupt. In any case, elements on the queue will get sent
 106 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
  107 * globally unique 20-bit submission ID.
 108 *
 109 * When execution of a request completes, the GPU updates the context status
 110 * buffer with a context complete event and generates a context switch interrupt.
 111 * During the interrupt handling, the driver examines the events in the buffer:
 112 * for each context complete event, if the announced ID matches that on the head
 113 * of the request queue, then that request is retired and removed from the queue.
 114 *
 115 * After processing, if any requests were retired and the queue is not empty
 116 * then a new execution list can be submitted. The two requests at the front of
 117 * the queue are next to be submitted but since a context may not occur twice in
 118 * an execution list, if subsequent requests have the same ID as the first then
 119 * the two requests must be combined. This is done simply by discarding requests
  120 * at the head of the queue until either only one request is left (in which case
 121 * we use a NULL second context) or the first two requests have unique IDs.
 122 *
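      * As a hypothetical illustration: if the queue holds requests A1 and A2
      * from context A followed by B1 from context B, A1 is discarded in favour
      * of A2 (A2's tail already covers A1's commands in the same ringbuffer),
      * and the pair {A2, B1} is what gets written to the ELSP.
      *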
 123 * By always executing the first two requests in the queue the driver ensures
 124 * that the GPU is kept as busy as possible. In the case where a single context
 125 * completes but a second context is still executing, the request for this second
 126 * context will be at the head of the queue when we remove the first one. This
 127 * request will then be resubmitted along with a new request for a different context,
 128 * which will cause the hardware to continue executing the second request and queue
 129 * the new request (the GPU detects the condition of a context getting preempted
 130 * with the same context and optimizes the context switch flow by not doing
 131 * preemption, but just sampling the new tail pointer).
 132 *
 133 */
 134#include <linux/interrupt.h>
 135
 136#include "i915_drv.h"
 137#include "i915_perf.h"
 138#include "i915_trace.h"
 139#include "i915_vgpu.h"
 140#include "intel_breadcrumbs.h"
 141#include "intel_context.h"
 142#include "intel_engine_pm.h"
 143#include "intel_gt.h"
 144#include "intel_gt_pm.h"
 145#include "intel_gt_requests.h"
 146#include "intel_lrc_reg.h"
 147#include "intel_mocs.h"
 148#include "intel_reset.h"
 149#include "intel_ring.h"
 150#include "intel_workarounds.h"
 151#include "shmem_utils.h"
 152
 153#define RING_EXECLIST_QFULL             (1 << 0x2)
 154#define RING_EXECLIST1_VALID            (1 << 0x3)
 155#define RING_EXECLIST0_VALID            (1 << 0x4)
 156#define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
 157#define RING_EXECLIST1_ACTIVE           (1 << 0x11)
 158#define RING_EXECLIST0_ACTIVE           (1 << 0x12)
 159
 160#define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
 161#define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
 162#define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
 163#define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
 164#define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
 165#define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
 166
 167#define GEN8_CTX_STATUS_COMPLETED_MASK \
 168         (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 169
 170#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
 171
 172#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE  (0x1) /* lower csb dword */
 173#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
 174#define GEN12_CSB_SW_CTX_ID_MASK                GENMASK(25, 15)
 175#define GEN12_IDLE_CTX_ID               0x7FF
 176#define GEN12_CSB_CTX_VALID(csb_dw) \
 177        (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
 178
 179/* Typical size of the average request (2 pipecontrols and a MI_BB) */
 180#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 181
 182struct virtual_engine {
 183        struct intel_engine_cs base;
 184        struct intel_context context;
 185        struct rcu_work rcu;
 186
 187        /*
 188         * We allow only a single request through the virtual engine at a time
 189         * (each request in the timeline waits for the completion fence of
 190         * the previous before being submitted). By restricting ourselves to
  191         * only submitting a single request, each request is placed onto a
  192         * physical engine to maximise load spreading (by virtue of the late greedy
 193         * scheduling -- each real engine takes the next available request
 194         * upon idling).
 195         */
 196        struct i915_request *request;
 197
 198        /*
 199         * We keep a rbtree of available virtual engines inside each physical
 200         * engine, sorted by priority. Here we preallocate the nodes we need
 201         * for the virtual engine, indexed by physical_engine->id.
 202         */
 203        struct ve_node {
 204                struct rb_node rb;
 205                int prio;
 206        } nodes[I915_NUM_ENGINES];
 207
 208        /*
  209         * Keep track of bonded pairs -- restrictions upon our selection
  210         * of physical engines any particular request may be submitted to.
  211         * If we receive a submit-fence from a master engine, we will only
  212         * use one of the sibling_mask physical engines.
 213         */
 214        struct ve_bond {
 215                const struct intel_engine_cs *master;
 216                intel_engine_mask_t sibling_mask;
 217        } *bonds;
 218        unsigned int num_bonds;
 219
 220        /* And finally, which physical engines this virtual engine maps onto. */
 221        unsigned int num_siblings;
 222        struct intel_engine_cs *siblings[];
 223};
 224
 225static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
 226{
 227        GEM_BUG_ON(!intel_engine_is_virtual(engine));
 228        return container_of(engine, struct virtual_engine, base);
 229}
 230
 231static int __execlists_context_alloc(struct intel_context *ce,
 232                                     struct intel_engine_cs *engine);
 233
 234static void execlists_init_reg_state(u32 *reg_state,
 235                                     const struct intel_context *ce,
 236                                     const struct intel_engine_cs *engine,
 237                                     const struct intel_ring *ring,
 238                                     bool close);
 239static void
 240__execlists_update_reg_state(const struct intel_context *ce,
 241                             const struct intel_engine_cs *engine,
 242                             u32 head);
 243
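/*
 * The lrc_ring_*() helpers below return the dword index within the logical
 * ring context register state at which the LRI entry for the named register
 * lives (regs[x] holds the register offset, regs[x + 1] its value), or -1 if
 * that register is not part of the context image for this engine/gen.
 */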
 244static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
 245{
 246        if (INTEL_GEN(engine->i915) >= 12)
 247                return 0x60;
 248        else if (INTEL_GEN(engine->i915) >= 9)
 249                return 0x54;
 250        else if (engine->class == RENDER_CLASS)
 251                return 0x58;
 252        else
 253                return -1;
 254}
 255
 256static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
 257{
 258        if (INTEL_GEN(engine->i915) >= 12)
 259                return 0x74;
 260        else if (INTEL_GEN(engine->i915) >= 9)
 261                return 0x68;
 262        else if (engine->class == RENDER_CLASS)
 263                return 0xd8;
 264        else
 265                return -1;
 266}
 267
 268static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
 269{
 270        if (INTEL_GEN(engine->i915) >= 12)
 271                return 0x12;
 272        else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
 273                return 0x18;
 274        else
 275                return -1;
 276}
 277
 278static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
 279{
 280        int x;
 281
 282        x = lrc_ring_wa_bb_per_ctx(engine);
 283        if (x < 0)
 284                return x;
 285
 286        return x + 2;
 287}
 288
 289static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
 290{
 291        int x;
 292
 293        x = lrc_ring_indirect_ptr(engine);
 294        if (x < 0)
 295                return x;
 296
 297        return x + 2;
 298}
 299
 300static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
 301{
 302        if (engine->class != RENDER_CLASS)
 303                return -1;
 304
 305        if (INTEL_GEN(engine->i915) >= 12)
 306                return 0xb6;
 307        else if (INTEL_GEN(engine->i915) >= 11)
 308                return 0xaa;
 309        else
 310                return -1;
 311}
 312
 313static u32
 314lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
 315{
 316        switch (INTEL_GEN(engine->i915)) {
 317        default:
 318                MISSING_CASE(INTEL_GEN(engine->i915));
 319                fallthrough;
 320        case 12:
 321                return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 322        case 11:
 323                return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 324        case 10:
 325                return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 326        case 9:
 327                return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 328        case 8:
 329                return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
 330        }
 331}
 332
 333static void
 334lrc_ring_setup_indirect_ctx(u32 *regs,
 335                            const struct intel_engine_cs *engine,
 336                            u32 ctx_bb_ggtt_addr,
 337                            u32 size)
 338{
 339        GEM_BUG_ON(!size);
 340        GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
 341        GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
 342        regs[lrc_ring_indirect_ptr(engine) + 1] =
 343                ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
 344
 345        GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
 346        regs[lrc_ring_indirect_offset(engine) + 1] =
 347                lrc_ring_indirect_offset_default(engine) << 6;
 348}
 349
 350static u32 intel_context_get_runtime(const struct intel_context *ce)
 351{
 352        /*
 353         * We can use either ppHWSP[16] which is recorded before the context
 354         * switch (and so excludes the cost of context switches) or use the
 355         * value from the context image itself, which is saved/restored earlier
 356         * and so includes the cost of the save.
 357         */
 358        return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
 359}
 360
 361static void mark_eio(struct i915_request *rq)
 362{
 363        if (i915_request_completed(rq))
 364                return;
 365
 366        GEM_BUG_ON(i915_request_signaled(rq));
 367
 368        i915_request_set_error_once(rq, -EIO);
 369        i915_request_mark_complete(rq);
 370}
 371
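/*
 * Walk backwards from @rq along its timeline to find the oldest request that
 * has not yet completed. This is used when resetting an active context (see
 * reset_active()) to pick the point to which the ring should be rewound.
 */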
 372static struct i915_request *
 373active_request(const struct intel_timeline * const tl, struct i915_request *rq)
 374{
 375        struct i915_request *active = rq;
 376
 377        rcu_read_lock();
 378        list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
 379                if (i915_request_completed(rq))
 380                        break;
 381
 382                active = rq;
 383        }
 384        rcu_read_unlock();
 385
 386        return active;
 387}
 388
 389static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 390{
 391        return (i915_ggtt_offset(engine->status_page.vma) +
 392                I915_GEM_HWS_PREEMPT_ADDR);
 393}
 394
 395static inline void
 396ring_set_paused(const struct intel_engine_cs *engine, int state)
 397{
 398        /*
 399         * We inspect HWS_PREEMPT with a semaphore inside
 400         * engine->emit_fini_breadcrumb. If the dword is true,
 401         * the ring is paused as the semaphore will busywait
 402         * until the dword is false.
 403         */
 404        engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
 405        if (state)
 406                wmb();
 407}
 408
 409static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 410{
 411        return rb_entry(rb, struct i915_priolist, node);
 412}
 413
 414static inline int rq_prio(const struct i915_request *rq)
 415{
 416        return READ_ONCE(rq->sched.attr.priority);
 417}
 418
 419static int effective_prio(const struct i915_request *rq)
 420{
 421        int prio = rq_prio(rq);
 422
 423        /*
 424         * If this request is special and must not be interrupted at any
 425         * cost, so be it. Note we are only checking the most recent request
 426         * in the context and so may be masking an earlier vip request. It
 427         * is hoped that under the conditions where nopreempt is used, this
 428         * will not matter (i.e. all requests to that context will be
 429         * nopreempt for as long as desired).
 430         */
 431        if (i915_request_has_nopreempt(rq))
 432                prio = I915_PRIORITY_UNPREEMPTABLE;
 433
 434        return prio;
 435}
 436
 437static int queue_prio(const struct intel_engine_execlists *execlists)
 438{
 439        struct i915_priolist *p;
 440        struct rb_node *rb;
 441
 442        rb = rb_first_cached(&execlists->queue);
 443        if (!rb)
 444                return INT_MIN;
 445
 446        /*
 447         * As the priolist[] are inverted, with the highest priority in [0],
 448         * we have to flip the index value to become priority.
 449         */
 450        p = to_priolist(rb);
 451        if (!I915_USER_PRIORITY_SHIFT)
 452                return p->priority;
 453
 454        return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
 455}
 456
 457static inline bool need_preempt(const struct intel_engine_cs *engine,
 458                                const struct i915_request *rq,
 459                                struct rb_node *rb)
 460{
 461        int last_prio;
 462
 463        if (!intel_engine_has_semaphores(engine))
 464                return false;
 465
 466        /*
 467         * Check if the current priority hint merits a preemption attempt.
 468         *
 469         * We record the highest value priority we saw during rescheduling
 470         * prior to this dequeue, therefore we know that if it is strictly
  471         * less than the current tail of ELSP[0], we do not need to force
 472         * a preempt-to-idle cycle.
 473         *
 474         * However, the priority hint is a mere hint that we may need to
 475         * preempt. If that hint is stale or we may be trying to preempt
 476         * ourselves, ignore the request.
 477         *
 478         * More naturally we would write
 479         *      prio >= max(0, last);
 480         * except that we wish to prevent triggering preemption at the same
 481         * priority level: the task that is running should remain running
 482         * to preserve FIFO ordering of dependencies.
 483         */
 484        last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
 485        if (engine->execlists.queue_priority_hint <= last_prio)
 486                return false;
 487
 488        /*
  489         * Check against the first request in ELSP[1]; it will, thanks to the
 490         * power of PI, be the highest priority of that context.
 491         */
 492        if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
 493            rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 494                return true;
 495
 496        if (rb) {
 497                struct virtual_engine *ve =
 498                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 499                bool preempt = false;
 500
 501                if (engine == ve->siblings[0]) { /* only preempt one sibling */
 502                        struct i915_request *next;
 503
 504                        rcu_read_lock();
 505                        next = READ_ONCE(ve->request);
 506                        if (next)
 507                                preempt = rq_prio(next) > last_prio;
 508                        rcu_read_unlock();
 509                }
 510
 511                if (preempt)
 512                        return preempt;
 513        }
 514
 515        /*
 516         * If the inflight context did not trigger the preemption, then maybe
 517         * it was the set of queued requests? Pick the highest priority in
 518         * the queue (the first active priolist) and see if it deserves to be
 519         * running instead of ELSP[0].
 520         *
  521         * The highest priority request in the queue cannot be either
  522         * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
  523         * context, its priority would not exceed ELSP[0] aka last_prio.
 524         */
 525        return queue_prio(&engine->execlists) > last_prio;
 526}
 527
 528__maybe_unused static inline bool
 529assert_priority_queue(const struct i915_request *prev,
 530                      const struct i915_request *next)
 531{
 532        /*
 533         * Without preemption, the prev may refer to the still active element
 534         * which we refuse to let go.
 535         *
 536         * Even with preemption, there are times when we think it is better not
 537         * to preempt and leave an ostensibly lower priority request in flight.
 538         */
 539        if (i915_request_is_active(prev))
 540                return true;
 541
 542        return rq_prio(prev) >= rq_prio(next);
 543}
 544
 545/*
 546 * The context descriptor encodes various attributes of a context,
 547 * including its GTT address and some flags. Because it's fairly
 548 * expensive to calculate, we'll just do it once and cache the result,
 549 * which remains valid until the context is unpinned.
 550 *
 551 * This is what a descriptor looks like, from LSB to MSB::
 552 *
 553 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 554 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 555 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 556 *      bits 53-54:    mbz, reserved for use by hardware
 557 *      bits 55-63:    group ID, currently unused and set to 0
 558 *
 559 * Starting from Gen11, the upper dword of the descriptor has a new format:
 560 *
 561 *      bits 32-36:    reserved
 562 *      bits 37-47:    SW context ID
  563 *      bits 48-53:    engine instance
 564 *      bit 54:        mbz, reserved for use by hardware
 565 *      bits 55-60:    SW counter
 566 *      bits 61-63:    engine class
 567 *
 568 * engine info, SW context ID and SW counter need to form a unique number
 569 * (Context ID) per lrc.
 570 */
 571static u32
 572lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 573{
 574        u32 desc;
 575
 576        desc = INTEL_LEGACY_32B_CONTEXT;
 577        if (i915_vm_is_4lvl(ce->vm))
 578                desc = INTEL_LEGACY_64B_CONTEXT;
 579        desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
 580
 581        desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
 582        if (IS_GEN(engine->i915, 8))
 583                desc |= GEN8_CTX_L3LLC_COHERENT;
 584
 585        return i915_ggtt_offset(ce->state) | desc;
 586}
 587
 588static inline unsigned int dword_in_page(void *addr)
 589{
 590        return offset_in_page(addr) / sizeof(u32);
 591}
 592
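/*
 * set_offsets() expands a compact per-gen table (gen8_xcs_offsets and
 * friends below) into the register portion of the context image. The table
 * is a small bytecode: NOP(x) skips x dwords, LRI(count, flags) emits an
 * MI_LOAD_REGISTER_IMM header for count register/value pairs (POSTED adds
 * MI_LRI_FORCE_POSTED), REG()/REG16() encode a register offset relative to
 * engine->mmio_base in one or two bytes, and END(total) terminates the list
 * while recording the total number of state dwords so the remainder of the
 * page can be cleared and the image closed with MI_BATCH_BUFFER_END.
 */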
 593static void set_offsets(u32 *regs,
 594                        const u8 *data,
 595                        const struct intel_engine_cs *engine,
 596                        bool clear)
 597#define NOP(x) (BIT(7) | (x))
 598#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
 599#define POSTED BIT(0)
 600#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
 601#define REG16(x) \
 602        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
 603        (((x) >> 2) & 0x7f)
 604#define END(total_state_size) 0, (total_state_size)
 605{
 606        const u32 base = engine->mmio_base;
 607
 608        while (*data) {
 609                u8 count, flags;
 610
 611                if (*data & BIT(7)) { /* skip */
 612                        count = *data++ & ~BIT(7);
 613                        if (clear)
 614                                memset32(regs, MI_NOOP, count);
 615                        regs += count;
 616                        continue;
 617                }
 618
 619                count = *data & 0x3f;
 620                flags = *data >> 6;
 621                data++;
 622
 623                *regs = MI_LOAD_REGISTER_IMM(count);
 624                if (flags & POSTED)
 625                        *regs |= MI_LRI_FORCE_POSTED;
 626                if (INTEL_GEN(engine->i915) >= 11)
 627                        *regs |= MI_LRI_LRM_CS_MMIO;
 628                regs++;
 629
 630                GEM_BUG_ON(!count);
 631                do {
 632                        u32 offset = 0;
 633                        u8 v;
 634
 635                        do {
 636                                v = *data++;
 637                                offset <<= 7;
 638                                offset |= v & ~BIT(7);
 639                        } while (v & BIT(7));
 640
 641                        regs[0] = base + (offset << 2);
 642                        if (clear)
 643                                regs[1] = 0;
 644                        regs += 2;
 645                } while (--count);
 646        }
 647
 648        if (clear) {
 649                u8 count = *++data;
 650
 651                /* Clear past the tail for HW access */
 652                GEM_BUG_ON(dword_in_page(regs) > count);
 653                memset32(regs, MI_NOOP, count - dword_in_page(regs));
 654
 655                /* Close the batch; used mainly by live_lrc_layout() */
 656                *regs = MI_BATCH_BUFFER_END;
 657                if (INTEL_GEN(engine->i915) >= 10)
 658                        *regs |= BIT(0);
 659        }
 660}
 661
 662static const u8 gen8_xcs_offsets[] = {
 663        NOP(1),
 664        LRI(11, 0),
 665        REG16(0x244),
 666        REG(0x034),
 667        REG(0x030),
 668        REG(0x038),
 669        REG(0x03c),
 670        REG(0x168),
 671        REG(0x140),
 672        REG(0x110),
 673        REG(0x11c),
 674        REG(0x114),
 675        REG(0x118),
 676
 677        NOP(9),
 678        LRI(9, 0),
 679        REG16(0x3a8),
 680        REG16(0x28c),
 681        REG16(0x288),
 682        REG16(0x284),
 683        REG16(0x280),
 684        REG16(0x27c),
 685        REG16(0x278),
 686        REG16(0x274),
 687        REG16(0x270),
 688
 689        NOP(13),
 690        LRI(2, 0),
 691        REG16(0x200),
 692        REG(0x028),
 693
 694        END(80)
 695};
 696
 697static const u8 gen9_xcs_offsets[] = {
 698        NOP(1),
 699        LRI(14, POSTED),
 700        REG16(0x244),
 701        REG(0x034),
 702        REG(0x030),
 703        REG(0x038),
 704        REG(0x03c),
 705        REG(0x168),
 706        REG(0x140),
 707        REG(0x110),
 708        REG(0x11c),
 709        REG(0x114),
 710        REG(0x118),
 711        REG(0x1c0),
 712        REG(0x1c4),
 713        REG(0x1c8),
 714
 715        NOP(3),
 716        LRI(9, POSTED),
 717        REG16(0x3a8),
 718        REG16(0x28c),
 719        REG16(0x288),
 720        REG16(0x284),
 721        REG16(0x280),
 722        REG16(0x27c),
 723        REG16(0x278),
 724        REG16(0x274),
 725        REG16(0x270),
 726
 727        NOP(13),
 728        LRI(1, POSTED),
 729        REG16(0x200),
 730
 731        NOP(13),
 732        LRI(44, POSTED),
 733        REG(0x028),
 734        REG(0x09c),
 735        REG(0x0c0),
 736        REG(0x178),
 737        REG(0x17c),
 738        REG16(0x358),
 739        REG(0x170),
 740        REG(0x150),
 741        REG(0x154),
 742        REG(0x158),
 743        REG16(0x41c),
 744        REG16(0x600),
 745        REG16(0x604),
 746        REG16(0x608),
 747        REG16(0x60c),
 748        REG16(0x610),
 749        REG16(0x614),
 750        REG16(0x618),
 751        REG16(0x61c),
 752        REG16(0x620),
 753        REG16(0x624),
 754        REG16(0x628),
 755        REG16(0x62c),
 756        REG16(0x630),
 757        REG16(0x634),
 758        REG16(0x638),
 759        REG16(0x63c),
 760        REG16(0x640),
 761        REG16(0x644),
 762        REG16(0x648),
 763        REG16(0x64c),
 764        REG16(0x650),
 765        REG16(0x654),
 766        REG16(0x658),
 767        REG16(0x65c),
 768        REG16(0x660),
 769        REG16(0x664),
 770        REG16(0x668),
 771        REG16(0x66c),
 772        REG16(0x670),
 773        REG16(0x674),
 774        REG16(0x678),
 775        REG16(0x67c),
 776        REG(0x068),
 777
 778        END(176)
 779};
 780
 781static const u8 gen12_xcs_offsets[] = {
 782        NOP(1),
 783        LRI(13, POSTED),
 784        REG16(0x244),
 785        REG(0x034),
 786        REG(0x030),
 787        REG(0x038),
 788        REG(0x03c),
 789        REG(0x168),
 790        REG(0x140),
 791        REG(0x110),
 792        REG(0x1c0),
 793        REG(0x1c4),
 794        REG(0x1c8),
 795        REG(0x180),
 796        REG16(0x2b4),
 797
 798        NOP(5),
 799        LRI(9, POSTED),
 800        REG16(0x3a8),
 801        REG16(0x28c),
 802        REG16(0x288),
 803        REG16(0x284),
 804        REG16(0x280),
 805        REG16(0x27c),
 806        REG16(0x278),
 807        REG16(0x274),
 808        REG16(0x270),
 809
 810        END(80)
 811};
 812
 813static const u8 gen8_rcs_offsets[] = {
 814        NOP(1),
 815        LRI(14, POSTED),
 816        REG16(0x244),
 817        REG(0x034),
 818        REG(0x030),
 819        REG(0x038),
 820        REG(0x03c),
 821        REG(0x168),
 822        REG(0x140),
 823        REG(0x110),
 824        REG(0x11c),
 825        REG(0x114),
 826        REG(0x118),
 827        REG(0x1c0),
 828        REG(0x1c4),
 829        REG(0x1c8),
 830
 831        NOP(3),
 832        LRI(9, POSTED),
 833        REG16(0x3a8),
 834        REG16(0x28c),
 835        REG16(0x288),
 836        REG16(0x284),
 837        REG16(0x280),
 838        REG16(0x27c),
 839        REG16(0x278),
 840        REG16(0x274),
 841        REG16(0x270),
 842
 843        NOP(13),
 844        LRI(1, 0),
 845        REG(0x0c8),
 846
 847        END(80)
 848};
 849
 850static const u8 gen9_rcs_offsets[] = {
 851        NOP(1),
 852        LRI(14, POSTED),
 853        REG16(0x244),
 854        REG(0x34),
 855        REG(0x30),
 856        REG(0x38),
 857        REG(0x3c),
 858        REG(0x168),
 859        REG(0x140),
 860        REG(0x110),
 861        REG(0x11c),
 862        REG(0x114),
 863        REG(0x118),
 864        REG(0x1c0),
 865        REG(0x1c4),
 866        REG(0x1c8),
 867
 868        NOP(3),
 869        LRI(9, POSTED),
 870        REG16(0x3a8),
 871        REG16(0x28c),
 872        REG16(0x288),
 873        REG16(0x284),
 874        REG16(0x280),
 875        REG16(0x27c),
 876        REG16(0x278),
 877        REG16(0x274),
 878        REG16(0x270),
 879
 880        NOP(13),
 881        LRI(1, 0),
 882        REG(0xc8),
 883
 884        NOP(13),
 885        LRI(44, POSTED),
 886        REG(0x28),
 887        REG(0x9c),
 888        REG(0xc0),
 889        REG(0x178),
 890        REG(0x17c),
 891        REG16(0x358),
 892        REG(0x170),
 893        REG(0x150),
 894        REG(0x154),
 895        REG(0x158),
 896        REG16(0x41c),
 897        REG16(0x600),
 898        REG16(0x604),
 899        REG16(0x608),
 900        REG16(0x60c),
 901        REG16(0x610),
 902        REG16(0x614),
 903        REG16(0x618),
 904        REG16(0x61c),
 905        REG16(0x620),
 906        REG16(0x624),
 907        REG16(0x628),
 908        REG16(0x62c),
 909        REG16(0x630),
 910        REG16(0x634),
 911        REG16(0x638),
 912        REG16(0x63c),
 913        REG16(0x640),
 914        REG16(0x644),
 915        REG16(0x648),
 916        REG16(0x64c),
 917        REG16(0x650),
 918        REG16(0x654),
 919        REG16(0x658),
 920        REG16(0x65c),
 921        REG16(0x660),
 922        REG16(0x664),
 923        REG16(0x668),
 924        REG16(0x66c),
 925        REG16(0x670),
 926        REG16(0x674),
 927        REG16(0x678),
 928        REG16(0x67c),
 929        REG(0x68),
 930
 931        END(176)
 932};
 933
 934static const u8 gen11_rcs_offsets[] = {
 935        NOP(1),
 936        LRI(15, POSTED),
 937        REG16(0x244),
 938        REG(0x034),
 939        REG(0x030),
 940        REG(0x038),
 941        REG(0x03c),
 942        REG(0x168),
 943        REG(0x140),
 944        REG(0x110),
 945        REG(0x11c),
 946        REG(0x114),
 947        REG(0x118),
 948        REG(0x1c0),
 949        REG(0x1c4),
 950        REG(0x1c8),
 951        REG(0x180),
 952
 953        NOP(1),
 954        LRI(9, POSTED),
 955        REG16(0x3a8),
 956        REG16(0x28c),
 957        REG16(0x288),
 958        REG16(0x284),
 959        REG16(0x280),
 960        REG16(0x27c),
 961        REG16(0x278),
 962        REG16(0x274),
 963        REG16(0x270),
 964
 965        LRI(1, POSTED),
 966        REG(0x1b0),
 967
 968        NOP(10),
 969        LRI(1, 0),
 970        REG(0x0c8),
 971
 972        END(80)
 973};
 974
 975static const u8 gen12_rcs_offsets[] = {
 976        NOP(1),
 977        LRI(13, POSTED),
 978        REG16(0x244),
 979        REG(0x034),
 980        REG(0x030),
 981        REG(0x038),
 982        REG(0x03c),
 983        REG(0x168),
 984        REG(0x140),
 985        REG(0x110),
 986        REG(0x1c0),
 987        REG(0x1c4),
 988        REG(0x1c8),
 989        REG(0x180),
 990        REG16(0x2b4),
 991
 992        NOP(5),
 993        LRI(9, POSTED),
 994        REG16(0x3a8),
 995        REG16(0x28c),
 996        REG16(0x288),
 997        REG16(0x284),
 998        REG16(0x280),
 999        REG16(0x27c),
1000        REG16(0x278),
1001        REG16(0x274),
1002        REG16(0x270),
1003
1004        LRI(3, POSTED),
1005        REG(0x1b0),
1006        REG16(0x5a8),
1007        REG16(0x5ac),
1008
1009        NOP(6),
1010        LRI(1, 0),
1011        REG(0x0c8),
1012        NOP(3 + 9 + 1),
1013
1014        LRI(51, POSTED),
1015        REG16(0x588),
1016        REG16(0x588),
1017        REG16(0x588),
1018        REG16(0x588),
1019        REG16(0x588),
1020        REG16(0x588),
1021        REG(0x028),
1022        REG(0x09c),
1023        REG(0x0c0),
1024        REG(0x178),
1025        REG(0x17c),
1026        REG16(0x358),
1027        REG(0x170),
1028        REG(0x150),
1029        REG(0x154),
1030        REG(0x158),
1031        REG16(0x41c),
1032        REG16(0x600),
1033        REG16(0x604),
1034        REG16(0x608),
1035        REG16(0x60c),
1036        REG16(0x610),
1037        REG16(0x614),
1038        REG16(0x618),
1039        REG16(0x61c),
1040        REG16(0x620),
1041        REG16(0x624),
1042        REG16(0x628),
1043        REG16(0x62c),
1044        REG16(0x630),
1045        REG16(0x634),
1046        REG16(0x638),
1047        REG16(0x63c),
1048        REG16(0x640),
1049        REG16(0x644),
1050        REG16(0x648),
1051        REG16(0x64c),
1052        REG16(0x650),
1053        REG16(0x654),
1054        REG16(0x658),
1055        REG16(0x65c),
1056        REG16(0x660),
1057        REG16(0x664),
1058        REG16(0x668),
1059        REG16(0x66c),
1060        REG16(0x670),
1061        REG16(0x674),
1062        REG16(0x678),
1063        REG16(0x67c),
1064        REG(0x068),
1065        REG(0x084),
1066        NOP(1),
1067
1068        END(192)
1069};
1070
1071#undef END
1072#undef REG16
1073#undef REG
1074#undef LRI
1075#undef NOP
1076
1077static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1078{
1079        /*
1080         * The gen12+ lists only have the registers we program in the basic
1081         * default state. We rely on the context image using relative
 1082         * addressing to automatically fix up the register state between the
 1083         * physical engines of a virtual engine.
1084         */
1085        GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1086                   !intel_engine_has_relative_mmio(engine));
1087
1088        if (engine->class == RENDER_CLASS) {
1089                if (INTEL_GEN(engine->i915) >= 12)
1090                        return gen12_rcs_offsets;
1091                else if (INTEL_GEN(engine->i915) >= 11)
1092                        return gen11_rcs_offsets;
1093                else if (INTEL_GEN(engine->i915) >= 9)
1094                        return gen9_rcs_offsets;
1095                else
1096                        return gen8_rcs_offsets;
1097        } else {
1098                if (INTEL_GEN(engine->i915) >= 12)
1099                        return gen12_xcs_offsets;
1100                else if (INTEL_GEN(engine->i915) >= 9)
1101                        return gen9_xcs_offsets;
1102                else
1103                        return gen8_xcs_offsets;
1104        }
1105}
1106
1107static struct i915_request *
1108__unwind_incomplete_requests(struct intel_engine_cs *engine)
1109{
1110        struct i915_request *rq, *rn, *active = NULL;
1111        struct list_head *pl;
1112        int prio = I915_PRIORITY_INVALID;
1113
1114        lockdep_assert_held(&engine->active.lock);
1115
1116        list_for_each_entry_safe_reverse(rq, rn,
1117                                         &engine->active.requests,
1118                                         sched.link) {
1119                if (i915_request_completed(rq))
1120                        continue; /* XXX */
1121
1122                __i915_request_unsubmit(rq);
1123
1124                /*
1125                 * Push the request back into the queue for later resubmission.
1126                 * If this request is not native to this physical engine (i.e.
1127                 * it came from a virtual source), push it back onto the virtual
1128                 * engine so that it can be moved across onto another physical
1129                 * engine as load dictates.
1130                 */
1131                if (likely(rq->execution_mask == engine->mask)) {
1132                        GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1133                        if (rq_prio(rq) != prio) {
1134                                prio = rq_prio(rq);
1135                                pl = i915_sched_lookup_priolist(engine, prio);
1136                        }
1137                        GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1138
1139                        list_move(&rq->sched.link, pl);
1140                        set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1141
1142                        /* Check in case we rollback so far we wrap [size/2] */
1143                        if (intel_ring_direction(rq->ring,
1144                                                 rq->tail,
1145                                                 rq->ring->tail + 8) > 0)
1146                                rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1147
1148                        active = rq;
1149                } else {
1150                        struct intel_engine_cs *owner = rq->context->engine;
1151
1152                        WRITE_ONCE(rq->engine, owner);
1153                        owner->submit_request(rq);
1154                        active = NULL;
1155                }
1156        }
1157
1158        return active;
1159}
1160
1161struct i915_request *
1162execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1163{
1164        struct intel_engine_cs *engine =
1165                container_of(execlists, typeof(*engine), execlists);
1166
1167        return __unwind_incomplete_requests(engine);
1168}
1169
1170static inline void
1171execlists_context_status_change(struct i915_request *rq, unsigned long status)
1172{
1173        /*
1174         * Only used when GVT-g is enabled now. When GVT-g is disabled,
 1175         * the compiler should eliminate this function as dead-code.
1176         */
1177        if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1178                return;
1179
1180        atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1181                                   status, rq);
1182}
1183
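/*
 * Engine busyness accounting: intel_engine_context_in/out() keep a count of
 * contexts currently executing on the engine. The first context in records
 * the start timestamp and the last context out folds the elapsed time into
 * engine->stats.total, with the seqlock letting readers sample a consistent
 * {start, total} pair.
 */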
1184static void intel_engine_context_in(struct intel_engine_cs *engine)
1185{
1186        unsigned long flags;
1187
1188        if (atomic_add_unless(&engine->stats.active, 1, 0))
1189                return;
1190
1191        write_seqlock_irqsave(&engine->stats.lock, flags);
1192        if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1193                engine->stats.start = ktime_get();
1194                atomic_inc(&engine->stats.active);
1195        }
1196        write_sequnlock_irqrestore(&engine->stats.lock, flags);
1197}
1198
1199static void intel_engine_context_out(struct intel_engine_cs *engine)
1200{
1201        unsigned long flags;
1202
1203        GEM_BUG_ON(!atomic_read(&engine->stats.active));
1204
1205        if (atomic_add_unless(&engine->stats.active, -1, 1))
1206                return;
1207
1208        write_seqlock_irqsave(&engine->stats.lock, flags);
1209        if (atomic_dec_and_test(&engine->stats.active)) {
1210                engine->stats.total =
1211                        ktime_add(engine->stats.total,
1212                                  ktime_sub(ktime_get(), engine->stats.start));
1213        }
1214        write_sequnlock_irqrestore(&engine->stats.lock, flags);
1215}
1216
1217static void
1218execlists_check_context(const struct intel_context *ce,
1219                        const struct intel_engine_cs *engine)
1220{
1221        const struct intel_ring *ring = ce->ring;
1222        u32 *regs = ce->lrc_reg_state;
1223        bool valid = true;
1224        int x;
1225
1226        if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1227                pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1228                       engine->name,
1229                       regs[CTX_RING_START],
1230                       i915_ggtt_offset(ring->vma));
1231                regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1232                valid = false;
1233        }
1234
1235        if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1236            (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1237                pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1238                       engine->name,
1239                       regs[CTX_RING_CTL],
1240                       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1241                regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1242                valid = false;
1243        }
1244
1245        x = lrc_ring_mi_mode(engine);
1246        if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1247                pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1248                       engine->name, regs[x + 1]);
1249                regs[x + 1] &= ~STOP_RING;
1250                regs[x + 1] |= STOP_RING << 16;
1251                valid = false;
1252        }
1253
1254        WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1255}
1256
1257static void restore_default_state(struct intel_context *ce,
1258                                  struct intel_engine_cs *engine)
1259{
1260        u32 *regs;
1261
1262        regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1263        execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1264
1265        ce->runtime.last = intel_context_get_runtime(ce);
1266}
1267
1268static void reset_active(struct i915_request *rq,
1269                         struct intel_engine_cs *engine)
1270{
1271        struct intel_context * const ce = rq->context;
1272        u32 head;
1273
1274        /*
1275         * The executing context has been cancelled. We want to prevent
1276         * further execution along this context and propagate the error on
1277         * to anything depending on its results.
1278         *
1279         * In __i915_request_submit(), we apply the -EIO and remove the
1280         * requests' payloads for any banned requests. But first, we must
1281         * rewind the context back to the start of the incomplete request so
1282         * that we do not jump back into the middle of the batch.
1283         *
1284         * We preserve the breadcrumbs and semaphores of the incomplete
1285         * requests so that inter-timeline dependencies (i.e other timelines)
1286         * remain correctly ordered. And we defer to __i915_request_submit()
1287         * so that all asynchronous waits are correctly handled.
1288         */
1289        ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1290                     rq->fence.context, rq->fence.seqno);
1291
1292        /* On resubmission of the active request, payload will be scrubbed */
1293        if (i915_request_completed(rq))
1294                head = rq->tail;
1295        else
1296                head = active_request(ce->timeline, rq)->head;
1297        head = intel_ring_wrap(ce->ring, head);
1298
1299        /* Scrub the context image to prevent replaying the previous batch */
1300        restore_default_state(ce, engine);
1301        __execlists_update_reg_state(ce, engine, head);
1302
1303        /* We've switched away, so this should be a no-op, but intent matters */
1304        ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1305}
1306
1307static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1308{
1309#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1310        ce->runtime.num_underflow += dt < 0;
1311        ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1312#endif
1313}
1314
1315static void intel_context_update_runtime(struct intel_context *ce)
1316{
1317        u32 old;
1318        s32 dt;
1319
1320        if (intel_context_is_barrier(ce))
1321                return;
1322
1323        old = ce->runtime.last;
1324        ce->runtime.last = intel_context_get_runtime(ce);
1325        dt = ce->runtime.last - old;
1326
1327        if (unlikely(dt <= 0)) {
1328                CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1329                         old, ce->runtime.last, dt);
1330                st_update_runtime_underflow(ce, dt);
1331                return;
1332        }
1333
1334        ewma_runtime_add(&ce->runtime.avg, dt);
1335        ce->runtime.total += dt;
1336}
1337
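/*
 * On schedule-in the context is assigned a hardware ccid for this engine:
 * either the fixed ce->tag reserved for OA and friends, or a free bit
 * borrowed from engine->context_tag (handed back in
 * __execlists_schedule_out()). The ccid lands in the SW context ID field of
 * the upper descriptor dword so that hardware status events can be tied
 * back to the submitted context.
 */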
1338static inline struct intel_engine_cs *
1339__execlists_schedule_in(struct i915_request *rq)
1340{
1341        struct intel_engine_cs * const engine = rq->engine;
1342        struct intel_context * const ce = rq->context;
1343
1344        intel_context_get(ce);
1345
1346        if (unlikely(intel_context_is_banned(ce)))
1347                reset_active(rq, engine);
1348
1349        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1350                execlists_check_context(ce, engine);
1351
1352        if (ce->tag) {
1353                /* Use a fixed tag for OA and friends */
1354                GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1355                ce->lrc.ccid = ce->tag;
1356        } else {
1357                /* We don't need a strict matching tag, just different values */
1358                unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1359
1360                GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1361                clear_bit(tag - 1, &engine->context_tag);
1362                ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1363
1364                BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1365        }
1366
1367        ce->lrc.ccid |= engine->execlists.ccid;
1368
1369        __intel_gt_pm_get(engine->gt);
1370        if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
1371                intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1372        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1373        intel_engine_context_in(engine);
1374
1375        return engine;
1376}
1377
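/*
 * ce->inflight is a tagged pointer: the physical engine the context is
 * currently in flight on, with the low bits counting the extra ELSP ports
 * occupied by the same context. execlists_schedule_in() bumps that count
 * (installing the pointer on first use) and execlists_schedule_out() drops
 * it, deferring __execlists_schedule_out() until the last port has drained.
 */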
1378static inline struct i915_request *
1379execlists_schedule_in(struct i915_request *rq, int idx)
1380{
1381        struct intel_context * const ce = rq->context;
1382        struct intel_engine_cs *old;
1383
1384        GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1385        trace_i915_request_in(rq, idx);
1386
1387        old = READ_ONCE(ce->inflight);
1388        do {
1389                if (!old) {
1390                        WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1391                        break;
1392                }
1393        } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1394
1395        GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1396        return i915_request_get(rq);
1397}
1398
1399static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1400{
1401        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1402        struct i915_request *next = READ_ONCE(ve->request);
1403
1404        if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1405                tasklet_hi_schedule(&ve->base.execlists.tasklet);
1406}
1407
1408static inline void
1409__execlists_schedule_out(struct i915_request *rq,
1410                         struct intel_engine_cs * const engine,
1411                         unsigned int ccid)
1412{
1413        struct intel_context * const ce = rq->context;
1414
1415        /*
1416         * NB process_csb() is not under the engine->active.lock and hence
1417         * schedule_out can race with schedule_in meaning that we should
1418         * refrain from doing non-trivial work here.
1419         */
1420
1421        /*
1422         * If we have just completed this context, the engine may now be
1423         * idle and we want to re-enter powersaving.
1424         */
1425        if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1426            i915_request_completed(rq))
1427                intel_engine_add_retire(engine, ce->timeline);
1428
1429        ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1430        ccid &= GEN12_MAX_CONTEXT_HW_ID;
1431        if (ccid < BITS_PER_LONG) {
1432                GEM_BUG_ON(ccid == 0);
1433                GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1434                set_bit(ccid - 1, &engine->context_tag);
1435        }
1436
1437        intel_context_update_runtime(ce);
1438        intel_engine_context_out(engine);
1439        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1440        if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
1441                intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1442        intel_gt_pm_put_async(engine->gt);
1443
1444        /*
1445         * If this is part of a virtual engine, its next request may
1446         * have been blocked waiting for access to the active context.
1447         * We have to kick all the siblings again in case we need to
1448         * switch (e.g. the next request is not runnable on this
1449         * engine). Hopefully, we will already have submitted the next
1450         * request before the tasklet runs and do not need to rebuild
1451         * each virtual tree and kick everyone again.
1452         */
1453        if (ce->engine != engine)
1454                kick_siblings(rq, ce);
1455
1456        intel_context_put(ce);
1457}
1458
1459static inline void
1460execlists_schedule_out(struct i915_request *rq)
1461{
1462        struct intel_context * const ce = rq->context;
1463        struct intel_engine_cs *cur, *old;
1464        u32 ccid;
1465
1466        trace_i915_request_out(rq);
1467
1468        ccid = rq->context->lrc.ccid;
1469        old = READ_ONCE(ce->inflight);
1470        do
1471                cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1472        while (!try_cmpxchg(&ce->inflight, &old, cur));
1473        if (!cur)
1474                __execlists_schedule_out(rq, old, ccid);
1475
1476        i915_request_put(rq);
1477}
1478
1479static u64 execlists_update_context(struct i915_request *rq)
1480{
1481        struct intel_context *ce = rq->context;
1482        u64 desc = ce->lrc.desc;
1483        u32 tail, prev;
1484
1485        /*
1486         * WaIdleLiteRestore:bdw,skl
1487         *
1488         * We should never submit the context with the same RING_TAIL twice
1489         * just in case we submit an empty ring, which confuses the HW.
1490         *
1491         * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1492         * the normal request to be able to always advance the RING_TAIL on
1493         * subsequent resubmissions (for lite restore). Should that fail us,
1494         * and we try and submit the same tail again, force the context
1495         * reload.
1496         *
1497         * If we need to return to a preempted context, we need to skip the
1498         * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1499         * HW has a tendency to ignore us rewinding the TAIL to the end of
1500         * an earlier request.
1501         */
1502        GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1503        prev = rq->ring->tail;
1504        tail = intel_ring_set_tail(rq->ring, rq->tail);
1505        if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1506                desc |= CTX_DESC_FORCE_RESTORE;
1507        ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1508        rq->tail = rq->wa_tail;
1509
1510        /*
1511         * Make sure the context image is complete before we submit it to HW.
1512         *
1513         * Ostensibly, writes (including the WCB) should be flushed prior to
1514         * an uncached write such as our mmio register access, the empirical
1515         * evidence (esp. on Braswell) suggests that the WC write into memory
1516         * may not be visible to the HW prior to the completion of the UC
1517         * register write and that we may begin execution from the context
1518         * before its image is complete leading to invalid PD chasing.
1519         */
1520        wmb();
1521
1522        ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1523        return desc;
1524}
1525
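/*
 * Write one context descriptor into a submission port. When a control
 * register is present (the submit queue on newer gens), both halves of the
 * descriptor go into that port's slot and the load is triggered separately
 * via the control register; otherwise the descriptor is written directly to
 * the ELSP, upper dword first, lower dword last.
 */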
1526static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1527{
1528        if (execlists->ctrl_reg) {
1529                writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1530                writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1531        } else {
1532                writel(upper_32_bits(desc), execlists->submit_reg);
1533                writel(lower_32_bits(desc), execlists->submit_reg);
1534        }
1535}
1536
1537static __maybe_unused char *
1538dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1539{
1540        if (!rq)
1541                return "";
1542
1543        snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1544                 prefix,
1545                 rq->context->lrc.ccid,
1546                 rq->fence.context, rq->fence.seqno,
1547                 i915_request_completed(rq) ? "!" :
1548                 i915_request_started(rq) ? "*" :
1549                 "",
1550                 rq_prio(rq));
1551
1552        return buf;
1553}
1554
1555static __maybe_unused void
1556trace_ports(const struct intel_engine_execlists *execlists,
1557            const char *msg,
1558            struct i915_request * const *ports)
1559{
1560        const struct intel_engine_cs *engine =
1561                container_of(execlists, typeof(*engine), execlists);
1562        char __maybe_unused p0[40], p1[40];
1563
1564        if (!ports[0])
1565                return;
1566
1567        ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1568                     dump_port(p0, sizeof(p0), "", ports[0]),
1569                     dump_port(p1, sizeof(p1), ", ", ports[1]));
1570}
1571
1572static inline bool
1573reset_in_progress(const struct intel_engine_execlists *execlists)
1574{
1575        return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1576}
1577
1578static __maybe_unused bool
1579assert_pending_valid(const struct intel_engine_execlists *execlists,
1580                     const char *msg)
1581{
1582        struct intel_engine_cs *engine =
1583                container_of(execlists, typeof(*engine), execlists);
1584        struct i915_request * const *port, *rq;
1585        struct intel_context *ce = NULL;
1586        bool sentinel = false;
1587        u32 ccid = -1;
1588
1589        trace_ports(execlists, msg, execlists->pending);
1590
1591        /* We may be messing around with the lists during reset, lalala */
1592        if (reset_in_progress(execlists))
1593                return true;
1594
1595        if (!execlists->pending[0]) {
1596                GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1597                              engine->name);
1598                return false;
1599        }
1600
1601        if (execlists->pending[execlists_num_ports(execlists)]) {
1602                GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1603                              engine->name, execlists_num_ports(execlists));
1604                return false;
1605        }
1606
1607        for (port = execlists->pending; (rq = *port); port++) {
1608                unsigned long flags;
1609                bool ok = true;
1610
1611                GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1612                GEM_BUG_ON(!i915_request_is_active(rq));
1613
1614                if (ce == rq->context) {
1615                        GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1616                                      engine->name,
1617                                      ce->timeline->fence_context,
1618                                      port - execlists->pending);
1619                        return false;
1620                }
1621                ce = rq->context;
1622
1623                if (ccid == ce->lrc.ccid) {
1624                        GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1625                                      engine->name,
1626                                      ccid, ce->timeline->fence_context,
1627                                      port - execlists->pending);
1628                        return false;
1629                }
1630                ccid = ce->lrc.ccid;
1631
1632                /*
1633                 * Sentinels are supposed to be the last request so they flush
1634                 * the current execution off the HW. Check that they are the only
1635                 * request in the pending submission.
1636                 */
1637                if (sentinel) {
1638                        GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1639                                      engine->name,
1640                                      ce->timeline->fence_context,
1641                                      port - execlists->pending);
1642                        return false;
1643                }
1644                sentinel = i915_request_has_sentinel(rq);
1645
1646                /* Hold tightly onto the lock to prevent concurrent retires! */
1647                if (!spin_trylock_irqsave(&rq->lock, flags))
1648                        continue;
1649
1650                if (i915_request_completed(rq))
1651                        goto unlock;
1652
1653                if (i915_active_is_idle(&ce->active) &&
1654                    !intel_context_is_barrier(ce)) {
1655                        GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1656                                      engine->name,
1657                                      ce->timeline->fence_context,
1658                                      port - execlists->pending);
1659                        ok = false;
1660                        goto unlock;
1661                }
1662
1663                if (!i915_vma_is_pinned(ce->state)) {
1664                        GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1665                                      engine->name,
1666                                      ce->timeline->fence_context,
1667                                      port - execlists->pending);
1668                        ok = false;
1669                        goto unlock;
1670                }
1671
1672                if (!i915_vma_is_pinned(ce->ring->vma)) {
1673                        GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1674                                      engine->name,
1675                                      ce->timeline->fence_context,
1676                                      port - execlists->pending);
1677                        ok = false;
1678                        goto unlock;
1679                }
1680
1681unlock:
1682                spin_unlock_irqrestore(&rq->lock, flags);
1683                if (!ok)
1684                        return false;
1685        }
1686
1687        return ce;
1688}
1689
1690static void execlists_submit_ports(struct intel_engine_cs *engine)
1691{
1692        struct intel_engine_execlists *execlists = &engine->execlists;
1693        unsigned int n;
1694
1695        GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1696
1697        /*
1698         * We can skip acquiring intel_runtime_pm_get() here as it was taken
1699         * on our behalf by the request (see i915_gem_mark_busy()) and it will
1700         * not be relinquished until the device is idle (see
1701         * i915_gem_idle_work_handler()). As a precaution, we make sure
1702         * that all ELSP are drained, i.e. we have processed the CSB,
1703         * before allowing ourselves to idle and calling intel_runtime_pm_put().
1704         */
1705        GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1706
1707        /*
1708         * ELSQ note: the submit queue is not cleared after being submitted
1709         * to the HW so we need to make sure we always clean it up. This is
1710         * currently ensured by the fact that we always write the same number
1711         * of elsq entries, keep this in mind before changing the loop below.
1712         */
1713        for (n = execlists_num_ports(execlists); n--; ) {
1714                struct i915_request *rq = execlists->pending[n];
1715
1716                write_desc(execlists,
1717                           rq ? execlists_update_context(rq) : 0,
1718                           n);
1719        }
1720
1721        /* we need to manually load the submit queue */
1722        if (execlists->ctrl_reg)
1723                writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1724}
1725
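    /*
     * GVT-mediated contexts must be submitted on their own, occupying only
     * port[0]; they are never paired with another context in the second
     * ELSP port (see the checks in execlists_dequeue()).
     */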
1726static bool ctx_single_port_submission(const struct intel_context *ce)
1727{
1728        return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1729                intel_context_force_single_submission(ce));
1730}
1731
1732static bool can_merge_ctx(const struct intel_context *prev,
1733                          const struct intel_context *next)
1734{
1735        if (prev != next)
1736                return false;
1737
1738        if (ctx_single_port_submission(prev))
1739                return false;
1740
1741        return true;
1742}
1743
1744static unsigned long i915_request_flags(const struct i915_request *rq)
1745{
1746        return READ_ONCE(rq->fence.flags);
1747}
1748
1749static bool can_merge_rq(const struct i915_request *prev,
1750                         const struct i915_request *next)
1751{
1752        GEM_BUG_ON(prev == next);
1753        GEM_BUG_ON(!assert_priority_queue(prev, next));
1754
1755        /*
1756         * We do not submit known completed requests. Therefore if the next
1757         * request is already completed, we can pretend to merge it in
1758         * with the previous context (and we will skip updating the ELSP
1759         * and tracking). Thus hopefully keeping the ELSP full with active
1760         * contexts, despite the best efforts of preempt-to-busy to confuse
1761         * us.
1762         */
1763        if (i915_request_completed(next))
1764                return true;
1765
1766        if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1767                     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1768                      BIT(I915_FENCE_FLAG_SENTINEL))))
1769                return false;
1770
1771        if (!can_merge_ctx(prev->context, next->context))
1772                return false;
1773
1774        GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1775        return true;
1776}
1777
1778static void virtual_update_register_offsets(u32 *regs,
1779                                            struct intel_engine_cs *engine)
1780{
1781        set_offsets(regs, reg_offsets(engine), engine, false);
1782}
1783
1784static bool virtual_matches(const struct virtual_engine *ve,
1785                            const struct i915_request *rq,
1786                            const struct intel_engine_cs *engine)
1787{
1788        const struct intel_engine_cs *inflight;
1789
1790        if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1791                return false;
1792
1793        /*
1794         * We track when the HW has completed saving the context image
1795         * (i.e. when we have seen the final CS event switching out of
1796         * the context) and must not overwrite the context image before
1797         * then. This restricts us to only using the active engine
1798         * while the previous virtualized request is inflight (so
1799         * we reuse the register offsets). This is a very small
1800         * hysteresis on the greedy selection algorithm.
1801         */
1802        inflight = intel_context_inflight(&ve->context);
1803        if (inflight && inflight != engine)
1804                return false;
1805
1806        return true;
1807}
1808
1809static void virtual_xfer_context(struct virtual_engine *ve,
1810                                 struct intel_engine_cs *engine)
1811{
1812        unsigned int n;
1813
1814        if (likely(engine == ve->siblings[0]))
1815                return;
1816
1817        GEM_BUG_ON(READ_ONCE(ve->context.inflight));
1818        if (!intel_engine_has_relative_mmio(engine))
1819                virtual_update_register_offsets(ve->context.lrc_reg_state,
1820                                                engine);
1821
1822        /*
1823         * Move the bound engine to the top of the list for
1824         * future execution. We then kick this tasklet first
1825         * before checking others, so that we preferentially
1826         * reuse this set of bound registers.
1827         */
1828        for (n = 1; n < ve->num_siblings; n++) {
1829                if (ve->siblings[n] == engine) {
1830                        swap(ve->siblings[n], ve->siblings[0]);
1831                        break;
1832                }
1833        }
1834}
1835
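    /*
     * Walk the scheduling graph: for_each_waiter() iterates the requests
     * that depend on rq__ (its waiters), for_each_signaler() the requests
     * that rq__ itself depends upon (its signalers). Both use
     * lockless/RCU-safe list traversal.
     */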
1836#define for_each_waiter(p__, rq__) \
1837        list_for_each_entry_lockless(p__, \
1838                                     &(rq__)->sched.waiters_list, \
1839                                     wait_link)
1840
1841#define for_each_signaler(p__, rq__) \
1842        list_for_each_entry_rcu(p__, \
1843                                &(rq__)->sched.signalers_list, \
1844                                signal_link)
1845
1846static void defer_request(struct i915_request *rq, struct list_head * const pl)
1847{
1848        LIST_HEAD(list);
1849
1850        /*
1851         * We want to move the interrupted request to the back of
1852         * the round-robin list (i.e. its priority level), but
1853         * in doing so, we must then move all requests that were in
1854         * flight and were waiting for the interrupted request to
1855         * be run after it again.
1856         */
1857        do {
1858                struct i915_dependency *p;
1859
1860                GEM_BUG_ON(i915_request_is_active(rq));
1861                list_move_tail(&rq->sched.link, pl);
1862
1863                for_each_waiter(p, rq) {
1864                        struct i915_request *w =
1865                                container_of(p->waiter, typeof(*w), sched);
1866
1867                        if (p->flags & I915_DEPENDENCY_WEAK)
1868                                continue;
1869
1870                        /* Leave semaphores spinning on the other engines */
1871                        if (w->engine != rq->engine)
1872                                continue;
1873
1874                        /* No waiter should start before its signaler */
1875                        GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1876                                   i915_request_started(w) &&
1877                                   !i915_request_completed(rq));
1878
1879                        GEM_BUG_ON(i915_request_is_active(w));
1880                        if (!i915_request_is_ready(w))
1881                                continue;
1882
1883                        if (rq_prio(w) < rq_prio(rq))
1884                                continue;
1885
1886                        GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1887                        list_move_tail(&w->sched.link, &list);
1888                }
1889
1890                rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1891        } while (rq);
1892}
1893
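    /*
     * On timeslice expiry, unwind the incomplete requests from the ELSP and
     * push the first one (along with its dependents, see defer_request())
     * to the back of its priority level so that the next dequeue may pick
     * a different context.
     */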
1894static void defer_active(struct intel_engine_cs *engine)
1895{
1896        struct i915_request *rq;
1897
1898        rq = __unwind_incomplete_requests(engine);
1899        if (!rq)
1900                return;
1901
1902        defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1903}
1904
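    /*
     * Should the timeslice timer be armed for the active request? Only if
     * timeslicing is enabled on this engine and something of equal or
     * higher effective priority (the next queued request, or a runnable
     * request on a matching virtual engine) is waiting behind it.
     */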
1905static bool
1906need_timeslice(const struct intel_engine_cs *engine,
1907               const struct i915_request *rq,
1908               const struct rb_node *rb)
1909{
1910        int hint;
1911
1912        if (!intel_engine_has_timeslices(engine))
1913                return false;
1914
1915        hint = engine->execlists.queue_priority_hint;
1916
1917        if (rb) {
1918                const struct virtual_engine *ve =
1919                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1920                const struct intel_engine_cs *inflight =
1921                        intel_context_inflight(&ve->context);
1922
1923                if (!inflight || inflight == engine) {
1924                        struct i915_request *next;
1925
1926                        rcu_read_lock();
1927                        next = READ_ONCE(ve->request);
1928                        if (next)
1929                                hint = max(hint, rq_prio(next));
1930                        rcu_read_unlock();
1931                }
1932        }
1933
1934        if (!list_is_last(&rq->sched.link, &engine->active.requests))
1935                hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1936
1937        GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1938        return hint >= effective_prio(rq);
1939}
1940
1941static bool
1942timeslice_yield(const struct intel_engine_execlists *el,
1943                const struct i915_request *rq)
1944{
1945        /*
1946         * Once bitten, forever smitten!
1947         *
1948         * If the active context ever busy-waited on a semaphore,
1949         * it will be treated as a hog until the end of its timeslice (i.e.
1950         * until it is scheduled out and replaced by a new submission,
1951         * possibly even its own lite-restore). The HW only sends an interrupt
1952         * on the first miss, and we do not know if that semaphore has been
1953         * signaled, or even if it is now stuck on another semaphore. Play
1954         * safe, yield if it might be stuck -- it will be given a fresh
1955         * timeslice in the near future.
1956         */
1957        return rq->context->lrc.ccid == READ_ONCE(el->yield);
1958}
1959
1960static bool
1961timeslice_expired(const struct intel_engine_execlists *el,
1962                  const struct i915_request *rq)
1963{
1964        return timer_expired(&el->timer) || timeslice_yield(el, rq);
1965}
1966
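    /*
     * Priority of whatever would run next on this engine once rq is
     * switched out; recorded as switch_priority_hint at submission and
     * compared against the active request in active_timeslice().
     */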
1967static int
1968switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1969{
1970        if (list_is_last(&rq->sched.link, &engine->active.requests))
1971                return engine->execlists.queue_priority_hint;
1972
1973        return rq_prio(list_next_entry(rq, sched.link));
1974}
1975
1976static inline unsigned long
1977timeslice(const struct intel_engine_cs *engine)
1978{
1979        return READ_ONCE(engine->props.timeslice_duration_ms);
1980}
1981
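    /*
     * Timeslice duration for the currently executing request: 0 (no timer)
     * if the port is empty, the request has already completed, or nothing
     * of sufficient priority is queued behind it; otherwise the engine's
     * configured timeslice_duration_ms.
     */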
1982static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1983{
1984        const struct intel_engine_execlists *execlists = &engine->execlists;
1985        const struct i915_request *rq = *execlists->active;
1986
1987        if (!rq || i915_request_completed(rq))
1988                return 0;
1989
1990        if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1991                return 0;
1992
1993        return timeslice(engine);
1994}
1995
1996static void set_timeslice(struct intel_engine_cs *engine)
1997{
1998        unsigned long duration;
1999
2000        if (!intel_engine_has_timeslices(engine))
2001                return;
2002
2003        duration = active_timeslice(engine);
2004        ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2005
2006        set_timer_ms(&engine->execlists.timer, duration);
2007}
2008
2009static void start_timeslice(struct intel_engine_cs *engine, int prio)
2010{
2011        struct intel_engine_execlists *execlists = &engine->execlists;
2012        unsigned long duration;
2013
2014        if (!intel_engine_has_timeslices(engine))
2015                return;
2016
2017        WRITE_ONCE(execlists->switch_priority_hint, prio);
2018        if (prio == INT_MIN)
2019                return;
2020
2021        if (timer_pending(&execlists->timer))
2022                return;
2023
2024        duration = timeslice(engine);
2025        ENGINE_TRACE(engine,
2026                     "start timeslicing, prio:%d, interval:%lu",
2027                     prio, duration);
2028
2029        set_timer_ms(&execlists->timer, duration);
2030}
2031
2032static void record_preemption(struct intel_engine_execlists *execlists)
2033{
2034        (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2035}
2036
2037static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2038                                            const struct i915_request *rq)
2039{
2040        if (!rq)
2041                return 0;
2042
2043        /* Force a fast reset for terminated contexts (ignoring sysfs!) */
2044        if (unlikely(intel_context_is_banned(rq->context)))
2045                return 1;
2046
2047        return READ_ONCE(engine->props.preempt_timeout_ms);
2048}
2049
2050static void set_preempt_timeout(struct intel_engine_cs *engine,
2051                                const struct i915_request *rq)
2052{
2053        if (!intel_engine_has_preempt_reset(engine))
2054                return;
2055
2056        set_timer_ms(&engine->execlists.preempt,
2057                     active_preempt_timeout(engine, rq));
2058}
2059
2060static inline void clear_ports(struct i915_request **ports, int count)
2061{
2062        memset_p((void **)ports, NULL, count);
2063}
2064
2065static inline void
2066copy_ports(struct i915_request **dst, struct i915_request **src, int count)
2067{
2068        /* A memcpy_p() would be very useful here! */
2069        while (count--)
2070                WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
2071}
2072
2073static void execlists_dequeue(struct intel_engine_cs *engine)
2074{
2075        struct intel_engine_execlists * const execlists = &engine->execlists;
2076        struct i915_request **port = execlists->pending;
2077        struct i915_request ** const last_port = port + execlists->port_mask;
2078        struct i915_request * const *active;
2079        struct i915_request *last;
2080        struct rb_node *rb;
2081        bool submit = false;
2082
2083        /*
2084         * Hardware submission is through 2 ports. Conceptually each port
2085         * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2086         * static for a context, and unique to each, so we only execute
2087         * requests belonging to a single context from each ring. RING_HEAD
2088         * is maintained by the CS in the context image, it marks the place
2089         * where it got up to last time, and through RING_TAIL we tell the CS
2090         * where we want to execute up to this time.
2091         *
2092         * In this list the requests are in order of execution. Consecutive
2093         * requests from the same context are adjacent in the ringbuffer. We
2094         * can combine these requests into a single RING_TAIL update:
2095         *
2096         *              RING_HEAD...req1...req2
2097         *                                    ^- RING_TAIL
2098         * since to execute req2 the CS must first execute req1.
2099         *
2100         * Our goal then is to point each port to the end of a consecutive
2101         * sequence of requests as being the optimal (fewest wake ups
2102         * and context switches) submission.
2103         */
2104
2105        for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2106                struct virtual_engine *ve =
2107                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2108                struct i915_request *rq = READ_ONCE(ve->request);
2109
2110                if (!rq) { /* lazily cleanup after another engine handled rq */
2111                        rb_erase_cached(rb, &execlists->virtual);
2112                        RB_CLEAR_NODE(rb);
2113                        rb = rb_first_cached(&execlists->virtual);
2114                        continue;
2115                }
2116
2117                if (!virtual_matches(ve, rq, engine)) {
2118                        rb = rb_next(rb);
2119                        continue;
2120                }
2121
2122                break;
2123        }
2124
2125        /*
2126         * If the queue is higher priority than the last
2127         * request in the currently active context, submit afresh.
2128         * We will resubmit again afterwards in case we need to split
2129         * the active context to interject the preemption request,
2130         * i.e. we will retrigger preemption following the ack in case
2131         * of trouble.
2132         */
2133        active = READ_ONCE(execlists->active);
2134
2135        /*
2136         * In theory we can skip over completed contexts that have not
2137         * yet been processed by events (as those events are in flight):
2138         *
2139         * while ((last = *active) && i915_request_completed(last))
2140         *      active++;
2141         *
2142         * However, the GPU cannot handle this as it will ultimately
2143         * find itself trying to jump back into a context it has just
2144         * completed and barf.
2145         */
2146
2147        if ((last = *active)) {
2148                if (need_preempt(engine, last, rb)) {
2149                        if (i915_request_completed(last)) {
2150                                tasklet_hi_schedule(&execlists->tasklet);
2151                                return;
2152                        }
2153
2154                        ENGINE_TRACE(engine,
2155                                     "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2156                                     last->fence.context,
2157                                     last->fence.seqno,
2158                                     last->sched.attr.priority,
2159                                     execlists->queue_priority_hint);
2160                        record_preemption(execlists);
2161
2162                        /*
2163                         * Don't let the RING_HEAD advance past the breadcrumb
2164                         * as we unwind (and until we resubmit) so that we do
2165                         * not accidentally tell it to go backwards.
2166                         */
2167                        ring_set_paused(engine, 1);
2168
2169                        /*
2170                         * Note that we have not stopped the GPU at this point,
2171                         * so we are unwinding the incomplete requests as they
2172                         * remain inflight and so by the time we do complete
2173                         * the preemption, some of the unwound requests may
2174                         * complete!
2175                         */
2176                        __unwind_incomplete_requests(engine);
2177
2178                        last = NULL;
2179                } else if (need_timeslice(engine, last, rb) &&
2180                           timeslice_expired(execlists, last)) {
2181                        if (i915_request_completed(last)) {
2182                                tasklet_hi_schedule(&execlists->tasklet);
2183                                return;
2184                        }
2185
2186                        ENGINE_TRACE(engine,
2187                                     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2188                                     last->fence.context,
2189                                     last->fence.seqno,
2190                                     last->sched.attr.priority,
2191                                     execlists->queue_priority_hint,
2192                                     yesno(timeslice_yield(execlists, last)));
2193
2194                        ring_set_paused(engine, 1);
2195                        defer_active(engine);
2196
2197                        /*
2198                         * Unlike for preemption, if we rewind and continue
2199                         * executing the same context as previously active,
2200                         * the order of execution will remain the same and
2201                         * the tail will only advance. We do not need to
2202                         * force a full context restore, as a lite-restore
2203                         * is sufficient to resample the monotonic TAIL.
2204                         *
2205                         * If we switch to any other context, similarly we
2206                         * will not rewind TAIL of current context, and
2207                         * normal save/restore will preserve state and allow
2208                         * us to later continue executing the same request.
2209                         */
2210                        last = NULL;
2211                } else {
2212                        /*
2213                         * Otherwise if we already have a request pending
2214                         * for execution after the current one, we can
2215                         * just wait until the next CS event before
2216                         * queuing more. In either case we will force a
2217                         * lite-restore preemption event, but if we wait
2218                         * we hopefully coalesce several updates into a single
2219                         * submission.
2220                         */
2221                        if (!list_is_last(&last->sched.link,
2222                                          &engine->active.requests)) {
2223                                /*
2224                                 * Even if ELSP[1] is occupied and not worthy
2225                                 * of timeslices, our queue might be.
2226                                 */
2227                                start_timeslice(engine, queue_prio(execlists));
2228                                return;
2229                        }
2230                }
2231        }
2232
2233        while (rb) { /* XXX virtual is always taking precedence */
2234                struct virtual_engine *ve =
2235                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2236                struct i915_request *rq;
2237
2238                spin_lock(&ve->base.active.lock);
2239
2240                rq = ve->request;
2241                if (unlikely(!rq)) { /* lost the race to a sibling */
2242                        spin_unlock(&ve->base.active.lock);
2243                        rb_erase_cached(rb, &execlists->virtual);
2244                        RB_CLEAR_NODE(rb);
2245                        rb = rb_first_cached(&execlists->virtual);
2246                        continue;
2247                }
2248
2249                GEM_BUG_ON(rq != ve->request);
2250                GEM_BUG_ON(rq->engine != &ve->base);
2251                GEM_BUG_ON(rq->context != &ve->context);
2252
2253                if (rq_prio(rq) >= queue_prio(execlists)) {
2254                        if (!virtual_matches(ve, rq, engine)) {
2255                                spin_unlock(&ve->base.active.lock);
2256                                rb = rb_next(rb);
2257                                continue;
2258                        }
2259
2260                        if (last && !can_merge_rq(last, rq)) {
2261                                spin_unlock(&ve->base.active.lock);
2262                                start_timeslice(engine, rq_prio(rq));
2263                                return; /* leave this for another sibling */
2264                        }
2265
2266                        ENGINE_TRACE(engine,
2267                                     "virtual rq=%llx:%lld%s, new engine? %s\n",
2268                                     rq->fence.context,
2269                                     rq->fence.seqno,
2270                                     i915_request_completed(rq) ? "!" :
2271                                     i915_request_started(rq) ? "*" :
2272                                     "",
2273                                     yesno(engine != ve->siblings[0]));
2274
2275                        WRITE_ONCE(ve->request, NULL);
2276                        WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2277                                   INT_MIN);
2278                        rb_erase_cached(rb, &execlists->virtual);
2279                        RB_CLEAR_NODE(rb);
2280
2281                        GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2282                        WRITE_ONCE(rq->engine, engine);
2283
2284                        if (__i915_request_submit(rq)) {
2285                                /*
2286                                 * Only after we confirm that we will submit
2287                                 * this request (i.e. it has not already
2288                                 * completed), do we want to update the context.
2289                                 *
2290                                 * This serves two purposes. It avoids
2291                                 * unnecessary work if we are resubmitting an
2292                                 * already completed request after timeslicing.
2293                                 * But more importantly, it prevents us altering
2294                                 * ve->siblings[] on an idle context, where
2295                                 * we may be using ve->siblings[] in
2296                                 * virtual_context_enter / virtual_context_exit.
2297                                 */
2298                                virtual_xfer_context(ve, engine);
2299                                GEM_BUG_ON(ve->siblings[0] != engine);
2300
2301                                submit = true;
2302                                last = rq;
2303                        }
2304                        i915_request_put(rq);
2305
2306                        /*
2307                         * Hmm, we have a bunch of virtual engine requests,
2308                         * but the first one was already completed (thanks
2309                         * preempt-to-busy!). Keep looking at the veng queue
2310                         * until we have no more relevant requests (i.e.
2311                         * the normal submit queue has higher priority).
2312                         */
2313                        if (!submit) {
2314                                spin_unlock(&ve->base.active.lock);
2315                                rb = rb_first_cached(&execlists->virtual);
2316                                continue;
2317                        }
2318                }
2319
2320                spin_unlock(&ve->base.active.lock);
2321                break;
2322        }
2323
2324        while ((rb = rb_first_cached(&execlists->queue))) {
2325                struct i915_priolist *p = to_priolist(rb);
2326                struct i915_request *rq, *rn;
2327                int i;
2328
2329                priolist_for_each_request_consume(rq, rn, p, i) {
2330                        bool merge = true;
2331
2332                        /*
2333                         * Can we combine this request with the current port?
2334                         * It has to be the same context/ringbuffer and not
2335                         * have any exceptions (e.g. GVT saying never to
2336                         * combine contexts).
2337                         *
2338                         * If we can combine the requests, we can execute both
2339                         * by updating the RING_TAIL to point to the end of the
2340                         * second request, and so we never need to tell the
2341                         * hardware about the first.
2342                         */
2343                        if (last && !can_merge_rq(last, rq)) {
2344                                /*
2345                                 * If we are on the second port and cannot
2346                                 * combine this request with the last, then we
2347                                 * are done.
2348                                 */
2349                                if (port == last_port)
2350                                        goto done;
2351
2352                                /*
2353                                 * We must not populate both ELSP[] with the
2354                                 * same LRCA, i.e. we must submit 2 different
2355                                 * contexts if we submit 2 ELSP.
2356                                 */
2357                                if (last->context == rq->context)
2358                                        goto done;
2359
2360                                if (i915_request_has_sentinel(last))
2361                                        goto done;
2362
2363                                /*
2364                                 * If GVT overrides us we only ever submit
2365                                 * port[0], leaving port[1] empty. Note that we
2366                                 * also have to be careful that we don't queue
2367                                 * the same context (even though a different
2368                                 * request) to the second port.
2369                                 */
2370                                if (ctx_single_port_submission(last->context) ||
2371                                    ctx_single_port_submission(rq->context))
2372                                        goto done;
2373
2374                                merge = false;
2375                        }
2376
2377                        if (__i915_request_submit(rq)) {
2378                                if (!merge) {
2379                                        *port = execlists_schedule_in(last, port - execlists->pending);
2380                                        port++;
2381                                        last = NULL;
2382                                }
2383
2384                                GEM_BUG_ON(last &&
2385                                           !can_merge_ctx(last->context,
2386                                                          rq->context));
2387                                GEM_BUG_ON(last &&
2388                                           i915_seqno_passed(last->fence.seqno,
2389                                                             rq->fence.seqno));
2390
2391                                submit = true;
2392                                last = rq;
2393                        }
2394                }
2395
2396                rb_erase_cached(&p->node, &execlists->queue);
2397                i915_priolist_free(p);
2398        }
2399
2400done:
2401        /*
2402         * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2403         *
2404         * We choose the priority hint such that if we add a request of greater
2405         * priority than this, we kick the submission tasklet to decide on
2406         * the right order of submitting the requests to hardware. We must
2407         * also be prepared to reorder requests as they are in-flight on the
2408         * HW. We derive the priority hint then as the first "hole" in
2409         * the HW submission ports and if there are no available slots,
2410         * the priority of the lowest executing request, i.e. last.
2411         *
2412         * When we do receive a higher priority request ready to run from the
2413         * user, see queue_request(), the priority hint is bumped to that
2414         * request triggering preemption on the next dequeue (or subsequent
2415         * interrupt for secondary ports).
2416         */
2417        execlists->queue_priority_hint = queue_prio(execlists);
2418
2419        if (submit) {
2420                *port = execlists_schedule_in(last, port - execlists->pending);
2421                execlists->switch_priority_hint =
2422                        switch_prio(engine, *execlists->pending);
2423
2424                /*
2425                 * Skip if we ended up with exactly the same set of requests,
2426                 * e.g. trying to timeslice a pair of ordered contexts
2427                 */
2428                if (!memcmp(active, execlists->pending,
2429                            (port - execlists->pending + 1) * sizeof(*port))) {
2430                        do
2431                                execlists_schedule_out(fetch_and_zero(port));
2432                        while (port-- != execlists->pending);
2433
2434                        goto skip_submit;
2435                }
2436                clear_ports(port + 1, last_port - port);
2437
2438                WRITE_ONCE(execlists->yield, -1);
2439                set_preempt_timeout(engine, *active);
2440                execlists_submit_ports(engine);
2441        } else {
2442                start_timeslice(engine, execlists->queue_priority_hint);
2443skip_submit:
2444                ring_set_paused(engine, 0);
2445        }
2446}
2447
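    /*
     * Drop every request still tracked in pending[] and inflight[], then
     * point active back at an empty inflight[] array, preserving the
     * seqlock ordering relied upon by execlists_active().
     */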
2448static void
2449cancel_port_requests(struct intel_engine_execlists * const execlists)
2450{
2451        struct i915_request * const *port;
2452
2453        for (port = execlists->pending; *port; port++)
2454                execlists_schedule_out(*port);
2455        clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2456
2457        /* Mark the end of active before we overwrite *active */
2458        for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2459                execlists_schedule_out(*port);
2460        clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2461
2462        smp_wmb(); /* complete the seqlock for execlists_active() */
2463        WRITE_ONCE(execlists->active, execlists->inflight);
2464}
2465
2466static inline void
2467invalidate_csb_entries(const u64 *first, const u64 *last)
2468{
2469        clflush((void *)first);
2470        clflush((void *)last);
2471}
2472
2473/*
2474 * Starting with Gen12, the status has a new format:
2475 *
2476 *     bit  0:     switched to new queue
2477 *     bit  1:     reserved
2478 *     bit  2:     semaphore wait mode (poll or signal), only valid when
2479 *                 switch detail is set to "wait on semaphore"
2480 *     bits 3-5:   engine class
2481 *     bits 6-11:  engine instance
2482 *     bits 12-14: reserved
2483 *     bits 15-25: sw context id of the lrc the GT switched to
2484 *     bits 26-31: sw counter of the lrc the GT switched to
2485 *     bits 32-35: context switch detail
2486 *                  - 0: ctx complete
2487 *                  - 1: wait on sync flip
2488 *                  - 2: wait on vblank
2489 *                  - 3: wait on scanline
2490 *                  - 4: wait on semaphore
2491 *                  - 5: context preempted (not on SEMAPHORE_WAIT or
2492 *                       WAIT_FOR_EVENT)
2493 *     bit  36:    reserved
2494 *     bits 37-43: wait detail (for switch detail 1 to 4)
2495 *     bits 44-46: reserved
2496 *     bits 47-57: sw context id of the lrc the GT switched away from
2497 *     bits 58-63: sw counter of the lrc the GT switched away from
2498 */
2499static inline bool gen12_csb_parse(const u64 *csb)
2500{
2501        bool ctx_away_valid;
2502        bool new_queue;
2503        u64 entry;
2504
2505        /* HSD#22011248461 */
2506        entry = READ_ONCE(*csb);
2507        if (unlikely(entry == -1)) {
2508                preempt_disable();
2509                if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
2510                        GEM_WARN_ON("50us CSB timeout");
2511                preempt_enable();
2512        }
2513        WRITE_ONCE(*(u64 *)csb, -1);
2514
2515        ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
2516        new_queue =
2517                lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2518
2519        /*
2520         * The context switch detail is not guaranteed to be 5 when a preemption
2521         * occurs, so we can't just check for that. The check below works for
2522         * all the cases we care about, including preemptions of WAIT
2523         * instructions and lite-restore. Preempt-to-idle via the CTRL register
2524         * would require some extra handling, but we don't support that.
2525         */
2526        if (!ctx_away_valid || new_queue) {
2527                GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
2528                return true;
2529        }
2530
2531        /*
2532         * switch detail = 5 is covered by the case above and we do not expect a
2533         * context switch on an unsuccessful wait instruction since we always
2534         * use polling mode.
2535         */
2536        GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
2537        return false;
2538}
2539
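    /*
     * Prior to Gen12, any CSB event reporting an idle->active transition or
     * a preemption signals the promotion of pending[] to active[].
     */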
2540static inline bool gen8_csb_parse(const u64 *csb)
2541{
2542        return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2543}
2544
2545static void process_csb(struct intel_engine_cs *engine)
2546{
2547        struct intel_engine_execlists * const execlists = &engine->execlists;
2548        const u64 * const buf = execlists->csb_status;
2549        const u8 num_entries = execlists->csb_size;
2550        u8 head, tail;
2551
2552        /*
2553         * As we modify our execlists state tracking we require exclusive
2554         * access. Either we are inside the tasklet, or the tasklet is disabled
2555         * and we assume that is only inside the reset paths and so serialised.
2556         */
2557        GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2558                   !reset_in_progress(execlists));
2559        GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2560
2561        /*
2562         * Note that csb_write, csb_status may be either in HWSP or mmio.
2563         * When reading from the csb_write mmio register, we have to be
2564         * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2565         * the low 4 bits. As it happens we know the next 4 bits are always
2566         * zero and so we can simply mask off the low u8 of the register
2567         * and treat it identically to reading from the HWSP (without having
2568         * to use explicit shifting and masking, and probably bifurcating
2569         * the code to handle the legacy mmio read).
2570         */
2571        head = execlists->csb_head;
2572        tail = READ_ONCE(*execlists->csb_write);
2573        if (unlikely(head == tail))
2574                return;
2575
2576        /*
2577         * We will consume all events from HW, or at least pretend to.
2578         *
2579         * The sequence of events from the HW is deterministic, and derived
2580         * from our writes to the ELSP, with a smidgen of variability for
2581         * the arrival of the asynchronous requests wrt the inflight
2582         * execution. If the HW sends an event that does not correspond with
2583         * the one we are expecting, we have to abandon all hope as we lose
2584         * all tracking of what the engine is actually executing. We will
2585         * only detect we are out of sequence with the HW when we get an
2586         * 'impossible' event because we have already drained our own
2587         * preemption/promotion queue. If this occurs, we know that we likely
2588         * lost track of execution earlier and must unwind and restart; the
2589         * simplest way is to stop processing the event queue and force the
2590         * engine to reset.
2591         */
2592        execlists->csb_head = tail;
2593        ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2594
2595        /*
2596         * Hopefully paired with a wmb() in HW!
2597         *
2598         * We must complete the read of the write pointer before any reads
2599         * from the CSB, so that we do not see stale values. Without an rmb
2600         * (lfence) the HW may speculatively perform the CSB[] reads *before*
2601         * we perform the READ_ONCE(*csb_write).
2602         */
2603        rmb();
2604        do {
2605                bool promote;
2606
2607                if (++head == num_entries)
2608                        head = 0;
2609
2610                /*
2611                 * We are flying near dragons again.
2612                 *
2613                 * We hold a reference to the request in execlist_port[]
2614                 * but no more than that. We are operating in softirq
2615                 * context and so cannot hold any mutex or sleep. That
2616                 * prevents us stopping the requests we are processing
2617                 * in port[] from being retired simultaneously (the
2618                 * breadcrumb will be complete before we see the
2619                 * context-switch). As we only hold the reference to the
2620                 * request, any pointer chasing underneath the request
2621                 * is subject to a potential use-after-free. Thus we
2622                 * store all of the bookkeeping within port[] as
2623                 * required, and avoid using unguarded pointers beneath
2624                 * request itself. The same applies to the atomic
2625                 * status notifier.
2626                 */
2627
2628                ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2629                             head,
2630                             upper_32_bits(buf[head]),
2631                             lower_32_bits(buf[head]));
2632
2633                if (INTEL_GEN(engine->i915) >= 12)
2634                        promote = gen12_csb_parse(buf + head);
2635                else
2636                        promote = gen8_csb_parse(buf + head);
2637                if (promote) {
2638                        struct i915_request * const *old = execlists->active;
2639
2640                        if (GEM_WARN_ON(!*execlists->pending)) {
2641                                execlists->error_interrupt |= ERROR_CSB;
2642                                break;
2643                        }
2644
2645                        ring_set_paused(engine, 0);
2646
2647                        /* Point active to the new ELSP; prevent overwriting */
2648                        WRITE_ONCE(execlists->active, execlists->pending);
2649                        smp_wmb(); /* notify execlists_active() */
2650
2651                        /* cancel old inflight, prepare for switch */
2652                        trace_ports(execlists, "preempted", old);
2653                        while (*old)
2654                                execlists_schedule_out(*old++);
2655
2656                        /* switch pending to inflight */
2657                        GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2658                        copy_ports(execlists->inflight,
2659                                   execlists->pending,
2660                                   execlists_num_ports(execlists));
2661                        smp_wmb(); /* complete the seqlock */
2662                        WRITE_ONCE(execlists->active, execlists->inflight);
2663
2664                        /* XXX Magic delay for tgl */
2665                        ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
2666
2667                        WRITE_ONCE(execlists->pending[0], NULL);
2668                } else {
2669                        if (GEM_WARN_ON(!*execlists->active)) {
2670                                execlists->error_interrupt |= ERROR_CSB;
2671                                break;
2672                        }
2673
2674                        /* port0 completed, advanced to port1 */
2675                        trace_ports(execlists, "completed", execlists->active);
2676
2677                        /*
2678                         * We rely on the hardware being strongly
2679                         * ordered, that the breadcrumb write is
2680                         * coherent (visible from the CPU) before the
2681                         * user interrupt is processed. One might assume
2682                         * that the breadcrumb write being before the
2683                         * user interrupt and the CS event for the context
2684                         * switch would therefore be before the CS event
2685                         * itself...
2686                         */
2687                        if (GEM_SHOW_DEBUG() &&
2688                            !i915_request_completed(*execlists->active)) {
2689                                struct i915_request *rq = *execlists->active;
2690                                const u32 *regs __maybe_unused =
2691                                        rq->context->lrc_reg_state;
2692
2693                                ENGINE_TRACE(engine,
2694                                             "context completed before request!\n");
2695                                ENGINE_TRACE(engine,
2696                                             "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2697                                             ENGINE_READ(engine, RING_START),
2698                                             ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2699                                             ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2700                                             ENGINE_READ(engine, RING_CTL),
2701                                             ENGINE_READ(engine, RING_MI_MODE));
2702                                ENGINE_TRACE(engine,
2703                                             "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2704                                             i915_ggtt_offset(rq->ring->vma),
2705                                             rq->head, rq->tail,
2706                                             rq->fence.context,
2707                                             lower_32_bits(rq->fence.seqno),
2708                                             hwsp_seqno(rq));
2709                                ENGINE_TRACE(engine,
2710                                             "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2711                                             regs[CTX_RING_START],
2712                                             regs[CTX_RING_HEAD],
2713                                             regs[CTX_RING_TAIL]);
2714                        }
2715
2716                        execlists_schedule_out(*execlists->active++);
2717
2718                        GEM_BUG_ON(execlists->active - execlists->inflight >
2719                                   execlists_num_ports(execlists));
2720                }
2721        } while (head != tail);
2722
2723        set_timeslice(engine);
2724
2725        /*
2726         * Gen11 has proven to fail wrt global observation point between
2727         * entry and tail update, failing on the ordering and thus
2728         * we see an old entry in the context status buffer.
2729         *
2730         * Forcibly evict the entries for the next GPU CSB update, to
2731         * increase the odds that we get fresh entries with non-working
2732         * hardware. The cost of doing so comes out mostly in the wash, as
2733         * the hardware, working or not, will need to do the invalidation
2734         * beforehand.
2735         */
2736        invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2737}
2738
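    /*
     * Only dequeue more work once the previous ELSP write has been
     * acknowledged, i.e. pending[0] has been cleared by process_csb();
     * until then the ports are busy and we wait for the next CS event.
     */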
2739static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2740{
2741        lockdep_assert_held(&engine->active.lock);
2742        if (!READ_ONCE(engine->execlists.pending[0])) {
2743                rcu_read_lock(); /* protect peeking at execlists->active */
2744                execlists_dequeue(engine);
2745                rcu_read_unlock();
2746        }
2747}
2748
2749static void __execlists_hold(struct i915_request *rq)
2750{
2751        LIST_HEAD(list);
2752
2753        do {
2754                struct i915_dependency *p;
2755
2756                if (i915_request_is_active(rq))
2757                        __i915_request_unsubmit(rq);
2758
2759                clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2760                list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2761                i915_request_set_hold(rq);
2762                RQ_TRACE(rq, "on hold\n");
2763
2764                for_each_waiter(p, rq) {
2765                        struct i915_request *w =
2766                                container_of(p->waiter, typeof(*w), sched);
2767
2768                        /* Leave semaphores spinning on the other engines */
2769                        if (w->engine != rq->engine)
2770                                continue;
2771
2772                        if (!i915_request_is_ready(w))
2773                                continue;
2774
2775                        if (i915_request_completed(w))
2776                                continue;
2777
2778                        if (i915_request_on_hold(w))
2779                                continue;
2780
2781                        list_move_tail(&w->sched.link, &list);
2782                }
2783
2784                rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2785        } while (rq);
2786}
2787
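    /*
     * Take rq (and every request queued behind it on this engine) off the
     * scheduler and onto the engine's hold list, so that it cannot be
     * resubmitted to the HW before the caller releases it. Returns true if
     * the request was placed on hold.
     */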
2788static bool execlists_hold(struct intel_engine_cs *engine,
2789                           struct i915_request *rq)
2790{
2791        if (i915_request_on_hold(rq))
2792                return false;
2793
2794        spin_lock_irq(&engine->active.lock);
2795
2796        if (i915_request_completed(rq)) { /* too late! */
2797                rq = NULL;
2798                goto unlock;
2799        }
2800
2801        if (rq->engine != engine) { /* preempted virtual engine */
2802                struct virtual_engine *ve = to_virtual_engine(rq->engine);
2803
2804                /*
2805                 * intel_context_inflight() is only protected by virtue
2806                 * of process_csb() being called only by the tasklet (or
2807                 * directly from inside reset while the tasklet is suspended).
2808                 * Assert that neither of those are allowed to run while we
2809                 * poke at the request queues.
2810                 */
2811                GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2812
2813                /*
2814                 * An unsubmitted request along a virtual engine will
2815                 * remain on the active (this) engine until we are able
2816                 * to process the context switch away (and so mark the
2817                 * context as no longer in flight). That cannot have happened
2818                 * yet, otherwise we would not be hanging!
2819                 */
2820                spin_lock(&ve->base.active.lock);
2821                GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2822                GEM_BUG_ON(ve->request != rq);
2823                ve->request = NULL;
2824                spin_unlock(&ve->base.active.lock);
2825                i915_request_put(rq);
2826
2827                rq->engine = engine;
2828        }
2829
2830        /*
2831         * Transfer this request onto the hold queue to prevent it from
2832         * being resubmitted to HW (and potentially completed) before we have
2833         * released it. Since we may have already submitted following
2834         * requests, we need to remove those as well.
2835         */
2836        GEM_BUG_ON(i915_request_on_hold(rq));
2837        GEM_BUG_ON(rq->engine != engine);
2838        __execlists_hold(rq);
2839        GEM_BUG_ON(list_empty(&engine->active.hold));
2840
2841unlock:
2842        spin_unlock_irq(&engine->active.lock);
2843        return rq;
2844}
2845
2846static bool hold_request(const struct i915_request *rq)
2847{
2848        struct i915_dependency *p;
2849        bool result = false;
2850
2851        /*
2852         * If one of our ancestors is on hold, we must also be on hold,
2853         * otherwise we will bypass it and execute before it.
2854         */
2855        rcu_read_lock();
2856        for_each_signaler(p, rq) {
2857                const struct i915_request *s =
2858                        container_of(p->signaler, typeof(*s), sched);
2859
2860                if (s->engine != rq->engine)
2861                        continue;
2862
2863                result = i915_request_on_hold(s);
2864                if (result)
2865                        break;
2866        }
2867        rcu_read_unlock();
2868
2869        return result;
2870}
2871
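/*
 * Return @rq from the hold list back to the priority queue, together with
 * any of its waiters on this engine that were held with it and have no other
 * on-hold signalers, propagating any error status along the way.
 */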
2872static void __execlists_unhold(struct i915_request *rq)
2873{
2874        LIST_HEAD(list);
2875
2876        do {
2877                struct i915_dependency *p;
2878
2879                RQ_TRACE(rq, "hold release\n");
2880
2881                GEM_BUG_ON(!i915_request_on_hold(rq));
2882                GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2883
2884                i915_request_clear_hold(rq);
2885                list_move_tail(&rq->sched.link,
2886                               i915_sched_lookup_priolist(rq->engine,
2887                                                          rq_prio(rq)));
2888                set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2889
2890                /* Also release any children on this engine that are ready */
2891                for_each_waiter(p, rq) {
2892                        struct i915_request *w =
2893                                container_of(p->waiter, typeof(*w), sched);
2894
2895                        /* Propagate any change in error status */
2896                        if (rq->fence.error)
2897                                i915_request_set_error_once(w, rq->fence.error);
2898
2899                        if (w->engine != rq->engine)
2900                                continue;
2901
2902                        if (!i915_request_on_hold(w))
2903                                continue;
2904
2905                        /* Check that no other parents are also on hold */
2906                        if (hold_request(w))
2907                                continue;
2908
2909                        list_move_tail(&w->sched.link, &list);
2910                }
2911
2912                rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2913        } while (rq);
2914}
2915
2916static void execlists_unhold(struct intel_engine_cs *engine,
2917                             struct i915_request *rq)
2918{
2919        spin_lock_irq(&engine->active.lock);
2920
2921        /*
2922         * Move this request back to the priority queue, and all of its
2923         * children and grandchildren that were suspended along with it.
2924         */
2925        __execlists_unhold(rq);
2926
2927        if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2928                engine->execlists.queue_priority_hint = rq_prio(rq);
2929                tasklet_hi_schedule(&engine->execlists.tasklet);
2930        }
2931
2932        spin_unlock_irq(&engine->active.lock);
2933}
2934
2935struct execlists_capture {
2936        struct work_struct work;
2937        struct i915_request *rq;
2938        struct i915_gpu_coredump *error;
2939};
2940
2941static void execlists_capture_work(struct work_struct *work)
2942{
2943        struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2944        const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2945        struct intel_engine_cs *engine = cap->rq->engine;
2946        struct intel_gt_coredump *gt = cap->error->gt;
2947        struct intel_engine_capture_vma *vma;
2948
2949        /* Compress all the objects attached to the request, slow! */
2950        vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2951        if (vma) {
2952                struct i915_vma_compress *compress =
2953                        i915_vma_capture_prepare(gt);
2954
2955                intel_engine_coredump_add_vma(gt->engine, vma, compress);
2956                i915_vma_capture_finish(gt, compress);
2957        }
2958
2959        gt->simulated = gt->engine->simulated;
2960        cap->error->simulated = gt->simulated;
2961
2962        /* Publish the error state, and announce it to the world */
2963        i915_error_state_store(cap->error);
2964        i915_gpu_coredump_put(cap->error);
2965
2966        /* Return this request and all that depend upon it for signaling */
2967        execlists_unhold(engine, cap->rq);
2968        i915_request_put(cap->rq);
2969
2970        kfree(cap);
2971}
2972
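/*
 * Allocate the scaffolding for an engine coredump (device, GT and engine
 * snapshots) using only atomic allocations, as we are called from the
 * softirq tasklet while delaying the forced preemption.
 */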
2973static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2974{
2975        const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2976        struct execlists_capture *cap;
2977
2978        cap = kmalloc(sizeof(*cap), gfp);
2979        if (!cap)
2980                return NULL;
2981
2982        cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2983        if (!cap->error)
2984                goto err_cap;
2985
2986        cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2987        if (!cap->error->gt)
2988                goto err_gpu;
2989
2990        cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2991        if (!cap->error->gt->engine)
2992                goto err_gt;
2993
2994        return cap;
2995
2996err_gt:
2997        kfree(cap->error->gt);
2998err_gpu:
2999        kfree(cap->error);
3000err_cap:
3001        kfree(cap);
3002        return NULL;
3003}
3004
3005static struct i915_request *
3006active_context(struct intel_engine_cs *engine, u32 ccid)
3007{
3008        const struct intel_engine_execlists * const el = &engine->execlists;
3009        struct i915_request * const *port, *rq;
3010
3011        /*
3012         * Use the most recent result from process_csb(), but just in case
3013         * we trigger an error (via interrupt) before the first CS event has
3014         * been written, peek at the next submission.
3015         */
3016
3017        for (port = el->active; (rq = *port); port++) {
3018                if (rq->context->lrc.ccid == ccid) {
3019                        ENGINE_TRACE(engine,
3020                                     "ccid found at active:%zd\n",
3021                                     port - el->active);
3022                        return rq;
3023                }
3024        }
3025
3026        for (port = el->pending; (rq = *port); port++) {
3027                if (rq->context->lrc.ccid == ccid) {
3028                        ENGINE_TRACE(engine,
3029                                     "ccid found at pending:%zd\n",
3030                                     port - el->pending);
3031                        return rq;
3032                }
3033        }
3034
3035        ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3036        return NULL;
3037}
3038
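/*
 * The upper dword of the execlists status register reports the context ID
 * (CCID) of the context currently running on the engine; see also the ccid
 * lookups in active_context() above.
 */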
3039static u32 active_ccid(struct intel_engine_cs *engine)
3040{
3041        return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3042}
3043
3044static void execlists_capture(struct intel_engine_cs *engine)
3045{
3046        struct execlists_capture *cap;
3047
3048        if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3049                return;
3050
3051        /*
3052         * We need to _quickly_ capture the engine state before we reset.
3053         * We are inside an atomic section (softirq) here and we are delaying
3054         * the forced preemption event.
3055         */
3056        cap = capture_regs(engine);
3057        if (!cap)
3058                return;
3059
3060        spin_lock_irq(&engine->active.lock);
3061        cap->rq = active_context(engine, active_ccid(engine));
3062        if (cap->rq) {
3063                cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3064                cap->rq = i915_request_get_rcu(cap->rq);
3065        }
3066        spin_unlock_irq(&engine->active.lock);
3067        if (!cap->rq)
3068                goto err_free;
3069
3070        /*
3071         * Remove the request from the execlists queue, and take ownership
3072         * of the request. We pass it to our worker who will _slowly_ compress
3073         * all the pages the _user_ requested for debugging their batch, after
3074         * which we return it to the queue for signaling.
3075         *
3076         * By removing them from the execlists queue, we also remove the
3077         * requests from being processed by __unwind_incomplete_requests()
3078         * during the intel_engine_reset(), and so they will *not* be replayed
3079         * afterwards.
3080         *
3081         * Note that because we have not yet reset the engine at this point,
3082         * it is possible that the request we have identified as being
3083         * guilty did in fact complete, and we will then hit an arbitration
3084         * point allowing the outstanding preemption to succeed. The likelihood
3085         * of that is very low (as capturing of the engine registers should be
3086         * fast enough to run inside an irq-off atomic section!), so we will
3087         * simply hold that request accountable for being non-preemptible
3088         * long enough to force the reset.
3089         */
3090        if (!execlists_hold(engine, cap->rq))
3091                goto err_rq;
3092
3093        INIT_WORK(&cap->work, execlists_capture_work);
3094        schedule_work(&cap->work);
3095        return;
3096
3097err_rq:
3098        i915_request_put(cap->rq);
3099err_free:
3100        i915_gpu_coredump_put(cap->error);
3101        kfree(cap);
3102}
3103
3104static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3105{
3106        const unsigned int bit = I915_RESET_ENGINE + engine->id;
3107        unsigned long *lock = &engine->gt->reset.flags;
3108
3109        if (!intel_has_reset_engine(engine->gt))
3110                return;
3111
3112        if (test_and_set_bit(bit, lock))
3113                return;
3114
3115        ENGINE_TRACE(engine, "reset for %s\n", msg);
3116
3117        /* Mark this tasklet as disabled to avoid waiting for it to complete */
3118        tasklet_disable_nosync(&engine->execlists.tasklet);
3119
3120        ring_set_paused(engine, 1); /* Freeze the current request in place */
3121        execlists_capture(engine);
3122        intel_engine_reset(engine, msg);
3123
3124        tasklet_enable(&engine->execlists.tasklet);
3125        clear_and_wake_up_bit(bit, lock);
3126}
3127
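/*
 * A forced preemption has timed out if the preempt timer expired while the
 * HW has still not acknowledged the pending ELSP submission.
 */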
3128static bool preempt_timeout(const struct intel_engine_cs *const engine)
3129{
3130        const struct timer_list *t = &engine->execlists.preempt;
3131
3132        if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3133                return false;
3134
3135        if (!timer_expired(t))
3136                return false;
3137
3138        return READ_ONCE(engine->execlists.pending[0]);
3139}
3140
3141/*
3142 * Check the unread Context Status Buffers and manage the submission of new
3143 * contexts to the ELSP accordingly.
3144 */
3145static void execlists_submission_tasklet(unsigned long data)
3146{
3147        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3148        bool timeout = preempt_timeout(engine);
3149
3150        process_csb(engine);
3151
3152        if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3153                const char *msg;
3154
3155                /* Generate the error message in priority wrt the user! */
3156                if (engine->execlists.error_interrupt & GENMASK(15, 0))
3157                        msg = "CS error"; /* thrown by a user payload */
3158                else if (engine->execlists.error_interrupt & ERROR_CSB)
3159                        msg = "invalid CSB event";
3160                else
3161                        msg = "internal error";
3162
3163                engine->execlists.error_interrupt = 0;
3164                execlists_reset(engine, msg);
3165        }
3166
3167        if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3168                unsigned long flags;
3169
3170                spin_lock_irqsave(&engine->active.lock, flags);
3171                __execlists_submission_tasklet(engine);
3172                spin_unlock_irqrestore(&engine->active.lock, flags);
3173
3174                /* Recheck after serialising with direct-submission */
3175                if (unlikely(timeout && preempt_timeout(engine))) {
3176                        cancel_timer(&engine->execlists.preempt);
3177                        execlists_reset(engine, "preemption time out");
3178                }
3179        }
3180}
3181
3182static void __execlists_kick(struct intel_engine_execlists *execlists)
3183{
3184        /* Kick the tasklet for some interrupt coalescing and reset handling */
3185        tasklet_hi_schedule(&execlists->tasklet);
3186}
3187
3188#define execlists_kick(t, member) \
3189        __execlists_kick(container_of(t, struct intel_engine_execlists, member))
3190
3191static void execlists_timeslice(struct timer_list *timer)
3192{
3193        execlists_kick(timer, timer);
3194}
3195
3196static void execlists_preempt(struct timer_list *timer)
3197{
3198        execlists_kick(timer, preempt);
3199}
3200
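/* Add the request to the priority queue and mark it as residing there. */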
3201static void queue_request(struct intel_engine_cs *engine,
3202                          struct i915_request *rq)
3203{
3204        GEM_BUG_ON(!list_empty(&rq->sched.link));
3205        list_add_tail(&rq->sched.link,
3206                      i915_sched_lookup_priolist(engine, rq_prio(rq)));
3207        set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3208}
3209
3210static void __submit_queue_imm(struct intel_engine_cs *engine)
3211{
3212        struct intel_engine_execlists * const execlists = &engine->execlists;
3213
3214        if (reset_in_progress(execlists))
3215                return; /* defer until we restart the engine following reset */
3216
3217        __execlists_submission_tasklet(engine);
3218}
3219
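/*
 * Only kick the direct submission path if this request has a higher priority
 * than anything already waiting in the queue (as tracked by the
 * queue_priority_hint).
 */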
3220static void submit_queue(struct intel_engine_cs *engine,
3221                         const struct i915_request *rq)
3222{
3223        struct intel_engine_execlists *execlists = &engine->execlists;
3224
3225        if (rq_prio(rq) <= execlists->queue_priority_hint)
3226                return;
3227
3228        execlists->queue_priority_hint = rq_prio(rq);
3229        __submit_queue_imm(engine);
3230}
3231
3232static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3233                             const struct i915_request *rq)
3234{
3235        GEM_BUG_ON(i915_request_on_hold(rq));
3236        return !list_empty(&engine->active.hold) && hold_request(rq);
3237}
3238
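/*
 * Opportunistically drain any outstanding CSB events so that
 * execlists->pending[] may be cleared before we attempt a direct submission,
 * skipping it if the tasklet is already running or a reset is in progress.
 */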
3239static void flush_csb(struct intel_engine_cs *engine)
3240{
3241        struct intel_engine_execlists *el = &engine->execlists;
3242
3243        if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
3244                if (!reset_in_progress(el))
3245                        process_csb(engine);
3246                tasklet_unlock(&el->tasklet);
3247        }
3248}
3249
3250static void execlists_submit_request(struct i915_request *request)
3251{
3252        struct intel_engine_cs *engine = request->engine;
3253        unsigned long flags;
3254
3255        /* Hopefully we clear execlists->pending[] to let us through */
3256        flush_csb(engine);
3257
3258        /* Will be called from irq-context when using foreign fences. */
3259        spin_lock_irqsave(&engine->active.lock, flags);
3260
3261        if (unlikely(ancestor_on_hold(engine, request))) {
3262                RQ_TRACE(request, "ancestor on hold\n");
3263                list_add_tail(&request->sched.link, &engine->active.hold);
3264                i915_request_set_hold(request);
3265        } else {
3266                queue_request(engine, request);
3267
3268                GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3269                GEM_BUG_ON(list_empty(&request->sched.link));
3270
3271                submit_queue(engine, request);
3272        }
3273
3274        spin_unlock_irqrestore(&engine->active.lock, flags);
3275}
3276
3277static void __execlists_context_fini(struct intel_context *ce)
3278{
3279        intel_ring_put(ce->ring);
3280        i915_vma_put(ce->state);
3281}
3282
3283static void execlists_context_destroy(struct kref *kref)
3284{
3285        struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3286
3287        GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3288        GEM_BUG_ON(intel_context_is_pinned(ce));
3289
3290        if (ce->state)
3291                __execlists_context_fini(ce);
3292
3293        intel_context_fini(ce);
3294        intel_context_free(ce);
3295}
3296
3297static void
3298set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3299{
3300        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3301                return;
3302
3303        vaddr += engine->context_size;
3304
3305        memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3306}
3307
3308static void
3309check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3310{
3311        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3312                return;
3313
3314        vaddr += engine->context_size;
3315
3316        if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3317                drm_err_once(&engine->i915->drm,
3318                             "%s context redzone overwritten!\n",
3319                             engine->name);
3320}
3321
3322static void execlists_context_unpin(struct intel_context *ce)
3323{
3324        check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3325                      ce->engine);
3326}
3327
3328static void execlists_context_post_unpin(struct intel_context *ce)
3329{
3330        i915_gem_object_unpin_map(ce->state->obj);
3331}
3332
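/*
 * Reload the saved CTX_TIMESTAMP value from the context image into the
 * RING_CTX_TIMESTAMP register, staged through CS GPR0; the final
 * register-to-register copy is issued twice.
 */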
3333static u32 *
3334gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3335{
3336        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3337                MI_SRM_LRM_GLOBAL_GTT |
3338                MI_LRI_LRM_CS_MMIO;
3339        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3340        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3341                CTX_TIMESTAMP * sizeof(u32);
3342        *cs++ = 0;
3343
3344        *cs++ = MI_LOAD_REGISTER_REG |
3345                MI_LRR_SOURCE_CS_MMIO |
3346                MI_LRI_LRM_CS_MMIO;
3347        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3348        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3349
3350        *cs++ = MI_LOAD_REGISTER_REG |
3351                MI_LRR_SOURCE_CS_MMIO |
3352                MI_LRI_LRM_CS_MMIO;
3353        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3354        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3355
3356        return cs;
3357}
3358
3359static u32 *
3360gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3361{
3362        GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3363
3364        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3365                MI_SRM_LRM_GLOBAL_GTT |
3366                MI_LRI_LRM_CS_MMIO;
3367        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3368        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3369                (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3370        *cs++ = 0;
3371
3372        return cs;
3373}
3374
3375static u32 *
3376gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3377{
3378        GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3379
3380        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3381                MI_SRM_LRM_GLOBAL_GTT |
3382                MI_LRI_LRM_CS_MMIO;
3383        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3384        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3385                (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3386        *cs++ = 0;
3387
3388        *cs++ = MI_LOAD_REGISTER_REG |
3389                MI_LRR_SOURCE_CS_MMIO |
3390                MI_LRI_LRM_CS_MMIO;
3391        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3392        *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3393
3394        return cs;
3395}
3396
3397static u32 *
3398gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3399{
3400        cs = gen12_emit_timestamp_wa(ce, cs);
3401        cs = gen12_emit_cmd_buf_wa(ce, cs);
3402        cs = gen12_emit_restore_scratch(ce, cs);
3403
3404        return cs;
3405}
3406
3407static u32 *
3408gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3409{
3410        cs = gen12_emit_timestamp_wa(ce, cs);
3411        cs = gen12_emit_restore_scratch(ce, cs);
3412
3413        return cs;
3414}
3415
3416static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3417{
3418        return PAGE_SIZE * ce->wa_bb_page;
3419}
3420
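/* CPU pointer to the per-context indirect workaround batch buffer page. */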
3421static u32 *context_indirect_bb(const struct intel_context *ce)
3422{
3423        void *ptr;
3424
3425        GEM_BUG_ON(!ce->wa_bb_page);
3426
3427        ptr = ce->lrc_reg_state;
3428        ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3429        ptr += context_wa_bb_offset(ce);
3430
3431        return ptr;
3432}
3433
3434static void
3435setup_indirect_ctx_bb(const struct intel_context *ce,
3436                      const struct intel_engine_cs *engine,
3437                      u32 *(*emit)(const struct intel_context *, u32 *))
3438{
3439        u32 * const start = context_indirect_bb(ce);
3440        u32 *cs;
3441
3442        cs = emit(ce, start);
3443        GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3444        while ((unsigned long)cs % CACHELINE_BYTES)
3445                *cs++ = MI_NOOP;
3446
3447        lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3448                                    i915_ggtt_offset(ce->state) +
3449                                    context_wa_bb_offset(ce),
3450                                    (cs - start) * sizeof(*cs));
3451}
3452
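/*
 * Refresh the ring registers (start, head, tail, ctl) in the context image,
 * along with the render-only RPCS/OA state and, where a wa_bb_page exists,
 * the per-context indirect workaround batch buffer.
 */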
3453static void
3454__execlists_update_reg_state(const struct intel_context *ce,
3455                             const struct intel_engine_cs *engine,
3456                             u32 head)
3457{
3458        struct intel_ring *ring = ce->ring;
3459        u32 *regs = ce->lrc_reg_state;
3460
3461        GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3462        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3463
3464        regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3465        regs[CTX_RING_HEAD] = head;
3466        regs[CTX_RING_TAIL] = ring->tail;
3467        regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3468
3469        /* RPCS */
3470        if (engine->class == RENDER_CLASS) {
3471                regs[CTX_R_PWR_CLK_STATE] =
3472                        intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3473
3474                i915_oa_init_reg_state(ce, engine);
3475        }
3476
3477        if (ce->wa_bb_page) {
3478                u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3479
3480                fn = gen12_emit_indirect_ctx_xcs;
3481                if (ce->engine->class == RENDER_CLASS)
3482                        fn = gen12_emit_indirect_ctx_rcs;
3483
3484                /* Mutually exclusive wrt the global indirect bb */
3485                GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3486                setup_indirect_ctx_bb(ce, engine, fn);
3487        }
3488}
3489
3490static int
3491execlists_context_pre_pin(struct intel_context *ce,
3492                          struct i915_gem_ww_ctx *ww, void **vaddr)
3493{
3494        GEM_BUG_ON(!ce->state);
3495        GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3496
3497        *vaddr = i915_gem_object_pin_map(ce->state->obj,
3498                                        i915_coherent_map_type(ce->engine->i915) |
3499                                        I915_MAP_OVERRIDE);
3500
3501        return PTR_ERR_OR_ZERO(*vaddr);
3502}
3503
3504static int
3505__execlists_context_pin(struct intel_context *ce,
3506                        struct intel_engine_cs *engine,
3507                        void *vaddr)
3508{
3509        ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3510        ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3511        __execlists_update_reg_state(ce, engine, ce->ring->tail);
3512
3513        return 0;
3514}
3515
3516static int execlists_context_pin(struct intel_context *ce, void *vaddr)
3517{
3518        return __execlists_context_pin(ce, ce->engine, vaddr);
3519}
3520
3521static int execlists_context_alloc(struct intel_context *ce)
3522{
3523        return __execlists_context_alloc(ce, ce->engine);
3524}
3525
3526static void execlists_context_reset(struct intel_context *ce)
3527{
3528        CE_TRACE(ce, "reset\n");
3529        GEM_BUG_ON(!intel_context_is_pinned(ce));
3530
3531        intel_ring_reset(ce->ring, ce->ring->emit);
3532
3533        /* Scrub away the garbage */
3534        execlists_init_reg_state(ce->lrc_reg_state,
3535                                 ce, ce->engine, ce->ring, true);
3536        __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3537
3538        ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3539}
3540
3541static const struct intel_context_ops execlists_context_ops = {
3542        .alloc = execlists_context_alloc,
3543
3544        .pre_pin = execlists_context_pre_pin,
3545        .pin = execlists_context_pin,
3546        .unpin = execlists_context_unpin,
3547        .post_unpin = execlists_context_post_unpin,
3548
3549        .enter = intel_context_enter_engine,
3550        .exit = intel_context_exit_engine,
3551
3552        .reset = execlists_context_reset,
3553        .destroy = execlists_context_destroy,
3554};
3555
3556static u32 hwsp_offset(const struct i915_request *rq)
3557{
3558        const struct intel_timeline_cacheline *cl;
3559
3560        /* Before the request is executed, the timeline/cacheline is fixed */
3561
3562        cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
3563        if (cl)
3564                return cl->ggtt_offset;
3565
3566        return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
3567}
3568
3569static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3570{
3571        u32 *cs;
3572
3573        GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3574        if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3575                return 0;
3576
3577        cs = intel_ring_begin(rq, 6);
3578        if (IS_ERR(cs))
3579                return PTR_ERR(cs);
3580
3581        /*
3582         * Check if we have been preempted before we even get started.
3583         *
3584         * After this point i915_request_started() reports true, even if
3585         * we get preempted and so are no longer running.
3586         */
3587        *cs++ = MI_ARB_CHECK;
3588        *cs++ = MI_NOOP;
3589
3590        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3591        *cs++ = hwsp_offset(rq);
3592        *cs++ = 0;
3593        *cs++ = rq->fence.seqno - 1;
3594
3595        intel_ring_advance(rq, cs);
3596
3597        /* Record the updated position of the request's payload */
3598        rq->infix = intel_ring_offset(rq, cs);
3599
3600        __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3601
3602        return 0;
3603}
3604
3605static int emit_pdps(struct i915_request *rq)
3606{
3607        const struct intel_engine_cs * const engine = rq->engine;
3608        struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3609        int err, i;
3610        u32 *cs;
3611
3612        GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
3613
3614        /*
3615         * Beware ye of the dragons, this sequence is magic!
3616         *
3617         * Small changes to this sequence can cause anything from
3618         * GPU hangs to forcewake errors and machine lockups!
3619         */
3620
3621        /* Flush any residual operations from the context load */
3622        err = engine->emit_flush(rq, EMIT_FLUSH);
3623        if (err)
3624                return err;
3625
3626        /* Magic required to prevent forcewake errors! */
3627        err = engine->emit_flush(rq, EMIT_INVALIDATE);
3628        if (err)
3629                return err;
3630
3631        cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3632        if (IS_ERR(cs))
3633                return PTR_ERR(cs);
3634
3635        /* Ensure the LRI have landed before we invalidate & continue */
3636        *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3637        for (i = GEN8_3LVL_PDPES; i--; ) {
3638                const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3639                u32 base = engine->mmio_base;
3640
3641                *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3642                *cs++ = upper_32_bits(pd_daddr);
3643                *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3644                *cs++ = lower_32_bits(pd_daddr);
3645        }
3646        *cs++ = MI_NOOP;
3647
3648        intel_ring_advance(rq, cs);
3649
3650        return 0;
3651}
3652
3653static int execlists_request_alloc(struct i915_request *request)
3654{
3655        int ret;
3656
3657        GEM_BUG_ON(!intel_context_is_pinned(request->context));
3658
3659        /*
3660         * Flush enough space to reduce the likelihood of waiting after
3661         * we start building the request - in which case we will just
3662         * have to repeat work.
3663         */
3664        request->reserved_space += EXECLISTS_REQUEST_SIZE;
3665
3666        /*
3667         * Note that after this point, we have committed to using
3668         * this request as it is being used to both track the
3669         * state of engine initialisation and liveness of the
3670         * golden renderstate above. Think twice before you try
3671         * to cancel/unwind this request now.
3672         */
3673
3674        if (!i915_vm_is_4lvl(request->context->vm)) {
3675                ret = emit_pdps(request);
3676                if (ret)
3677                        return ret;
3678        }
3679
3680        /* Unconditionally invalidate GPU caches and TLBs. */
3681        ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3682        if (ret)
3683                return ret;
3684
3685        request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3686        return 0;
3687}
3688
3689/*
3690 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3691 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3692 * but there is a slight complication as this is applied in a WA batch where the
3693 * values are only initialized once, so we cannot take the register value at the
3694 * beginning and reuse it further; hence we save its value to memory, upload a
3695 * constant value with bit21 set and then we restore it back with the saved value.
3696 * To simplify the WA, a constant value is formed by using the default value
3697 * of this register. This shouldn't be a problem because we are only modifying
3698 * it for a short period and this batch is non-preemptible. We can of course
3699 * use additional instructions that read the actual value of the register
3700 * at that time and set our bit of interest but it makes the WA complicated.
3701 *
3702 * This WA is also required for Gen9 so extracting as a function avoids
3703 * code duplication.
3704 */
3705static u32 *
3706gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3707{
3708        /* NB no one else is allowed to scribble over scratch + 256! */
3709        *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3710        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3711        *batch++ = intel_gt_scratch_offset(engine->gt,
3712                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3713        *batch++ = 0;
3714
3715        *batch++ = MI_LOAD_REGISTER_IMM(1);
3716        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3717        *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3718
3719        batch = gen8_emit_pipe_control(batch,
3720                                       PIPE_CONTROL_CS_STALL |
3721                                       PIPE_CONTROL_DC_FLUSH_ENABLE,
3722                                       0);
3723
3724        *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3725        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3726        *batch++ = intel_gt_scratch_offset(engine->gt,
3727                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3728        *batch++ = 0;
3729
3730        return batch;
3731}
3732
3733/*
3734 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3735 * initialized at the beginning and shared across all contexts but this field
3736 * helps us to have multiple batches at different offsets and select them based
3737 * on some criteria. At the moment this batch always starts at the beginning of the page
3738 * and at this point we don't have multiple wa_ctx batch buffers.
3739 *
3740 * The number of WAs applied is not known at the beginning; we use this field
3741 * to return the number of DWORDs written.
3742 *
3743 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3744 * so it adds NOOPs as padding to make it cacheline aligned.
3745 * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them
3746 * together make a complete batch buffer.
3747 */
3748static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3749{
3750        /* WaDisableCtxRestoreArbitration:bdw,chv */
3751        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3752
3753        /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3754        if (IS_BROADWELL(engine->i915))
3755                batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3756
3757        /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3758        /* Actual scratch location is at 128 bytes offset */
3759        batch = gen8_emit_pipe_control(batch,
3760                                       PIPE_CONTROL_FLUSH_L3 |
3761                                       PIPE_CONTROL_STORE_DATA_INDEX |
3762                                       PIPE_CONTROL_CS_STALL |
3763                                       PIPE_CONTROL_QW_WRITE,
3764                                       LRC_PPHWSP_SCRATCH_ADDR);
3765
3766        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3767
3768        /* Pad to end of cacheline */
3769        while ((unsigned long)batch % CACHELINE_BYTES)
3770                *batch++ = MI_NOOP;
3771
3772        /*
3773         * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3774         * execution depends on the length specified in terms of cache lines
3775         * in the register CTX_RCS_INDIRECT_CTX
3776         */
3777
3778        return batch;
3779}
3780
3781struct lri {
3782        i915_reg_t reg;
3783        u32 value;
3784};
3785
3786static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3787{
3788        GEM_BUG_ON(!count || count > 63);
3789
3790        *batch++ = MI_LOAD_REGISTER_IMM(count);
3791        do {
3792                *batch++ = i915_mmio_reg_offset(lri->reg);
3793                *batch++ = lri->value;
3794        } while (lri++, --count);
3795        *batch++ = MI_NOOP;
3796
3797        return batch;
3798}
3799
3800static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3801{
3802        static const struct lri lri[] = {
3803                /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3804                {
3805                        COMMON_SLICE_CHICKEN2,
3806                        __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3807                                       0),
3808                },
3809
3810                /* BSpec: 11391 */
3811                {
3812                        FF_SLICE_CHICKEN,
3813                        __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3814                                       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3815                },
3816
3817                /* BSpec: 11299 */
3818                {
3819                        _3D_CHICKEN3,
3820                        __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3821                                       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3822                }
3823        };
3824
3825        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3826
3827        /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3828        batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3829
3830        /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3831        batch = gen8_emit_pipe_control(batch,
3832                                       PIPE_CONTROL_FLUSH_L3 |
3833                                       PIPE_CONTROL_STORE_DATA_INDEX |
3834                                       PIPE_CONTROL_CS_STALL |
3835                                       PIPE_CONTROL_QW_WRITE,
3836                                       LRC_PPHWSP_SCRATCH_ADDR);
3837
3838        batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3839
3840        /* WaMediaPoolStateCmdInWABB:bxt,glk */
3841        if (HAS_POOLED_EU(engine->i915)) {
3842                /*
3843                 * EU pool configuration is set up along with the golden
3844                 * context during context initialization. This value depends on
3845                 * device type (2x6 or 3x6) and needs to be updated based
3846                 * on which subslice is disabled especially for 2x6
3847                 * devices, however it is safe to load default
3848                 * configuration of 3x6 device instead of masking off
3849                 * corresponding bits because HW ignores bits of a disabled
3850                 * subslice and drops down to appropriate config. Please
3851                 * see render_state_setup() in i915_gem_render_state.c for
3852                 * possible configurations, to avoid duplication they are
3853                 * not shown here again.
3854                 */
3855                *batch++ = GEN9_MEDIA_POOL_STATE;
3856                *batch++ = GEN9_MEDIA_POOL_ENABLE;
3857                *batch++ = 0x00777000;
3858                *batch++ = 0;
3859                *batch++ = 0;
3860                *batch++ = 0;
3861        }
3862
3863        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3864
3865        /* Pad to end of cacheline */
3866        while ((unsigned long)batch % CACHELINE_BYTES)
3867                *batch++ = MI_NOOP;
3868
3869        return batch;
3870}
3871
3872static u32 *
3873gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3874{
3875        int i;
3876
3877        /*
3878         * WaPipeControlBefore3DStateSamplePattern: cnl
3879         *
3880         * Ensure the engine is idle prior to programming a
3881         * 3DSTATE_SAMPLE_PATTERN during a context restore.
3882         */
3883        batch = gen8_emit_pipe_control(batch,
3884                                       PIPE_CONTROL_CS_STALL,
3885                                       0);
3886        /*
3887         * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3888         * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3889         * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3890         * confusing. Since gen8_emit_pipe_control() already advances the
3891         * batch by 6 dwords, we advance the other 10 here, completing a
3892         * cacheline. It's not clear if the workaround requires this padding
3893         * before other commands, or if it's just the regular padding we would
3894         * already have for the workaround bb, so leave it here for now.
3895         */
3896        for (i = 0; i < 10; i++)
3897                *batch++ = MI_NOOP;
3898
3899        /* Pad to end of cacheline */
3900        while ((unsigned long)batch % CACHELINE_BYTES)
3901                *batch++ = MI_NOOP;
3902
3903        return batch;
3904}
3905
3906#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3907
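/* Allocate and pin a single page in the GGTT to hold the WA batch buffers. */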
3908static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3909{
3910        struct drm_i915_gem_object *obj;
3911        struct i915_vma *vma;
3912        int err;
3913
3914        obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3915        if (IS_ERR(obj))
3916                return PTR_ERR(obj);
3917
3918        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3919        if (IS_ERR(vma)) {
3920                err = PTR_ERR(vma);
3921                goto err;
3922        }
3923
3924        err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
3925        if (err)
3926                goto err;
3927
3928        engine->wa_ctx.vma = vma;
3929        return 0;
3930
3931err:
3932        i915_gem_object_put(obj);
3933        return err;
3934}
3935
3936static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3937{
3938        i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3939}
3940
3941typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3942
3943static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3944{
3945        struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3946        struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3947                                            &wa_ctx->per_ctx };
3948        wa_bb_func_t wa_bb_fn[2];
3949        void *batch, *batch_ptr;
3950        unsigned int i;
3951        int ret;
3952
3953        if (engine->class != RENDER_CLASS)
3954                return 0;
3955
3956        switch (INTEL_GEN(engine->i915)) {
3957        case 12:
3958        case 11:
3959                return 0;
3960        case 10:
3961                wa_bb_fn[0] = gen10_init_indirectctx_bb;
3962                wa_bb_fn[1] = NULL;
3963                break;
3964        case 9:
3965                wa_bb_fn[0] = gen9_init_indirectctx_bb;
3966                wa_bb_fn[1] = NULL;
3967                break;
3968        case 8:
3969                wa_bb_fn[0] = gen8_init_indirectctx_bb;
3970                wa_bb_fn[1] = NULL;
3971                break;
3972        default:
3973                MISSING_CASE(INTEL_GEN(engine->i915));
3974                return 0;
3975        }
3976
3977        ret = lrc_setup_wa_ctx(engine);
3978        if (ret) {
3979                drm_dbg(&engine->i915->drm,
3980                        "Failed to setup context WA page: %d\n", ret);
3981                return ret;
3982        }
3983
3984        batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
        if (IS_ERR(batch)) {
                lrc_destroy_wa_ctx(engine);
                return PTR_ERR(batch);
        }
3985
3986        /*
3987         * Emit the two workaround batch buffers, recording the offset from the
3988         * start of the workaround batch buffer object for each and their
3989         * respective sizes.
3990         */
3991        batch_ptr = batch;
3992        for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3993                wa_bb[i]->offset = batch_ptr - batch;
3994                if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3995                                                  CACHELINE_BYTES))) {
3996                        ret = -EINVAL;
3997                        break;
3998                }
3999                if (wa_bb_fn[i])
4000                        batch_ptr = wa_bb_fn[i](engine, batch_ptr);
4001                wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
4002        }
4003        GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
4004
4005        __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
4006        __i915_gem_object_release_map(wa_ctx->vma->obj);
4007        if (ret)
4008                lrc_destroy_wa_ctx(engine);
4009
4010        return ret;
4011}
4012
4013static void reset_csb_pointers(struct intel_engine_cs *engine)
4014{
4015        struct intel_engine_execlists * const execlists = &engine->execlists;
4016        const unsigned int reset_value = execlists->csb_size - 1;
4017
4018        ring_set_paused(engine, 0);
4019
4020        /*
4021         * Sometimes Icelake forgets to reset its pointers on a GPU reset.
4022         * Bludgeon them with an mmio update to be sure.
4023         */
4024        ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4025                     0xffff << 16 | reset_value << 8 | reset_value);
4026        ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4027
4028        /*
4029         * After a reset, the HW starts writing into CSB entry [0]. We
4030         * therefore have to set our HEAD pointer back one entry so that
4031         * the *first* entry we check is entry 0. To complicate this further,
4032         * as we don't wait for the first interrupt after reset, we have to
4033         * fake the HW write to point back to the last entry so that our
4034         * inline comparison of our cached head position against the last HW
4035         * write works even before the first interrupt.
4036         */
4037        execlists->csb_head = reset_value;
4038        WRITE_ONCE(*execlists->csb_write, reset_value);
4039        wmb(); /* Make sure this is visible to HW (paranoia?) */
4040
4041        /* Check that the GPU does indeed update the CSB entries! */
4042        memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
4043        invalidate_csb_entries(&execlists->csb_status[0],
4044                               &execlists->csb_status[reset_value]);
4045
4046        /* Once more for luck and our trusty paranoia */
4047        ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4048                     0xffff << 16 | reset_value << 8 | reset_value);
4049        ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4050
4051        GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
4052}
4053
4054static void execlists_sanitize(struct intel_engine_cs *engine)
4055{
4056        /*
4057         * Poison residual state on resume, in case the suspend didn't!
4058         *
4059         * We have to assume that across suspend/resume (or other loss
4060         * of control) the contents of our pinned buffers have been
4061         * lost, replaced by garbage. Since this doesn't always happen,
4062         * let's poison such state so that we more quickly spot when
4063         * we falsely assume it has been preserved.
4064         */
4065        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4066                memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4067
4068        reset_csb_pointers(engine);
4069
4070        /*
4071         * The kernel_context HWSP is stored in the status_page. As above,
4072         * that may be lost on resume/initialisation, and so we need to
4073         * reset the value in the HWSP.
4074         */
4075        intel_timeline_reset_seqno(engine->kernel_context->timeline);
4076
4077        /* And scrub the dirty cachelines for the HWSP */
4078        clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4079}
4080
4081static void enable_error_interrupt(struct intel_engine_cs *engine)
4082{
4083        u32 status;
4084
4085        engine->execlists.error_interrupt = 0;
4086        ENGINE_WRITE(engine, RING_EMR, ~0u);
4087        ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4088
4089        status = ENGINE_READ(engine, RING_ESR);
4090        if (unlikely(status)) {
4091                drm_err(&engine->i915->drm,
4092                        "engine '%s' resumed still in error: %08x\n",
4093                        engine->name, status);
4094                __intel_gt_reset(engine->gt, engine->mask);
4095        }
4096
4097        /*
4098         * On current gen8+, we have 2 signals to play with
4099         *
4100         * - I915_ERROR_INSTRUCTION (bit 0)
4101         *
4102         *    Generate an error if the command parser encounters an invalid
4103         *    instruction
4104         *
4105         *    This is a fatal error.
4106         *
4107         * - CP_PRIV (bit 2)
4108         *
4109         *    Generate an error on privilege violation (where the CP replaces
4110         *    the instruction with a no-op). This also fires for writes into
4111         *    read-only scratch pages.
4112         *
4113         *    This is a non-fatal error, parsing continues.
4114         *
4115         * * there are a few others defined for odd HW that we do not use
4116         *
4117         * Since CP_PRIV fires for cases where we have chosen to ignore the
4118         * error (as the HW is validating and suppressing the mistakes), we
4119         * only unmask the instruction error bit.
4120         */
4121        ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4122}
4123
4124static void enable_execlists(struct intel_engine_cs *engine)
4125{
4126        u32 mode;
4127
4128        assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4129
4130        intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4131
4132        if (INTEL_GEN(engine->i915) >= 11)
4133                mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4134        else
4135                mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4136        ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4137
4138        ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4139
4140        ENGINE_WRITE_FW(engine,
4141                        RING_HWS_PGA,
4142                        i915_ggtt_offset(engine->status_page.vma));
4143        ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4144
4145        enable_error_interrupt(engine);
4146
4147        engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4148}
4149
4150static bool unexpected_starting_state(struct intel_engine_cs *engine)
4151{
4152        bool unexpected = false;
4153
4154        if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4155                drm_dbg(&engine->i915->drm,
4156                        "STOP_RING still set in RING_MI_MODE\n");
4157                unexpected = true;
4158        }
4159
4160        return unexpected;
4161}
4162
4163static int execlists_resume(struct intel_engine_cs *engine)
4164{
4165        intel_mocs_init_engine(engine);
4166
4167        intel_breadcrumbs_reset(engine->breadcrumbs);
4168
4169        if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4170                struct drm_printer p = drm_debug_printer(__func__);
4171
4172                intel_engine_dump(engine, &p, NULL);
4173        }
4174
4175        enable_execlists(engine);
4176
4177        return 0;
4178}
4179
4180static void execlists_reset_prepare(struct intel_engine_cs *engine)
4181{
4182        struct intel_engine_execlists * const execlists = &engine->execlists;
4183        unsigned long flags;
4184
4185        ENGINE_TRACE(engine, "depth<-%d\n",
4186                     atomic_read(&execlists->tasklet.count));
4187
4188        /*
4189         * Prevent request submission to the hardware until we have
4190         * completed the reset in i915_gem_reset_finish(). If a request
4191         * is completed by one engine, it may then queue a request
4192         * to a second via its execlists->tasklet *just* as we are
4193         * calling engine->resume() and also writing the ELSP.
4194         * Turning off the execlists->tasklet until the reset is over
4195         * prevents the race.
4196         */
4197        __tasklet_disable_sync_once(&execlists->tasklet);
4198        GEM_BUG_ON(!reset_in_progress(execlists));
4199
4200        /* And flush any current direct submission. */
4201        spin_lock_irqsave(&engine->active.lock, flags);
4202        spin_unlock_irqrestore(&engine->active.lock, flags);
4203
4204        /*
4205         * We stop engines, otherwise we might get failed reset and a
4206         * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
4207         * from a system hang if a batchbuffer is progressing when
4208         * the reset is issued, regardless of READY_TO_RESET ack.
4209         * Thus assume it is best to stop engines on all gens
4210         * where we have a gpu reset.
4211         *
4212         * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4213         *
4214         * FIXME: Wa for more modern gens needs to be validated
4215         */
4216        ring_set_paused(engine, 1);
4217        intel_engine_stop_cs(engine);
4218
4219        engine->execlists.reset_ccid = active_ccid(engine);
4220}
4221
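/*
 * Use a masked write to clear STOP_RING in the context image's copy of
 * RING_MI_MODE, so the ring is not left frozen when the context is restored
 * after reset.
 */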
4222static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4223{
4224        int x;
4225
4226        x = lrc_ring_mi_mode(engine);
4227        if (x != -1) {
4228                regs[x + 1] &= ~STOP_RING;
4229                regs[x + 1] |= STOP_RING << 16;
4230        }
4231}
4232
4233static void __execlists_reset_reg_state(const struct intel_context *ce,
4234                                        const struct intel_engine_cs *engine)
4235{
4236        u32 *regs = ce->lrc_reg_state;
4237
4238        __reset_stop_ring(regs, engine);
4239}
4240
4241static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4242{
4243        struct intel_engine_execlists * const execlists = &engine->execlists;
4244        struct intel_context *ce;
4245        struct i915_request *rq;
4246        u32 head;
4247
4248        mb(); /* paranoia: read the CSB pointers from after the reset */
4249        clflush(execlists->csb_write);
4250        mb();
4251
4252        process_csb(engine); /* drain preemption events */
4253
4254        /* Following the reset, we need to reload the CSB read/write pointers */
4255        reset_csb_pointers(engine);
4256
4257        /*
4258         * Save the currently executing context, even if we completed
4259         * its request, it was still running at the time of the
4260         * reset and will have been clobbered.
4261         */
4262        rq = active_context(engine, engine->execlists.reset_ccid);
4263        if (!rq)
4264                goto unwind;
4265
4266        ce = rq->context;
4267        GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4268
4269        if (i915_request_completed(rq)) {
4270                /* Idle context; tidy up the ring so we can restart afresh */
4271                head = intel_ring_wrap(ce->ring, rq->tail);
4272                goto out_replay;
4273        }
4274
4275        /* We still have requests in-flight; the engine should be active */
4276        GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4277
4278        /* Context has requests still in-flight; it should not be idle! */
4279        GEM_BUG_ON(i915_active_is_idle(&ce->active));
4280
4281        rq = active_request(ce->timeline, rq);
4282        head = intel_ring_wrap(ce->ring, rq->head);
4283        GEM_BUG_ON(head == ce->ring->tail);
4284
4285        /*
4286         * If this request hasn't started yet, e.g. it is waiting on a
4287         * semaphore, we must avoid skipping the request or else we break
4288         * the signaling chain. The risk is that the context is corrupt:
4289         * issuing a reset while the GPU is loading the context image
4290         * quite often corrupts that image, in which case the request
4291         * will not restart and we will be stuck with a wedged device.
4292         *
4293         * Provided the context survived, an unstarted request should
4294         * replay perfectly and we do not need to flag the result as
4295         * being erroneous.
4296         */
4297        if (!i915_request_started(rq))
4298                goto out_replay;
4299
4300        /*
4301         * If the request was innocent, we leave the request in the ELSP
4302         * and will try to replay it on restarting. The context image may
4303         * have been corrupted by the reset, in which case we may have
4304         * to service a new GPU hang, but more likely we can continue on
4305         * without impact.
4306         *
4307         * If the request was guilty, we presume the context is corrupt
4308         * and have to at least restore the RING register in the context
4309         * image back to the expected values to skip over the guilty request.
4310         */
4311        __i915_request_reset(rq, stalled);
4312
4313        /*
4314         * We want a simple context + ring to execute the breadcrumb update.
4315         * We cannot rely on the context being intact across the GPU hang,
4316         * so clear it and rebuild just what we need for the breadcrumb.
4317         * All pending requests for this context will be zapped, and any
4318         * future request will be after userspace has had the opportunity
4319         * to recreate its own state.
4320         */
4321out_replay:
4322        ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4323                     head, ce->ring->tail);
4324        __execlists_reset_reg_state(ce, engine);
4325        __execlists_update_reg_state(ce, engine, head);
4326        ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4327
4328unwind:
4329        /* Push back any incomplete requests for replay after the reset. */
4330        cancel_port_requests(execlists);
4331        __unwind_incomplete_requests(engine);
4332}
4333
4334static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4335{
4336        unsigned long flags;
4337
4338        ENGINE_TRACE(engine, "\n");
4339
4340        spin_lock_irqsave(&engine->active.lock, flags);
4341
4342        __execlists_reset(engine, stalled);
4343
4344        spin_unlock_irqrestore(&engine->active.lock, flags);
4345}
4346
4347static void nop_submission_tasklet(unsigned long data)
4348{
4349        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4350
4351        /* The driver is wedged; don't process any more events. */
4352        WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4353}
4354
4355static void execlists_reset_cancel(struct intel_engine_cs *engine)
4356{
4357        struct intel_engine_execlists * const execlists = &engine->execlists;
4358        struct i915_request *rq, *rn;
4359        struct rb_node *rb;
4360        unsigned long flags;
4361
4362        ENGINE_TRACE(engine, "\n");
4363
4364        /*
4365         * Before we call engine->cancel_requests(), we should have exclusive
4366         * access to the submission state. This is arranged for us by the
4367         * caller disabling the interrupt generation, the tasklet and other
4368         * threads that may then access the same state, giving us a free hand
4369         * to reset state. However, we still need to let lockdep be aware that
4370         * we know this state may be accessed in hardirq context, so we
4371         * disable the irq around this manipulation and we want to keep
4372         * the spinlock focused on its duties and not accidentally conflate
4373         * coverage to the submission's irq state. (Similarly, although we
4374         * shouldn't need to disable irq around the manipulation of the
4375         * submission's irq state, we also wish to remind ourselves that
4376         * it is irq state.)
4377         */
4378        spin_lock_irqsave(&engine->active.lock, flags);
4379
4380        __execlists_reset(engine, true);
4381
4382        /* Mark all executing requests as skipped. */
4383        list_for_each_entry(rq, &engine->active.requests, sched.link)
4384                mark_eio(rq);
4385
4386        /* Flush the queued requests to the timeline list (for retiring). */
4387        while ((rb = rb_first_cached(&execlists->queue))) {
4388                struct i915_priolist *p = to_priolist(rb);
4389                int i;
4390
4391                priolist_for_each_request_consume(rq, rn, p, i) {
4392                        mark_eio(rq);
4393                        __i915_request_submit(rq);
4394                }
4395
4396                rb_erase_cached(&p->node, &execlists->queue);
4397                i915_priolist_free(p);
4398        }
4399
4400        /* On-hold requests will be flushed to timeline upon their release */
4401        list_for_each_entry(rq, &engine->active.hold, sched.link)
4402                mark_eio(rq);
4403
4404        /* Cancel all attached virtual engines */
4405        while ((rb = rb_first_cached(&execlists->virtual))) {
4406                struct virtual_engine *ve =
4407                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4408
4409                rb_erase_cached(rb, &execlists->virtual);
4410                RB_CLEAR_NODE(rb);
4411
4412                spin_lock(&ve->base.active.lock);
4413                rq = fetch_and_zero(&ve->request);
4414                if (rq) {
4415                        mark_eio(rq);
4416
4417                        rq->engine = engine;
4418                        __i915_request_submit(rq);
4419                        i915_request_put(rq);
4420
4421                        ve->base.execlists.queue_priority_hint = INT_MIN;
4422                }
4423                spin_unlock(&ve->base.active.lock);
4424        }
4425
4426        /* Remaining _unready_ requests will be nop'ed when submitted */
4427
4428        execlists->queue_priority_hint = INT_MIN;
4429        execlists->queue = RB_ROOT_CACHED;
4430
4431        GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4432        execlists->tasklet.func = nop_submission_tasklet;
4433
4434        spin_unlock_irqrestore(&engine->active.lock, flags);
4435}
4436
4437static void execlists_reset_finish(struct intel_engine_cs *engine)
4438{
4439        struct intel_engine_execlists * const execlists = &engine->execlists;
4440
4441        /*
4442         * After a GPU reset, we may have requests to replay. Do so now while
4443         * we still have the forcewake to be sure that the GPU is not allowed
4444         * to sleep before we restart and reload a context.
4445         */
4446        GEM_BUG_ON(!reset_in_progress(execlists));
4447        if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4448                execlists->tasklet.func(execlists->tasklet.data);
4449
4450        if (__tasklet_enable(&execlists->tasklet))
4451                /* And kick in case we missed a new request submission. */
4452                tasklet_hi_schedule(&execlists->tasklet);
4453        ENGINE_TRACE(engine, "depth->%d\n",
4454                     atomic_read(&execlists->tasklet.count));
4455}
4456
4457static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4458                                    u64 offset, u32 len,
4459                                    const unsigned int flags)
4460{
4461        u32 *cs;
4462
4463        cs = intel_ring_begin(rq, 4);
4464        if (IS_ERR(cs))
4465                return PTR_ERR(cs);
4466
4467        /*
4468         * WaDisableCtxRestoreArbitration:bdw,chv
4469         *
4470         * We would not need to emit MI_ARB_ENABLE as often as we do
4471         * (in particular on all the gens that do not need the w/a at
4472         * all!) if we made sure that arbitration was enabled on every
4473         * switch into this context (both ordinary and for preemption).
4474         * However, for gen8 there is another w/a that requires us not
4475         * to preempt inside GPGPU execution, so we keep arbitration
4476         * disabled for gen8 batches. Arbitration will be re-enabled
4477         * before we close the request
4478         * (engine->emit_fini_breadcrumb).
4479         */
4480        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4481
4482        /* FIXME(BDW+): Address space and security selectors. */
4483        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4484                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4485        *cs++ = lower_32_bits(offset);
4486        *cs++ = upper_32_bits(offset);
4487
4488        intel_ring_advance(rq, cs);
4489
4490        return 0;
4491}
4492
4493static int gen8_emit_bb_start(struct i915_request *rq,
4494                              u64 offset, u32 len,
4495                              const unsigned int flags)
4496{
4497        u32 *cs;
4498
4499        cs = intel_ring_begin(rq, 6);
4500        if (IS_ERR(cs))
4501                return PTR_ERR(cs);
4502
4503        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4504
4505        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4506                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4507        *cs++ = lower_32_bits(offset);
4508        *cs++ = upper_32_bits(offset);
4509
4510        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4511        *cs++ = MI_NOOP;
4512
4513        intel_ring_advance(rq, cs);
4514
4515        return 0;
4516}
4517
4518static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4519{
4520        ENGINE_WRITE(engine, RING_IMR,
4521                     ~(engine->irq_enable_mask | engine->irq_keep_mask));
4522        ENGINE_POSTING_READ(engine, RING_IMR);
4523}
4524
4525static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4526{
4527        ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4528}
4529
4530static int gen8_emit_flush(struct i915_request *request, u32 mode)
4531{
4532        u32 cmd, *cs;
4533
4534        cs = intel_ring_begin(request, 4);
4535        if (IS_ERR(cs))
4536                return PTR_ERR(cs);
4537
4538        cmd = MI_FLUSH_DW + 1;
4539
4540        /* We always require a command barrier so that subsequent
4541         * commands, such as breadcrumb interrupts, are strictly ordered
4542         * wrt the contents of the write cache being flushed to memory
4543         * (and thus being coherent from the CPU).
4544         */
4545        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4546
4547        if (mode & EMIT_INVALIDATE) {
4548                cmd |= MI_INVALIDATE_TLB;
4549                if (request->engine->class == VIDEO_DECODE_CLASS)
4550                        cmd |= MI_INVALIDATE_BSD;
4551        }
4552
4553        *cs++ = cmd;
4554        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4555        *cs++ = 0; /* upper addr */
4556        *cs++ = 0; /* value */
4557        intel_ring_advance(request, cs);
4558
4559        return 0;
4560}
4561
4562static int gen8_emit_flush_render(struct i915_request *request,
4563                                  u32 mode)
4564{
4565        bool vf_flush_wa = false, dc_flush_wa = false;
4566        u32 *cs, flags = 0;
4567        int len;
4568
4569        flags |= PIPE_CONTROL_CS_STALL;
4570
4571        if (mode & EMIT_FLUSH) {
4572                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4573                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4574                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4575                flags |= PIPE_CONTROL_FLUSH_ENABLE;
4576        }
4577
4578        if (mode & EMIT_INVALIDATE) {
4579                flags |= PIPE_CONTROL_TLB_INVALIDATE;
4580                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4581                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4582                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4583                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4584                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4585                flags |= PIPE_CONTROL_QW_WRITE;
4586                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4587
4588                /*
4589                 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4590                 * pipe control.
4591                 */
4592                if (IS_GEN(request->engine->i915, 9))
4593                        vf_flush_wa = true;
4594
4595                /* WaForGAMHang:kbl */
4596                if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
4597                        dc_flush_wa = true;
4598        }
4599
4600        len = 6;
4601
4602        if (vf_flush_wa)
4603                len += 6;
4604
4605        if (dc_flush_wa)
4606                len += 12;
4607
4608        cs = intel_ring_begin(request, len);
4609        if (IS_ERR(cs))
4610                return PTR_ERR(cs);
4611
4612        if (vf_flush_wa)
4613                cs = gen8_emit_pipe_control(cs, 0, 0);
4614
4615        if (dc_flush_wa)
4616                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4617                                            0);
4618
4619        cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4620
4621        if (dc_flush_wa)
4622                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4623
4624        intel_ring_advance(request, cs);
4625
4626        return 0;
4627}
4628
4629static int gen11_emit_flush_render(struct i915_request *request,
4630                                   u32 mode)
4631{
4632        if (mode & EMIT_FLUSH) {
4633                u32 *cs;
4634                u32 flags = 0;
4635
4636                flags |= PIPE_CONTROL_CS_STALL;
4637
4638                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4639                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4640                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4641                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4642                flags |= PIPE_CONTROL_FLUSH_ENABLE;
4643                flags |= PIPE_CONTROL_QW_WRITE;
4644                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4645
4646                cs = intel_ring_begin(request, 6);
4647                if (IS_ERR(cs))
4648                        return PTR_ERR(cs);
4649
4650                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4651                intel_ring_advance(request, cs);
4652        }
4653
4654        if (mode & EMIT_INVALIDATE) {
4655                u32 *cs;
4656                u32 flags = 0;
4657
4658                flags |= PIPE_CONTROL_CS_STALL;
4659
4660                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4661                flags |= PIPE_CONTROL_TLB_INVALIDATE;
4662                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4663                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4664                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4665                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4666                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4667                flags |= PIPE_CONTROL_QW_WRITE;
4668                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4669
4670                cs = intel_ring_begin(request, 6);
4671                if (IS_ERR(cs))
4672                        return PTR_ERR(cs);
4673
4674                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4675                intel_ring_advance(request, cs);
4676        }
4677
4678        return 0;
4679}
4680
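/*
 * On gen12, MI_ARB_CHECK doubles as the pre-parser control: bit 8
 * flags the pre-fetch disable field as valid and bit 0 selects whether
 * the pre-parser is disabled.
 */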
4681static u32 preparser_disable(bool state)
4682{
4683        return MI_ARB_CHECK | 1 << 8 | state;
4684}
4685
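/* Pick the AUX table invalidation register for a VD/VE engine instance. */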
4686static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4687{
4688        static const i915_reg_t vd[] = {
4689                GEN12_VD0_AUX_NV,
4690                GEN12_VD1_AUX_NV,
4691                GEN12_VD2_AUX_NV,
4692                GEN12_VD3_AUX_NV,
4693        };
4694
4695        static const i915_reg_t ve[] = {
4696                GEN12_VE0_AUX_NV,
4697                GEN12_VE1_AUX_NV,
4698        };
4699
4700        if (engine->class == VIDEO_DECODE_CLASS)
4701                return vd[engine->instance];
4702
4703        if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4704                return ve[engine->instance];
4705
4706        GEM_BUG_ON("unknown aux_inv_reg\n");
4707
4708        return INVALID_MMIO_REG;
4709}
4710
4711static u32 *
4712gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4713{
4714        *cs++ = MI_LOAD_REGISTER_IMM(1);
4715        *cs++ = i915_mmio_reg_offset(inv_reg);
4716        *cs++ = AUX_INV;
4717        *cs++ = MI_NOOP;
4718
4719        return cs;
4720}
4721
4722static int gen12_emit_flush_render(struct i915_request *request,
4723                                   u32 mode)
4724{
4725        if (mode & EMIT_FLUSH) {
4726                u32 flags = 0;
4727                u32 *cs;
4728
4729                flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4730                flags |= PIPE_CONTROL_FLUSH_L3;
4731                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4732                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4733                /* Wa_1409600907:tgl */
4734                flags |= PIPE_CONTROL_DEPTH_STALL;
4735                flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4736                flags |= PIPE_CONTROL_FLUSH_ENABLE;
4737
4738                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4739                flags |= PIPE_CONTROL_QW_WRITE;
4740
4741                flags |= PIPE_CONTROL_CS_STALL;
4742
4743                cs = intel_ring_begin(request, 6);
4744                if (IS_ERR(cs))
4745                        return PTR_ERR(cs);
4746
4747                cs = gen12_emit_pipe_control(cs,
4748                                             PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4749                                             flags, LRC_PPHWSP_SCRATCH_ADDR);
4750                intel_ring_advance(request, cs);
4751        }
4752
4753        if (mode & EMIT_INVALIDATE) {
4754                u32 flags = 0;
4755                u32 *cs;
4756
4757                flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4758                flags |= PIPE_CONTROL_TLB_INVALIDATE;
4759                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4760                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4761                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4762                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4763                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4764
4765                flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4766                flags |= PIPE_CONTROL_QW_WRITE;
4767
4768                flags |= PIPE_CONTROL_CS_STALL;
4769
4770                cs = intel_ring_begin(request, 8 + 4);
4771                if (IS_ERR(cs))
4772                        return PTR_ERR(cs);
4773
4774                /*
4775                 * Prevent the pre-parser from skipping past the TLB
4776                 * invalidate and loading a stale page for the batch
4777                 * buffer / request payload.
4778                 */
4779                *cs++ = preparser_disable(true);
4780
4781                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4782
4783                /* hsdes: 1809175790 */
4784                cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4785
4786                *cs++ = preparser_disable(false);
4787                intel_ring_advance(request, cs);
4788        }
4789
4790        return 0;
4791}
4792
4793static int gen12_emit_flush(struct i915_request *request, u32 mode)
4794{
4795        intel_engine_mask_t aux_inv = 0;
4796        u32 cmd, *cs;
4797
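        /*
         * First use 'cmd' to count the dwords we must reserve: 4 for
         * the MI_FLUSH_DW itself, plus (when invalidating) 2 for the
         * preparser disable/enable pair and, if any engines need an
         * aux-table invalidation, an LRI header, a (reg, value) pair
         * per engine and a trailing NOOP.
         */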
4798        cmd = 4;
4799        if (mode & EMIT_INVALIDATE)
4800                cmd += 2;
4801        if (mode & EMIT_INVALIDATE)
4802                aux_inv = request->engine->mask & ~BIT(BCS0);
4803        if (aux_inv)
4804                cmd += 2 * hweight8(aux_inv) + 2;
4805
4806        cs = intel_ring_begin(request, cmd);
4807        if (IS_ERR(cs))
4808                return PTR_ERR(cs);
4809
4810        if (mode & EMIT_INVALIDATE)
4811                *cs++ = preparser_disable(true);
4812
4813        cmd = MI_FLUSH_DW + 1;
4814
4815        /* We always require a command barrier so that subsequent
4816         * commands, such as breadcrumb interrupts, are strictly ordered
4817         * wrt the contents of the write cache being flushed to memory
4818         * (and thus being coherent from the CPU).
4819         */
4820        cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4821
4822        if (mode & EMIT_INVALIDATE) {
4823                cmd |= MI_INVALIDATE_TLB;
4824                if (request->engine->class == VIDEO_DECODE_CLASS)
4825                        cmd |= MI_INVALIDATE_BSD;
4826        }
4827
4828        *cs++ = cmd;
4829        *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4830        *cs++ = 0; /* upper addr */
4831        *cs++ = 0; /* value */
4832
4833        if (aux_inv) { /* hsdes: 1809175790 */
4834                struct intel_engine_cs *engine;
4835                unsigned int tmp;
4836
4837                *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4838                for_each_engine_masked(engine, request->engine->gt,
4839                                       aux_inv, tmp) {
4840                        *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4841                        *cs++ = AUX_INV;
4842                }
4843                *cs++ = MI_NOOP;
4844        }
4845
4846        if (mode & EMIT_INVALIDATE)
4847                *cs++ = preparser_disable(false);
4848
4849        intel_ring_advance(request, cs);
4850
4851        return 0;
4852}
4853
4854static void assert_request_valid(struct i915_request *rq)
4855{
4856        struct intel_ring *ring __maybe_unused = rq->ring;
4857
4858        /* Can we unwind this request without appearing to go forwards? */
4859        GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4860}
4861
4862/*
4863 * Reserve space for 2 NOOPs at the end of each request to be
4864 * used as a workaround for not being allowed to do lite
4865 * restore with HEAD==TAIL (WaIdleLiteRestore).
4866 */
4867static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4868{
4869        /* Ensure there's always at least one preemption point per-request. */
4870        *cs++ = MI_ARB_CHECK;
4871        *cs++ = MI_NOOP;
4872        request->wa_tail = intel_ring_offset(request, cs);
4873
4874        /* Check that entire request is less than half the ring */
4875        assert_request_valid(request);
4876
4877        return cs;
4878}
4879
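/*
 * Emit the MI_SEMAPHORE_WAIT that parks the CS on the engine's preempt
 * semaphore in the HWSP once the breadcrumb has been written: while
 * ring_set_paused() holds the semaphore non-zero the poll never
 * completes, and clearing it back to zero (SAD_EQ_SDD with an operand
 * of 0) releases the engine to continue.
 */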
4880static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4881{
4882        *cs++ = MI_SEMAPHORE_WAIT |
4883                MI_SEMAPHORE_GLOBAL_GTT |
4884                MI_SEMAPHORE_POLL |
4885                MI_SEMAPHORE_SAD_EQ_SDD;
4886        *cs++ = 0;
4887        *cs++ = intel_hws_preempt_address(request->engine);
4888        *cs++ = 0;
4889
4890        return cs;
4891}
4892
4893static __always_inline u32*
4894gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4895{
4896        *cs++ = MI_USER_INTERRUPT;
4897
4898        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4899        if (intel_engine_has_semaphores(request->engine))
4900                cs = emit_preempt_busywait(request, cs);
4901
4902        request->tail = intel_ring_offset(request, cs);
4903        assert_ring_tail_valid(request->ring, request->tail);
4904
4905        return gen8_emit_wa_tail(request, cs);
4906}
4907
4908static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
4909{
4910        return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
4911}
4912
4913static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4914{
4915        return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4916}
4917
4918static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4919{
4920        cs = gen8_emit_pipe_control(cs,
4921                                    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4922                                    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4923                                    PIPE_CONTROL_DC_FLUSH_ENABLE,
4924                                    0);
4925
4926        /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4927        cs = gen8_emit_ggtt_write_rcs(cs,
4928                                      request->fence.seqno,
4929                                      hwsp_offset(request),
4930                                      PIPE_CONTROL_FLUSH_ENABLE |
4931                                      PIPE_CONTROL_CS_STALL);
4932
4933        return gen8_emit_fini_breadcrumb_tail(request, cs);
4934}
4935
4936static u32 *
4937gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4938{
4939        cs = gen8_emit_ggtt_write_rcs(cs,
4940                                      request->fence.seqno,
4941                                      hwsp_offset(request),
4942                                      PIPE_CONTROL_CS_STALL |
4943                                      PIPE_CONTROL_TILE_CACHE_FLUSH |
4944                                      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4945                                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4946                                      PIPE_CONTROL_DC_FLUSH_ENABLE |
4947                                      PIPE_CONTROL_FLUSH_ENABLE);
4948
4949        return gen8_emit_fini_breadcrumb_tail(request, cs);
4950}
4951
4952/*
4953 * Note that the CS instruction pre-parser will not stall on the breadcrumb
4954 * flush and will continue pre-fetching the instructions after it before the
4955 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4956 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4957 * of the next request before the memory has been flushed, we're guaranteed that
4958 * we won't access the batch itself too early.
4959 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4960 * so, if the current request is modifying an instruction in the next request on
4961 * the same intel_context, we might pre-fetch and then execute the pre-update
4962 * instruction. To avoid this, the users of self-modifying code should either
4963 * disable the parser around the code emitting the memory writes, via a new flag
4964 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4965 * the in-kernel use-cases we've opted to use a separate context, see
4966 * reloc_gpu() as an example.
4967 * All the above applies only to the instructions themselves. Non-inline data
4968 * used by the instructions is not pre-fetched.
4969 */
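/*
 * The MI_ARB_CHECK flag mentioned above is what preparser_disable()
 * encodes.
 */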
4970
4971static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4972{
4973        *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4974                MI_SEMAPHORE_GLOBAL_GTT |
4975                MI_SEMAPHORE_POLL |
4976                MI_SEMAPHORE_SAD_EQ_SDD;
4977        *cs++ = 0;
4978        *cs++ = intel_hws_preempt_address(request->engine);
4979        *cs++ = 0;
4980        *cs++ = 0;
4981        *cs++ = MI_NOOP;
4982
4983        return cs;
4984}
4985
4986static __always_inline u32*
4987gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4988{
4989        *cs++ = MI_USER_INTERRUPT;
4990
4991        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4992        if (intel_engine_has_semaphores(request->engine))
4993                cs = gen12_emit_preempt_busywait(request, cs);
4994
4995        request->tail = intel_ring_offset(request, cs);
4996        assert_ring_tail_valid(request->ring, request->tail);
4997
4998        return gen8_emit_wa_tail(request, cs);
4999}
5000
5001static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
5002{
5003        /* XXX Stalling flush before seqno write; post-sync not */
5004        cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
5005        return gen12_emit_fini_breadcrumb_tail(rq, cs);
5006}
5007
5008static u32 *
5009gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
5010{
5011        cs = gen12_emit_ggtt_write_rcs(cs,
5012                                       request->fence.seqno,
5013                                       hwsp_offset(request),
5014                                       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
5015                                       PIPE_CONTROL_CS_STALL |
5016                                       PIPE_CONTROL_TILE_CACHE_FLUSH |
5017                                       PIPE_CONTROL_FLUSH_L3 |
5018                                       PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
5019                                       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
5020                                       /* Wa_1409600907:tgl */
5021                                       PIPE_CONTROL_DEPTH_STALL |
5022                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
5023                                       PIPE_CONTROL_FLUSH_ENABLE);
5024
5025        return gen12_emit_fini_breadcrumb_tail(request, cs);
5026}
5027
5028static void execlists_park(struct intel_engine_cs *engine)
5029{
5030        cancel_timer(&engine->execlists.timer);
5031        cancel_timer(&engine->execlists.preempt);
5032}
5033
5034void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
5035{
5036        engine->submit_request = execlists_submit_request;
5037        engine->schedule = i915_schedule;
5038        engine->execlists.tasklet.func = execlists_submission_tasklet;
5039
5040        engine->reset.prepare = execlists_reset_prepare;
5041        engine->reset.rewind = execlists_reset_rewind;
5042        engine->reset.cancel = execlists_reset_cancel;
5043        engine->reset.finish = execlists_reset_finish;
5044
5045        engine->park = execlists_park;
5046        engine->unpark = NULL;
5047
5048        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
5049        if (!intel_vgpu_active(engine->i915)) {
5050                engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
5051                if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5052                        engine->flags |= I915_ENGINE_HAS_PREEMPTION;
5053                        if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
5054                                engine->flags |= I915_ENGINE_HAS_TIMESLICES;
5055                }
5056        }
5057
5058        if (INTEL_GEN(engine->i915) >= 12)
5059                engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
5060
5061        if (intel_engine_has_preemption(engine))
5062                engine->emit_bb_start = gen8_emit_bb_start;
5063        else
5064                engine->emit_bb_start = gen8_emit_bb_start_noarb;
5065}
5066
5067static void execlists_shutdown(struct intel_engine_cs *engine)
5068{
5069        /* Synchronise with residual timers and any softirq they raise */
5070        del_timer_sync(&engine->execlists.timer);
5071        del_timer_sync(&engine->execlists.preempt);
5072        tasklet_kill(&engine->execlists.tasklet);
5073}
5074
5075static void execlists_release(struct intel_engine_cs *engine)
5076{
5077        engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
5078
5079        execlists_shutdown(engine);
5080
5081        intel_engine_cleanup_common(engine);
5082        lrc_destroy_wa_ctx(engine);
5083}
5084
5085static void
5086logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5087{
5088        /* Default vfuncs which can be overridden by each engine. */
5089
5090        engine->resume = execlists_resume;
5091
5092        engine->cops = &execlists_context_ops;
5093        engine->request_alloc = execlists_request_alloc;
5094
5095        engine->emit_flush = gen8_emit_flush;
5096        engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5097        engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5098        if (INTEL_GEN(engine->i915) >= 12) {
5099                engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5100                engine->emit_flush = gen12_emit_flush;
5101        }
5102        engine->set_default_submission = intel_execlists_set_default_submission;
5103
5104        if (INTEL_GEN(engine->i915) < 11) {
5105                engine->irq_enable = gen8_logical_ring_enable_irq;
5106                engine->irq_disable = gen8_logical_ring_disable_irq;
5107        } else {
5108                /*
5109                 * TODO: On Gen11 interrupt masks need to be clear
5110                 * to allow C6 entry. Keep interrupts enabled for now
5111                 * and take the hit of generating extra interrupts
5112                 * until a more refined solution exists.
5113                 */
5114        }
5115}
5116
5117static inline void
5118logical_ring_default_irqs(struct intel_engine_cs *engine)
5119{
5120        unsigned int shift = 0;
5121
5122        if (INTEL_GEN(engine->i915) < 11) {
5123                const u8 irq_shifts[] = {
5124                        [RCS0]  = GEN8_RCS_IRQ_SHIFT,
5125                        [BCS0]  = GEN8_BCS_IRQ_SHIFT,
5126                        [VCS0]  = GEN8_VCS0_IRQ_SHIFT,
5127                        [VCS1]  = GEN8_VCS1_IRQ_SHIFT,
5128                        [VECS0] = GEN8_VECS_IRQ_SHIFT,
5129                };
5130
5131                shift = irq_shifts[engine->id];
5132        }
5133
5134        engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5135        engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5136        engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5137        engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5138}
5139
5140static void rcs_submission_override(struct intel_engine_cs *engine)
5141{
5142        switch (INTEL_GEN(engine->i915)) {
5143        case 12:
5144                engine->emit_flush = gen12_emit_flush_render;
5145                engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5146                break;
5147        case 11:
5148                engine->emit_flush = gen11_emit_flush_render;
5149                engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5150                break;
5151        default:
5152                engine->emit_flush = gen8_emit_flush_render;
5153                engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5154                break;
5155        }
5156}
5157
5158int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5159{
5160        struct intel_engine_execlists * const execlists = &engine->execlists;
5161        struct drm_i915_private *i915 = engine->i915;
5162        struct intel_uncore *uncore = engine->uncore;
5163        u32 base = engine->mmio_base;
5164
5165        tasklet_init(&engine->execlists.tasklet,
5166                     execlists_submission_tasklet, (unsigned long)engine);
5167        timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5168        timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5169
5170        logical_ring_default_vfuncs(engine);
5171        logical_ring_default_irqs(engine);
5172
5173        if (engine->class == RENDER_CLASS)
5174                rcs_submission_override(engine);
5175
5176        if (intel_init_workaround_bb(engine))
5177                /*
5178                 * We continue even if we fail to initialize the WA batch,
5179                 * because we only expect rare glitches and nothing
5180                 * critical enough to prevent us from using the GPU.
5181                 */
5182                drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5183
5184        if (HAS_LOGICAL_RING_ELSQ(i915)) {
5185                execlists->submit_reg = uncore->regs +
5186                        i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5187                execlists->ctrl_reg = uncore->regs +
5188                        i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5189        } else {
5190                execlists->submit_reg = uncore->regs +
5191                        i915_mmio_reg_offset(RING_ELSP(base));
5192        }
5193
5194        execlists->csb_status =
5195                (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5196
5197        execlists->csb_write =
5198                &engine->status_page.addr[intel_hws_csb_write_index(i915)];
5199
5200        if (INTEL_GEN(i915) < 11)
5201                execlists->csb_size = GEN8_CSB_ENTRIES;
5202        else
5203                execlists->csb_size = GEN11_CSB_ENTRIES;
5204
5205        if (INTEL_GEN(engine->i915) >= 11) {
5206                execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5207                execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5208        }
5209
5210        /* Finally, take ownership and responsibility for cleanup! */
5211        engine->sanitize = execlists_sanitize;
5212        engine->release = execlists_release;
5213
5214        return 0;
5215}
5216
5217static void init_common_reg_state(u32 * const regs,
5218                                  const struct intel_engine_cs *engine,
5219                                  const struct intel_ring *ring,
5220                                  bool inhibit)
5221{
5222        u32 ctl;
5223
5224        ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5225        ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5226        if (inhibit)
5227                ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5228        if (INTEL_GEN(engine->i915) < 11)
5229                ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5230                                           CTX_CTRL_RS_CTX_ENABLE);
5231        regs[CTX_CONTEXT_CONTROL] = ctl;
5232
5233        regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5234        regs[CTX_TIMESTAMP] = 0;
5235}
5236
5237static void init_wa_bb_reg_state(u32 * const regs,
5238                                 const struct intel_engine_cs *engine)
5239{
5240        const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5241
5242        if (wa_ctx->per_ctx.size) {
5243                const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5244
5245                GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5246                regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5247                        (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5248        }
5249
5250        if (wa_ctx->indirect_ctx.size) {
5251                lrc_ring_setup_indirect_ctx(regs, engine,
5252                                            i915_ggtt_offset(wa_ctx->vma) +
5253                                            wa_ctx->indirect_ctx.offset,
5254                                            wa_ctx->indirect_ctx.size);
5255        }
5256}
5257
5258static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5259{
5260        if (i915_vm_is_4lvl(&ppgtt->vm)) {
5261                /* 64b PPGTT (48bit canonical)
5262                 * PDP0_DESCRIPTOR contains the base address to PML4 and
5263                 * other PDP Descriptors are ignored.
5264                 */
5265                ASSIGN_CTX_PML4(ppgtt, regs);
5266        } else {
5267                ASSIGN_CTX_PDP(ppgtt, regs, 3);
5268                ASSIGN_CTX_PDP(ppgtt, regs, 2);
5269                ASSIGN_CTX_PDP(ppgtt, regs, 1);
5270                ASSIGN_CTX_PDP(ppgtt, regs, 0);
5271        }
5272}
5273
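/*
 * Contexts bound to the global GTT run on the aliasing PPGTT hanging
 * off the GGTT, so resolve either flavour of vm to the ppgtt whose
 * page directories must be written into the context image.
 */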
5274static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5275{
5276        if (i915_is_ggtt(vm))
5277                return i915_vm_to_ggtt(vm)->alias;
5278        else
5279                return i915_vm_to_ppgtt(vm);
5280}
5281
5282static void execlists_init_reg_state(u32 *regs,
5283                                     const struct intel_context *ce,
5284                                     const struct intel_engine_cs *engine,
5285                                     const struct intel_ring *ring,
5286                                     bool inhibit)
5287{
5288        /*
5289         * A context is actually a big batch buffer with several
5290         * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5291         * values we are setting here are only for the first context restore:
5292         * on a subsequent save, the GPU will recreate this batchbuffer with new
5293         * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5294         * we are not initializing here).
5295         *
5296         * Must keep consistent with virtual_update_register_offsets().
5297         */
5298        set_offsets(regs, reg_offsets(engine), engine, inhibit);
5299
5300        init_common_reg_state(regs, engine, ring, inhibit);
5301        init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5302
5303        init_wa_bb_reg_state(regs, engine);
5304
5305        __reset_stop_ring(regs, engine);
5306}
5307
5308static int
5309populate_lr_context(struct intel_context *ce,
5310                    struct drm_i915_gem_object *ctx_obj,
5311                    struct intel_engine_cs *engine,
5312                    struct intel_ring *ring)
5313{
5314        bool inhibit = true;
5315        void *vaddr;
5316
5317        vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5318        if (IS_ERR(vaddr)) {
5319                drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5320                return PTR_ERR(vaddr);
5321        }
5322
5323        set_redzone(vaddr, engine);
5324
5325        if (engine->default_state) {
5326                shmem_read(engine->default_state, 0,
5327                           vaddr, engine->context_size);
5328                __set_bit(CONTEXT_VALID_BIT, &ce->flags);
5329                inhibit = false;
5330        }
5331
5332        /* Clear the ppHWSP (inc. per-context counters) */
5333        memset(vaddr, 0, PAGE_SIZE);
5334
5335        /*
5336         * The second page of the context object contains some registers which
5337         * must be set up prior to the first execution.
5338         */
5339        execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5340                                 ce, engine, ring, inhibit);
5341
5342        __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5343        i915_gem_object_unpin_map(ctx_obj);
5344        return 0;
5345}
5346
5347static struct intel_timeline *pinned_timeline(struct intel_context *ce)
5348{
5349        struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
5350
5351        return intel_timeline_create_from_engine(ce->engine,
5352                                                 page_unmask_bits(tl));
5353}
5354
5355static int __execlists_context_alloc(struct intel_context *ce,
5356                                     struct intel_engine_cs *engine)
5357{
5358        struct drm_i915_gem_object *ctx_obj;
5359        struct intel_ring *ring;
5360        struct i915_vma *vma;
5361        u32 context_size;
5362        int ret;
5363
5364        GEM_BUG_ON(ce->state);
5365        context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5366
5367        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5368                context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5369
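        /*
         * Gen12 reserves an extra page at the end of the context image
         * for a per-context workaround batch; record which page it is
         * so it can be located later.
         */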
5370        if (INTEL_GEN(engine->i915) == 12) {
5371                ce->wa_bb_page = context_size / PAGE_SIZE;
5372                context_size += PAGE_SIZE;
5373        }
5374
5375        ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5376        if (IS_ERR(ctx_obj))
5377                return PTR_ERR(ctx_obj);
5378
5379        vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5380        if (IS_ERR(vma)) {
5381                ret = PTR_ERR(vma);
5382                goto error_deref_obj;
5383        }
5384
5385        if (!page_mask_bits(ce->timeline)) {
5386                struct intel_timeline *tl;
5387
5388                /*
5389                 * Use the static global HWSP for the kernel context, and
5390                 * a dynamically allocated cacheline for everyone else.
5391                 */
5392                if (unlikely(ce->timeline))
5393                        tl = pinned_timeline(ce);
5394                else
5395                        tl = intel_timeline_create(engine->gt);
5396                if (IS_ERR(tl)) {
5397                        ret = PTR_ERR(tl);
5398                        goto error_deref_obj;
5399                }
5400
5401                ce->timeline = tl;
5402        }
5403
5404        ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5405        if (IS_ERR(ring)) {
5406                ret = PTR_ERR(ring);
5407                goto error_deref_obj;
5408        }
5409
5410        ret = populate_lr_context(ce, ctx_obj, engine, ring);
5411        if (ret) {
5412                drm_dbg(&engine->i915->drm,
5413                        "Failed to populate LRC: %d\n", ret);
5414                goto error_ring_free;
5415        }
5416
5417        ce->ring = ring;
5418        ce->state = vma;
5419
5420        return 0;
5421
5422error_ring_free:
5423        intel_ring_put(ring);
5424error_deref_obj:
5425        i915_gem_object_put(ctx_obj);
5426        return ret;
5427}
5428
5429static struct list_head *virtual_queue(struct virtual_engine *ve)
5430{
5431        return &ve->base.execlists.default_priolist.requests[0];
5432}
5433
5434static void rcu_virtual_context_destroy(struct work_struct *wrk)
5435{
5436        struct virtual_engine *ve =
5437                container_of(wrk, typeof(*ve), rcu.work);
5438        unsigned int n;
5439
5440        GEM_BUG_ON(ve->context.inflight);
5441
5442        /* Preempt-to-busy may leave a stale request behind. */
5443        if (unlikely(ve->request)) {
5444                struct i915_request *old;
5445
5446                spin_lock_irq(&ve->base.active.lock);
5447
5448                old = fetch_and_zero(&ve->request);
5449                if (old) {
5450                        GEM_BUG_ON(!i915_request_completed(old));
5451                        __i915_request_submit(old);
5452                        i915_request_put(old);
5453                }
5454
5455                spin_unlock_irq(&ve->base.active.lock);
5456        }
5457
5458        /*
5459         * Flush the tasklet in case it is still running on another core.
5460         *
5461         * This needs to be done before we remove ourselves from the siblings'
5462         * rbtrees as in the case it is running in parallel, it may reinsert
5463         * the rb_node into a sibling.
5464         */
5465        tasklet_kill(&ve->base.execlists.tasklet);
5466
5467        /* Decouple ourselves from the siblings, no more access allowed. */
5468        for (n = 0; n < ve->num_siblings; n++) {
5469                struct intel_engine_cs *sibling = ve->siblings[n];
5470                struct rb_node *node = &ve->nodes[sibling->id].rb;
5471
5472                if (RB_EMPTY_NODE(node))
5473                        continue;
5474
5475                spin_lock_irq(&sibling->active.lock);
5476
5477                /* Detachment is lazily performed in the execlists tasklet */
5478                if (!RB_EMPTY_NODE(node))
5479                        rb_erase_cached(node, &sibling->execlists.virtual);
5480
5481                spin_unlock_irq(&sibling->active.lock);
5482        }
5483        GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5484        GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5485
5486        if (ve->context.state)
5487                __execlists_context_fini(&ve->context);
5488        intel_context_fini(&ve->context);
5489
5490        intel_breadcrumbs_free(ve->base.breadcrumbs);
5491        intel_engine_free_request_pool(&ve->base);
5492
5493        kfree(ve->bonds);
5494        kfree(ve);
5495}
5496
5497static void virtual_context_destroy(struct kref *kref)
5498{
5499        struct virtual_engine *ve =
5500                container_of(kref, typeof(*ve), context.ref);
5501
5502        GEM_BUG_ON(!list_empty(&ve->context.signals));
5503
5504        /*
5505         * When destroying the virtual engine, we have to be aware that
5506         * it may still be in use from a hardirq/softirq context causing
5507         * the resubmission of a completed request (background completion
5508         * due to preempt-to-busy). Before we can free the engine, we need
5509         * to flush the submission code and tasklets that are still potentially
5510         * accessing the engine. Flushing the tasklets requires process context,
5511         * and since we can guard the resubmit onto the engine with an RCU read
5512         * lock, we can delegate the free of the engine to an RCU worker.
5513         */
5514        INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
5515        queue_rcu_work(system_wq, &ve->rcu);
5516}
5517
5518static void virtual_engine_initial_hint(struct virtual_engine *ve)
5519{
5520        int swp;
5521
5522        /*
5523         * Pick a random sibling on starting to help spread the load around.
5524         *
5525         * New contexts are typically created with exactly the same order
5526         * of siblings, and often started in batches. Due to the way we iterate
5527         * the array of siblings when submitting requests, sibling[0] is
5528         * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5529         * randomised across the system, we also help spread the load by the
5530         * first engine we inspect being different each time.
5531         *
5532         * NB This does not force us to execute on this engine, it will just
5533         * typically be the first we inspect for submission.
5534         */
5535        swp = prandom_u32_max(ve->num_siblings);
5536        if (swp)
5537                swap(ve->siblings[swp], ve->siblings[0]);
5538}
5539
5540static int virtual_context_alloc(struct intel_context *ce)
5541{
5542        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5543
5544        return __execlists_context_alloc(ce, ve->siblings[0]);
5545}
5546
5547static int virtual_context_pin(struct intel_context *ce, void *vaddr)
5548{
5549        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5550
5551        /* Note: we must use a real engine class for setting up reg state */
5552        return __execlists_context_pin(ce, ve->siblings[0], vaddr);
5553}
5554
5555static void virtual_context_enter(struct intel_context *ce)
5556{
5557        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5558        unsigned int n;
5559
5560        for (n = 0; n < ve->num_siblings; n++)
5561                intel_engine_pm_get(ve->siblings[n]);
5562
5563        intel_timeline_enter(ce->timeline);
5564}
5565
5566static void virtual_context_exit(struct intel_context *ce)
5567{
5568        struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5569        unsigned int n;
5570
5571        intel_timeline_exit(ce->timeline);
5572
5573        for (n = 0; n < ve->num_siblings; n++)
5574                intel_engine_pm_put(ve->siblings[n]);
5575}
5576
5577static const struct intel_context_ops virtual_context_ops = {
5578        .alloc = virtual_context_alloc,
5579
5580        .pre_pin = execlists_context_pre_pin,
5581        .pin = virtual_context_pin,
5582        .unpin = execlists_context_unpin,
5583        .post_unpin = execlists_context_post_unpin,
5584
5585        .enter = virtual_context_enter,
5586        .exit = virtual_context_exit,
5587
5588        .destroy = virtual_context_destroy,
5589};
5590
5591static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5592{
5593        struct i915_request *rq;
5594        intel_engine_mask_t mask;
5595
5596        rq = READ_ONCE(ve->request);
5597        if (!rq)
5598                return 0;
5599
5600        /* The rq is ready for submission; rq->execution_mask is now stable. */
5601        mask = rq->execution_mask;
5602        if (unlikely(!mask)) {
5603                /* Invalid selection, submit to a random engine in error */
5604                i915_request_set_error_once(rq, -ENODEV);
5605                mask = ve->siblings[0]->mask;
5606        }
5607
5608        ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5609                     rq->fence.context, rq->fence.seqno,
5610                     mask, ve->base.execlists.queue_priority_hint);
5611
5612        return mask;
5613}
5614
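/*
 * Offer the single queued virtual request to every allowed sibling by
 * (re)inserting this virtual engine's node into each sibling's
 * execlists.virtual rbtree, ordered by priority; whichever physical
 * engine dequeues it first claims the request.
 */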
5615static void virtual_submission_tasklet(unsigned long data)
5616{
5617        struct virtual_engine * const ve = (struct virtual_engine *)data;
5618        const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5619        intel_engine_mask_t mask;
5620        unsigned int n;
5621
5622        rcu_read_lock();
5623        mask = virtual_submission_mask(ve);
5624        rcu_read_unlock();
5625        if (unlikely(!mask))
5626                return;
5627
5628        local_irq_disable();
5629        for (n = 0; n < ve->num_siblings; n++) {
5630                struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5631                struct ve_node * const node = &ve->nodes[sibling->id];
5632                struct rb_node **parent, *rb;
5633                bool first;
5634
5635                if (!READ_ONCE(ve->request))
5636                        break; /* already handled by a sibling's tasklet */
5637
5638                if (unlikely(!(mask & sibling->mask))) {
5639                        if (!RB_EMPTY_NODE(&node->rb)) {
5640                                spin_lock(&sibling->active.lock);
5641                                rb_erase_cached(&node->rb,
5642                                                &sibling->execlists.virtual);
5643                                RB_CLEAR_NODE(&node->rb);
5644                                spin_unlock(&sibling->active.lock);
5645                        }
5646                        continue;
5647                }
5648
5649                spin_lock(&sibling->active.lock);
5650
5651                if (!RB_EMPTY_NODE(&node->rb)) {
5652                        /*
5653                         * Cheat and avoid rebalancing the tree if we can
5654                         * reuse this node in situ.
5655                         */
5656                        first = rb_first_cached(&sibling->execlists.virtual) ==
5657                                &node->rb;
5658                        if (prio == node->prio || (prio > node->prio && first))
5659                                goto submit_engine;
5660
5661                        rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5662                }
5663
5664                rb = NULL;
5665                first = true;
5666                parent = &sibling->execlists.virtual.rb_root.rb_node;
5667                while (*parent) {
5668                        struct ve_node *other;
5669
5670                        rb = *parent;
5671                        other = rb_entry(rb, typeof(*other), rb);
5672                        if (prio > other->prio) {
5673                                parent = &rb->rb_left;
5674                        } else {
5675                                parent = &rb->rb_right;
5676                                first = false;
5677                        }
5678                }
5679
5680                rb_link_node(&node->rb, rb, parent);
5681                rb_insert_color_cached(&node->rb,
5682                                       &sibling->execlists.virtual,
5683                                       first);
5684
5685submit_engine:
5686                GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5687                node->prio = prio;
5688                if (first && prio > sibling->execlists.queue_priority_hint)
5689                        tasklet_hi_schedule(&sibling->execlists.tasklet);
5690
5691                spin_unlock(&sibling->active.lock);
5692        }
5693        local_irq_enable();
5694}
5695
5696static void virtual_submit_request(struct i915_request *rq)
5697{
5698        struct virtual_engine *ve = to_virtual_engine(rq->engine);
5699        struct i915_request *old;
5700        unsigned long flags;
5701
5702        ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5703                     rq->fence.context,
5704                     rq->fence.seqno);
5705
5706        GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5707
5708        spin_lock_irqsave(&ve->base.active.lock, flags);
5709
5710        old = ve->request;
5711        if (old) { /* background completion event from preempt-to-busy */
5712                GEM_BUG_ON(!i915_request_completed(old));
5713                __i915_request_submit(old);
5714                i915_request_put(old);
5715        }
5716
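            /*
             * A request that is already complete is submitted directly and
             * the virtual engine left idle; otherwise it becomes the single
             * pending request for this virtual engine and the submission
             * tasklet is kicked to offer it to the physical siblings.
             */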
5717        if (i915_request_completed(rq)) {
5718                __i915_request_submit(rq);
5719
5720                ve->base.execlists.queue_priority_hint = INT_MIN;
5721                ve->request = NULL;
5722        } else {
5723                ve->base.execlists.queue_priority_hint = rq_prio(rq);
5724                ve->request = i915_request_get(rq);
5725
5726                GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5727                list_move_tail(&rq->sched.link, virtual_queue(ve));
5728
5729                tasklet_hi_schedule(&ve->base.execlists.tasklet);
5730        }
5731
5732        spin_unlock_irqrestore(&ve->base.active.lock, flags);
5733}
5734
5735static struct ve_bond *
5736virtual_find_bond(struct virtual_engine *ve,
5737                  const struct intel_engine_cs *master)
5738{
5739        int i;
5740
5741        for (i = 0; i < ve->num_bonds; i++) {
5742                if (ve->bonds[i].master == master)
5743                        return &ve->bonds[i];
5744        }
5745
5746        return NULL;
5747}
5748
5749static void
5750virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5751{
5752        struct virtual_engine *ve = to_virtual_engine(rq->engine);
5753        intel_engine_mask_t allowed, exec;
5754        struct ve_bond *bond;
5755
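            /*
             * Start by allowing every engine other than the one the
             * signalling (master) request ran on; if a bond was registered
             * for that master, narrow the choice further to the siblings
             * named by the bond.
             */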
5756        allowed = ~to_request(signal)->engine->mask;
5757
5758        bond = virtual_find_bond(ve, to_request(signal)->engine);
5759        if (bond)
5760                allowed &= bond->sibling_mask;
5761
5762        /* Restrict the bonded request to run on only the available engines */
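            /*
             * try_cmpxchg() re-reads the current mask on failure, and we only
             * ever clear bits here, so bits cleared by a racing update to
             * execution_mask are never reinstated.
             */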
5763        exec = READ_ONCE(rq->execution_mask);
5764        while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5765                ;
5766
5767        /* Prevent the master from being re-run on the bonded engines */
5768        to_request(signal)->execution_mask &= ~allowed;
5769}
5770
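    /*
     * intel_execlists_create_virtual() returns a single context whose
     * requests may be scheduled onto any of the @siblings; with count == 1
     * it degenerates to an ordinary context on that engine. A purely
     * illustrative caller (the engine pointers below are hypothetical)
     * might do:
     *
     *        struct intel_engine_cs *siblings[] = { vcs0, vcs1 };
     *        struct intel_context *ce;
     *
     *        ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
     *        if (IS_ERR(ce))
     *                return PTR_ERR(ce);
     */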
5771struct intel_context *
5772intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5773                               unsigned int count)
5774{
5775        struct virtual_engine *ve;
5776        unsigned int n;
5777        int err;
5778
5779        if (count == 0)
5780                return ERR_PTR(-EINVAL);
5781
5782        if (count == 1)
5783                return intel_context_create(siblings[0]);
5784
5785        ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5786        if (!ve)
5787                return ERR_PTR(-ENOMEM);
5788
5789        ve->base.i915 = siblings[0]->i915;
5790        ve->base.gt = siblings[0]->gt;
5791        ve->base.uncore = siblings[0]->uncore;
5792        ve->base.id = -1;
5793
5794        ve->base.class = OTHER_CLASS;
5795        ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5796        ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5797        ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5798
5799        /*
5800         * The decision on whether to submit a request using semaphores
5801         * depends on the saturated state of the engine. We only compute
5802         * this during HW submission of the request, and we need this
5803         * state to be globally applied to all requests being submitted
5804         * to this engine. Virtual engines encompass more than one physical
5805         * engine and so we cannot accurately tell in advance if one of those
5806         * engines is already saturated and so cannot afford to use a semaphore
5807         * and be pessimized in priority for doing so -- if we are the only
5808         * context using semaphores after all other clients have stopped, we
5809         * will be starved on the saturated system. Such a global switch for
5810         * semaphores is less than ideal, but alas is the current compromise.
5811         */
5812        ve->base.saturated = ALL_ENGINES;
5813
5814        snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5815
5816        intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5817        intel_engine_init_execlists(&ve->base);
5818
5819        ve->base.cops = &virtual_context_ops;
5820        ve->base.request_alloc = execlists_request_alloc;
5821
5822        ve->base.schedule = i915_schedule;
5823        ve->base.submit_request = virtual_submit_request;
5824        ve->base.bond_execute = virtual_bond_execute;
5825
5826        INIT_LIST_HEAD(virtual_queue(ve));
5827        ve->base.execlists.queue_priority_hint = INT_MIN;
5828        tasklet_init(&ve->base.execlists.tasklet,
5829                     virtual_submission_tasklet,
5830                     (unsigned long)ve);
5831
5832        intel_context_init(&ve->context, &ve->base);
5833
5834        ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
5835        if (!ve->base.breadcrumbs) {
5836                err = -ENOMEM;
5837                goto err_put;
5838        }
5839
5840        for (n = 0; n < count; n++) {
5841                struct intel_engine_cs *sibling = siblings[n];
5842
5843                GEM_BUG_ON(!is_power_of_2(sibling->mask));
5844                if (sibling->mask & ve->base.mask) {
5845                        DRM_DEBUG("duplicate %s entry in load balancer\n",
5846                                  sibling->name);
5847                        err = -EINVAL;
5848                        goto err_put;
5849                }
5850
5851                /*
5852                 * The virtual engine implementation is tightly coupled to
5853         * the execlists backend -- we push out requests directly
5854                 * into a tree inside each physical engine. We could support
5855                 * layering if we handle cloning of the requests and
5856                 * submitting a copy into each backend.
5857                 */
5858                if (sibling->execlists.tasklet.func !=
5859                    execlists_submission_tasklet) {
5860                        err = -ENODEV;
5861                        goto err_put;
5862                }
5863
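                    /*
                     * ve->nodes[] comes zeroed from kzalloc(), which is
                     * distinct from the "cleared" (not in any tree) rb_node
                     * state; establish the latter explicitly before the node
                     * is ever linked into this sibling's virtual tree.
                     */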
5864                GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5865                RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5866
5867                ve->siblings[ve->num_siblings++] = sibling;
5868                ve->base.mask |= sibling->mask;
5869
5870                /*
5871                 * All physical engines must be compatible for their emission
5872                 * functions (as we build the instructions during request
5873                 * construction and do not alter them before submission
5874                 * on the physical engine). We use the engine class as a guide
5875                 * here, although that could be refined.
5876                 */
5877                if (ve->base.class != OTHER_CLASS) {
5878                        if (ve->base.class != sibling->class) {
5879                                DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5880                                          sibling->class, ve->base.class);
5881                                err = -EINVAL;
5882                                goto err_put;
5883                        }
5884                        continue;
5885                }
5886
5887                ve->base.class = sibling->class;
5888                ve->base.uabi_class = sibling->uabi_class;
5889                snprintf(ve->base.name, sizeof(ve->base.name),
5890                         "v%dx%d", ve->base.class, count);
5891                ve->base.context_size = sibling->context_size;
5892
5893                ve->base.emit_bb_start = sibling->emit_bb_start;
5894                ve->base.emit_flush = sibling->emit_flush;
5895                ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5896                ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5897                ve->base.emit_fini_breadcrumb_dw =
5898                        sibling->emit_fini_breadcrumb_dw;
5899
5900                ve->base.flags = sibling->flags;
5901        }
5902
5903        ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5904
5905        virtual_engine_initial_hint(ve);
5906        return &ve->context;
5907
5908err_put:
5909        intel_context_put(&ve->context);
5910        return ERR_PTR(err);
5911}
5912
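    /*
     * Duplicate an existing virtual engine: the clone gets the same set of
     * sibling engines and a private copy of any bonds attached to the source.
     */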
5913struct intel_context *
5914intel_execlists_clone_virtual(struct intel_engine_cs *src)
5915{
5916        struct virtual_engine *se = to_virtual_engine(src);
5917        struct intel_context *dst;
5918
5919        dst = intel_execlists_create_virtual(se->siblings,
5920                                             se->num_siblings);
5921        if (IS_ERR(dst))
5922                return dst;
5923
5924        if (se->num_bonds) {
5925                struct virtual_engine *de = to_virtual_engine(dst->engine);
5926
5927                de->bonds = kmemdup(se->bonds,
5928                                    sizeof(*se->bonds) * se->num_bonds,
5929                                    GFP_KERNEL);
5930                if (!de->bonds) {
5931                        intel_context_put(dst);
5932                        return ERR_PTR(-ENOMEM);
5933                }
5934
5935                de->num_bonds = se->num_bonds;
5936        }
5937
5938        return dst;
5939}
5940
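    /*
     * Record a bond on the virtual engine: once a request running on @master
     * signals, a bonded request is restricted to the siblings accumulated in
     * that master's bond mask (and never the master's own engine). The
     * sibling must itself be part of the virtual engine, and repeated calls
     * for the same master OR further siblings into the existing bond.
     */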
5941int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5942                                     const struct intel_engine_cs *master,
5943                                     const struct intel_engine_cs *sibling)
5944{
5945        struct virtual_engine *ve = to_virtual_engine(engine);
5946        struct ve_bond *bond;
5947        int n;
5948
5949        /* Sanity check the sibling is part of the virtual engine */
5950        for (n = 0; n < ve->num_siblings; n++)
5951                if (sibling == ve->siblings[n])
5952                        break;
5953        if (n == ve->num_siblings)
5954                return -EINVAL;
5955
5956        bond = virtual_find_bond(ve, master);
5957        if (bond) {
5958                bond->sibling_mask |= sibling->mask;
5959                return 0;
5960        }
5961
5962        bond = krealloc(ve->bonds,
5963                        sizeof(*bond) * (ve->num_bonds + 1),
5964                        GFP_KERNEL);
5965        if (!bond)
5966                return -ENOMEM;
5967
5968        bond[ve->num_bonds].master = master;
5969        bond[ve->num_bonds].sibling_mask = sibling->mask;
5970
5971        ve->bonds = bond;
5972        ve->num_bonds++;
5973
5974        return 0;
5975}
5976
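    /* Look up the @sibling'th physical engine backing a virtual engine, if any. */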
5977struct intel_engine_cs *
5978intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
5979                                 unsigned int sibling)
5980{
5981        struct virtual_engine *ve = to_virtual_engine(engine);
5982
5983        if (sibling >= ve->num_siblings)
5984                return NULL;
5985
5986        return ve->siblings[sibling];
5987}
5988
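    /*
     * Debug dump of an engine's requests via @show_request: currently
     * executing requests are prefixed with "E", those still in the priority
     * queue with "Q", and pending virtual-engine requests with "V". Each list
     * is truncated to at most @max entries, always including the final
     * request and noting how many were skipped.
     */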
5989void intel_execlists_show_requests(struct intel_engine_cs *engine,
5990                                   struct drm_printer *m,
5991                                   void (*show_request)(struct drm_printer *m,
5992                                                        struct i915_request *rq,
5993                                                        const char *prefix),
5994                                   unsigned int max)
5995{
5996        const struct intel_engine_execlists *execlists = &engine->execlists;
5997        struct i915_request *rq, *last;
5998        unsigned long flags;
5999        unsigned int count;
6000        struct rb_node *rb;
6001
6002        spin_lock_irqsave(&engine->active.lock, flags);
6003
6004        last = NULL;
6005        count = 0;
6006        list_for_each_entry(rq, &engine->active.requests, sched.link) {
6007                if (count++ < max - 1)
6008                        show_request(m, rq, "\t\tE ");
6009                else
6010                        last = rq;
6011        }
6012        if (last) {
6013                if (count > max) {
6014                        drm_printf(m,
6015                                   "\t\t...skipping %d executing requests...\n",
6016                                   count - max);
6017                }
6018                show_request(m, last, "\t\tE ");
6019        }
6020
6021        if (execlists->switch_priority_hint != INT_MIN)
6022                drm_printf(m, "\t\tSwitch priority hint: %d\n",
6023                           READ_ONCE(execlists->switch_priority_hint));
6024        if (execlists->queue_priority_hint != INT_MIN)
6025                drm_printf(m, "\t\tQueue priority hint: %d\n",
6026                           READ_ONCE(execlists->queue_priority_hint));
6027
6028        last = NULL;
6029        count = 0;
6030        for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
6031                struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
6032                int i;
6033
6034                priolist_for_each_request(rq, p, i) {
6035                        if (count++ < max - 1)
6036                                show_request(m, rq, "\t\tQ ");
6037                        else
6038                                last = rq;
6039                }
6040        }
6041        if (last) {
6042                if (count > max) {
6043                        drm_printf(m,
6044                                   "\t\t...skipping %d queued requests...\n",
6045                                   count - max);
6046                }
6047                show_request(m, last, "\t\tQ ");
6048        }
6049
6050        last = NULL;
6051        count = 0;
6052        for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
6053                struct virtual_engine *ve =
6054                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
6055                struct i915_request *rq = READ_ONCE(ve->request);
6056
6057                if (rq) {
6058                        if (count++ < max - 1)
6059                                show_request(m, rq, "\t\tV ");
6060                        else
6061                                last = rq;
6062                }
6063        }
6064        if (last) {
6065                if (count > max) {
6066                        drm_printf(m,
6067                                   "\t\t...skipping %d virtual requests...\n",
6068                                   count - max);
6069                }
6070                show_request(m, last, "\t\tV ");
6071        }
6072
6073        spin_unlock_irqrestore(&engine->active.lock, flags);
6074}
6075
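    /*
     * Reset a pinned context after a GPU hang: optionally scrub the context
     * image back to the default state, then rebuild the ring register state
     * at @head so the remaining breadcrumbs can be replayed.
     */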
6076void intel_lr_context_reset(struct intel_engine_cs *engine,
6077                            struct intel_context *ce,
6078                            u32 head,
6079                            bool scrub)
6080{
6081        GEM_BUG_ON(!intel_context_is_pinned(ce));
6082
6083        /*
6084         * We want a simple context + ring to execute the breadcrumb update.
6085         * We cannot rely on the context being intact across the GPU hang,
6086         * so clear it and rebuild just what we need for the breadcrumb.
6087         * All pending requests for this context will be zapped, and any
6088         * future request will be after userspace has had the opportunity
6089         * to recreate its own state.
6090         */
6091        if (scrub)
6092                restore_default_state(ce, engine);
6093
6094        /* Rerun the request; its payload has been neutered (if guilty). */
6095        __execlists_update_reg_state(ce, engine, head);
6096}
6097
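    /*
     * Report whether the engine is using the execlists submission backend,
     * identified by the set_default_submission vfunc it exposes.
     */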
6098bool
6099intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
6100{
6101        return engine->set_default_submission ==
6102               intel_execlists_set_default_submission;
6103}
6104
6105#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
6106#include "selftest_lrc.c"
6107#endif
6108