linux/drivers/gpu/drm/i915/gvt/execlist.c
/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

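/*
 * Per-engine execlist register offsets, relative to the engine's mmio_base:
 * the EXECLIST_STATUS register, the context status buffer (CSB), and the
 * CSB read/write pointer register.  execlist_ring_mmio() composes the full
 * register offset for a given engine.
 */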
#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

#define execlist_ring_mmio(e, offset) ((e)->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))

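/* Map an engine id to the virtual context-switch event injected into the guest. */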
static int context_switch_events[] = {
	[RCS0]  = RCS_AS_CONTEXT_SWITCH,
	[BCS0]  = BCS_AS_CONTEXT_SWITCH,
	[VCS0]  = VCS_AS_CONTEXT_SWITCH,
	[VCS1]  = VCS2_AS_CONTEXT_SWITCH,
	[VECS0] = VECS_AS_CONTEXT_SWITCH,
};

static int to_context_switch_event(const struct intel_engine_cs *engine)
{
	if (WARN_ON(engine->id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[engine->id];
}

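/*
 * Promote the pending execlist slot to running.  If a context was running,
 * element 0 of the newly running slot becomes the running context.
 */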
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);
}

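/*
 * Recompute the guest-visible EXECLIST_STATUS register from the current
 * running/pending slot state, so the vGPU reads a status consistent with
 * the submissions emulated so far.
 */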
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	u32 status_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!running->index;
		status.execlist_1_active = status.execlist_1_valid =
			!!running->index;
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!pending;

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}

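/*
 * Append one context-status event to the guest's CSB: advance the write
 * pointer, store the event in the CSB register range and in the CSB image
 * inside the guest's hardware status page (HWSP), then raise the engine's
 * context-switch interrupt unless the caller asked to defer it.
 */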
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
			       struct execlist_context_status_format *status,
			       bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
	unsigned long hwsp_gpa;

	ctx_status_ptr_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

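	/*
	 * The six-entry CSB wraps at index 5; the reset value 0x7 marks an
	 * empty buffer and advances to entry 0.  E.g. 0x7 -> 0, 4 -> 5, and
	 * 5 -> 0, since (5 + 1) % 6 == 0.
	 */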
	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	/* Update the CSB and CSB write pointer in HWSP */
	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
					 vgpu->hws_pga[execlist->engine->id]);
	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
		intel_gvt_hypervisor_write_gpa(vgpu,
					       hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + write_pointer * 8,
					       status, 8);
		intel_gvt_hypervisor_write_gpa(vgpu,
					       hwsp_gpa + intel_hws_csb_write_index(execlist->engine->i915) * 4,
					       &write_pointer, 4);
	}

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		   vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
					 to_context_switch_event(execlist->engine));
}

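/*
 * Emulate completion of one context on the running execlist.  Depending on
 * which element finished, this emits either a context-complete/element-switch
 * event, or a context-complete/active-to-idle event followed by an
 * idle-to-active event when a pending execlist takes over.
 */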
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_vgpu_err("schedule out context is not running context, "
				"ctx id %x running ctx id %x\n",
				ctx->context_id,
				execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
		/*
		 * ctx1 is not valid, ctx == ctx0
		 * ctx1 is valid, ctx1 == ctx
		 *	--> last element is finished
		 * emulate:
		 *	active-to-idle if there is *no* pending execlist
		 *	context-complete if there *is* pending execlist
		 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}

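/*
 * Pick the slot selected by the guest-visible execlist write pointer for
 * the next submission, or NULL if the virtual execlist queue is full.
 */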
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	u32 status_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_vgpu_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}

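/*
 * Emulate an ELSP submission of a two-context bundle.  Three cases:
 * idle-to-active when nothing is running, lite-restore + preempted when
 * the new bundle continues the running context, otherwise the bundle is
 * queued as the pending execlist with no CSB event.
 */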
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;
	struct intel_vgpu *vgpu = execlist->vgpu;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_vgpu_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist: make this submitted bundle the running
	 * execlist -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * there is already a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 * --> emulate the "pending execlist exists, but no
		 * preemption" case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}
	return 0;
}

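/*
 * An ELSP submission is captured as four dwords; descriptor i occupies the
 * two consecutive dwords starting at data[i * 2].
 */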
#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

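/*
 * Workload prepare callback: replay the guest's ELSP write through the
 * schedule-in emulation before the workload runs on the host engine.
 */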
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct execlist_ctx_descriptor_format ctx[2];
	int ret;

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

	ret = emulate_execlist_schedule_in(&s->execlist[workload->engine->id],
					   ctx);
	if (ret) {
		gvt_vgpu_err("failed to emulate execlist schedule-in\n");
		return ret;
	}
	return 0;
}

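/*
 * Workload complete callback: emulate schedule-out for the finished
 * context.  Skipped when the workload failed, the engine is being reset,
 * or the next queued workload uses the same context (a lite-restore, which
 * must not raise a schedule-out event).
 */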
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist =
		&s->execlist[workload->engine->id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, workload->engine)->next;
	bool lite_restore = false;
	int ret = 0;

	gvt_dbg_el("complete workload %p status %d\n",
		   workload, workload->status);

	if (workload->status || vgpu->resetting_eng & workload->engine->mask)
		goto out;

	if (!list_empty(workload_q_head(vgpu, workload->engine))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		goto out;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
	return ret;
}

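/*
 * Build a vGPU workload from one context descriptor and queue it for the
 * host scheduler.  When emulate_schedule_in is set, the guest's ELSP
 * dwords are snapshotted so the prepare callback can replay them.
 */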
static int submit_context(struct intel_vgpu *vgpu,
			  const struct intel_engine_cs *engine,
			  struct execlist_ctx_descriptor_format *desc,
			  bool emulate_schedule_in)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload = NULL;

	workload = intel_vgpu_create_workload(vgpu, engine, desc);
	if (IS_ERR(workload))
		return PTR_ERR(workload);

	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (emulate_schedule_in)
		workload->elsp_dwords = s->execlist[engine->id].elsp_dwords;

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
		   emulate_schedule_in);

	intel_vgpu_queue_workload(workload);
	return 0;
}

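/*
 * Entry point for a guest ELSP write: validate the captured descriptors
 * (descriptor 0 must be valid, and every valid descriptor must have the
 * privilege-access bit set; GGTT submissions are rejected), then submit
 * each valid context as a workload.  Schedule-in is emulated only for
 * descriptor 0.
 */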
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu,
			       const struct intel_engine_cs *engine)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
	struct execlist_ctx_descriptor_format *desc[2];
	int i, ret;

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

	if (!desc[0]->valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		if (!desc[i]->privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			goto inv_desc;
		}
	}

	/* submit workload */
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		ret = submit_context(vgpu, engine, desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
	return -EINVAL;
}

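/*
 * Reset the virtual execlist state for one engine and program the CSB
 * pointer register to its empty state (read pointer 0, write pointer 0x7).
 */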
static void init_vgpu_execlist(struct intel_vgpu *vgpu,
			       const struct intel_engine_cs *engine)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->engine = engine;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(engine, _EL_OFFSET_STATUS_PTR);
	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = 0;
	ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}

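/* Free the per-engine ring scan buffers used by the command parser. */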
static void clean_execlist(struct intel_vgpu *vgpu,
			   intel_engine_mask_t engine_mask)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;

	for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) {
		kfree(s->ring_scan_buffer[engine->id]);
		s->ring_scan_buffer[engine->id] = NULL;
		s->ring_scan_buffer_size[engine->id] = 0;
	}
}

static void reset_execlist(struct intel_vgpu *vgpu,
			   intel_engine_mask_t engine_mask)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;

	for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp)
		init_vgpu_execlist(vgpu, engine);
}

static int init_execlist(struct intel_vgpu *vgpu,
			 intel_engine_mask_t engine_mask)
{
	reset_execlist(vgpu, engine_mask);
	return 0;
}

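/* Submission backend used when the vGPU runs in execlist mode. */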
const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
	.name = "execlist",
	.init = init_execlist,
	.reset = reset_execlist,
	.clean = clean_execlist,
};