linux/drivers/gpu/drm/i915/gt/selftest_lrc.c
   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2018 Intel Corporation
   5 */
   6
   7#include <linux/prime_numbers.h>
   8
   9#include "gem/i915_gem_pm.h"
  10#include "gt/intel_engine_heartbeat.h"
  11#include "gt/intel_reset.h"
  12#include "gt/selftest_engine_heartbeat.h"
  13
  14#include "i915_selftest.h"
  15#include "selftests/i915_random.h"
  16#include "selftests/igt_flush_test.h"
  17#include "selftests/igt_live_test.h"
  18#include "selftests/igt_spinner.h"
  19#include "selftests/lib_sw_fence.h"
  20
  21#include "gem/selftests/igt_gem_utils.h"
  22#include "gem/selftests/mock_context.h"
  23
  24#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
  25#define NUM_GPR 16
  26#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
  27
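     /*
      * Allocate a page of cacheable scratch memory and pin it into the GGTT,
      * giving the selftests somewhere to read back results written by the GPU.
      */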
  28static struct i915_vma *create_scratch(struct intel_gt *gt)
  29{
  30        struct drm_i915_gem_object *obj;
  31        struct i915_vma *vma;
  32        int err;
  33
  34        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
  35        if (IS_ERR(obj))
  36                return ERR_CAST(obj);
  37
  38        i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
  39
  40        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
  41        if (IS_ERR(vma)) {
  42                i915_gem_object_put(obj);
  43                return vma;
  44        }
  45
  46        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
  47        if (err) {
  48                i915_gem_object_put(obj);
  49                return ERR_PTR(err);
  50        }
  51
  52        return vma;
  53}
  54
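     /*
      * Consider a request active once it is in the ELSP, parked on hold, or
      * has already started executing on the HW.
      */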
  55static bool is_active(struct i915_request *rq)
  56{
  57        if (i915_request_is_active(rq))
  58                return true;
  59
  60        if (i915_request_on_hold(rq))
  61                return true;
  62
  63        if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
  64                return true;
  65
  66        return false;
  67}
  68
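     /*
      * Busy-wait (with cond_resched) until the HW has acknowledged submission
      * of @rq -- i.e. no ELSP write is still pending and the request is
      * active -- or until @timeout jiffies have elapsed.
      */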
  69static int wait_for_submit(struct intel_engine_cs *engine,
  70                           struct i915_request *rq,
  71                           unsigned long timeout)
  72{
  73        timeout += jiffies;
  74        do {
  75                bool done = time_after(jiffies, timeout);
  76
  77                if (i915_request_completed(rq)) /* that was quick! */
  78                        return 0;
  79
   80                /* Wait until the HW has acknowledged the submission (or err) */
  81                intel_engine_flush_submission(engine);
  82                if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
  83                        return 0;
  84
  85                if (done)
  86                        return -ETIME;
  87
  88                cond_resched();
  89        } while (1);
  90}
  91
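     /*
      * Poll until the engine reset has marked the hanging request @rq with
      * -EIO, then give it a little longer to actually complete after
      * flushing the reset worker.
      */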
  92static int wait_for_reset(struct intel_engine_cs *engine,
  93                          struct i915_request *rq,
  94                          unsigned long timeout)
  95{
  96        timeout += jiffies;
  97
  98        do {
  99                cond_resched();
 100                intel_engine_flush_submission(engine);
 101
 102                if (READ_ONCE(engine->execlists.pending[0]))
 103                        continue;
 104
 105                if (i915_request_completed(rq))
 106                        break;
 107
 108                if (READ_ONCE(rq->fence.error))
 109                        break;
 110        } while (time_before(jiffies, timeout));
 111
 112        flush_scheduled_work();
 113
 114        if (rq->fence.error != -EIO) {
 115                pr_err("%s: hanging request %llx:%lld not reset\n",
 116                       engine->name,
 117                       rq->fence.context,
 118                       rq->fence.seqno);
 119                return -EINVAL;
 120        }
 121
 122        /* Give the request a jiffie to complete after flushing the worker */
 123        if (i915_request_wait(rq, 0,
 124                              max(0l, (long)(timeout - jiffies)) + 1) < 0) {
 125                pr_err("%s: hanging request %llx:%lld did not complete\n",
 126                       engine->name,
 127                       rq->fence.context,
 128                       rq->fence.seqno);
 129                return -ETIME;
 130        }
 131
 132        return 0;
 133}
 134
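     /*
      * Smoke test: on every engine, submit a spinning batch on a fresh
      * context, check that it starts executing, then terminate it and flush.
      */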
 135static int live_sanitycheck(void *arg)
 136{
 137        struct intel_gt *gt = arg;
 138        struct intel_engine_cs *engine;
 139        enum intel_engine_id id;
 140        struct igt_spinner spin;
 141        int err = 0;
 142
 143        if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
 144                return 0;
 145
 146        if (igt_spinner_init(&spin, gt))
 147                return -ENOMEM;
 148
 149        for_each_engine(engine, gt, id) {
 150                struct intel_context *ce;
 151                struct i915_request *rq;
 152
 153                ce = intel_context_create(engine);
 154                if (IS_ERR(ce)) {
 155                        err = PTR_ERR(ce);
 156                        break;
 157                }
 158
 159                rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 160                if (IS_ERR(rq)) {
 161                        err = PTR_ERR(rq);
 162                        goto out_ctx;
 163                }
 164
 165                i915_request_add(rq);
 166                if (!igt_wait_for_spinner(&spin, rq)) {
 167                        GEM_TRACE("spinner failed to start\n");
 168                        GEM_TRACE_DUMP();
 169                        intel_gt_set_wedged(gt);
 170                        err = -EIO;
 171                        goto out_ctx;
 172                }
 173
 174                igt_spinner_end(&spin);
 175                if (igt_flush_test(gt->i915)) {
 176                        err = -EIO;
 177                        goto out_ctx;
 178                }
 179
 180out_ctx:
 181                intel_context_put(ce);
 182                if (err)
 183                        break;
 184        }
 185
 186        igt_spinner_fini(&spin);
 187        return err;
 188}
 189
 190static int live_unlite_restore(struct intel_gt *gt, int prio)
 191{
 192        struct intel_engine_cs *engine;
 193        enum intel_engine_id id;
 194        struct igt_spinner spin;
 195        int err = -ENOMEM;
 196
 197        /*
 198         * Check that we can correctly context switch between 2 instances
 199         * on the same engine from the same parent context.
 200         */
 201
 202        if (igt_spinner_init(&spin, gt))
 203                return err;
 204
 205        err = 0;
 206        for_each_engine(engine, gt, id) {
 207                struct intel_context *ce[2] = {};
 208                struct i915_request *rq[2];
 209                struct igt_live_test t;
 210                int n;
 211
 212                if (prio && !intel_engine_has_preemption(engine))
 213                        continue;
 214
 215                if (!intel_engine_can_store_dword(engine))
 216                        continue;
 217
 218                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 219                        err = -EIO;
 220                        break;
 221                }
 222                st_engine_heartbeat_disable(engine);
 223
 224                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 225                        struct intel_context *tmp;
 226
 227                        tmp = intel_context_create(engine);
 228                        if (IS_ERR(tmp)) {
 229                                err = PTR_ERR(tmp);
 230                                goto err_ce;
 231                        }
 232
 233                        err = intel_context_pin(tmp);
 234                        if (err) {
 235                                intel_context_put(tmp);
 236                                goto err_ce;
 237                        }
 238
 239                        /*
  240                         * Set up the pair of contexts such that if we
 241                         * lite-restore using the RING_TAIL from ce[1] it
 242                         * will execute garbage from ce[0]->ring.
 243                         */
 244                        memset(tmp->ring->vaddr,
 245                               POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
 246                               tmp->ring->vma->size);
 247
 248                        ce[n] = tmp;
 249                }
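                     /*
                      * Start ce[1]'s ring at its midpoint so that its RING_TAIL
                      * is guaranteed to lie beyond rq[0]'s postfix in ce[0]'s
                      * ring (checked by the GEM_BUG_ON below).
                      */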
 250                GEM_BUG_ON(!ce[1]->ring->size);
 251                intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
 252                __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
 253
 254                rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 255                if (IS_ERR(rq[0])) {
 256                        err = PTR_ERR(rq[0]);
 257                        goto err_ce;
 258                }
 259
 260                i915_request_get(rq[0]);
 261                i915_request_add(rq[0]);
 262                GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
 263
 264                if (!igt_wait_for_spinner(&spin, rq[0])) {
 265                        i915_request_put(rq[0]);
 266                        goto err_ce;
 267                }
 268
 269                rq[1] = i915_request_create(ce[1]);
 270                if (IS_ERR(rq[1])) {
 271                        err = PTR_ERR(rq[1]);
 272                        i915_request_put(rq[0]);
 273                        goto err_ce;
 274                }
 275
 276                if (!prio) {
 277                        /*
 278                         * Ensure we do the switch to ce[1] on completion.
 279                         *
 280                         * rq[0] is already submitted, so this should reduce
 281                         * to a no-op (a wait on a request on the same engine
 282                         * uses the submit fence, not the completion fence),
 283                         * but it will install a dependency on rq[1] for rq[0]
 284                         * that will prevent the pair being reordered by
 285                         * timeslicing.
 286                         */
 287                        i915_request_await_dma_fence(rq[1], &rq[0]->fence);
 288                }
 289
 290                i915_request_get(rq[1]);
 291                i915_request_add(rq[1]);
 292                GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
 293                i915_request_put(rq[0]);
 294
 295                if (prio) {
 296                        struct i915_sched_attr attr = {
 297                                .priority = prio,
 298                        };
 299
 300                        /* Alternatively preempt the spinner with ce[1] */
 301                        engine->schedule(rq[1], &attr);
 302                }
 303
 304                /* And switch back to ce[0] for good measure */
 305                rq[0] = i915_request_create(ce[0]);
 306                if (IS_ERR(rq[0])) {
 307                        err = PTR_ERR(rq[0]);
 308                        i915_request_put(rq[1]);
 309                        goto err_ce;
 310                }
 311
 312                i915_request_await_dma_fence(rq[0], &rq[1]->fence);
 313                i915_request_get(rq[0]);
 314                i915_request_add(rq[0]);
 315                GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
 316                i915_request_put(rq[1]);
 317                i915_request_put(rq[0]);
 318
 319err_ce:
 320                intel_engine_flush_submission(engine);
 321                igt_spinner_end(&spin);
 322                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 323                        if (IS_ERR_OR_NULL(ce[n]))
 324                                break;
 325
 326                        intel_context_unpin(ce[n]);
 327                        intel_context_put(ce[n]);
 328                }
 329
 330                st_engine_heartbeat_enable(engine);
 331                if (igt_live_test_end(&t))
 332                        err = -EIO;
 333                if (err)
 334                        break;
 335        }
 336
 337        igt_spinner_fini(&spin);
 338        return err;
 339}
 340
 341static int live_unlite_switch(void *arg)
 342{
 343        return live_unlite_restore(arg, 0);
 344}
 345
 346static int live_unlite_preempt(void *arg)
 347{
 348        return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 349}
 350
 351static int live_unlite_ring(void *arg)
 352{
 353        struct intel_gt *gt = arg;
 354        struct intel_engine_cs *engine;
 355        struct igt_spinner spin;
 356        enum intel_engine_id id;
 357        int err = 0;
 358
 359        /*
  360         * Set up a preemption event that will cause almost the entire ring
 361         * to be unwound, potentially fooling our intel_ring_direction()
 362         * into emitting a forward lite-restore instead of the rollback.
 363         */
 364
 365        if (igt_spinner_init(&spin, gt))
 366                return -ENOMEM;
 367
 368        for_each_engine(engine, gt, id) {
 369                struct intel_context *ce[2] = {};
 370                struct i915_request *rq;
 371                struct igt_live_test t;
 372                int n;
 373
 374                if (!intel_engine_has_preemption(engine))
 375                        continue;
 376
 377                if (!intel_engine_can_store_dword(engine))
 378                        continue;
 379
 380                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 381                        err = -EIO;
 382                        break;
 383                }
 384                st_engine_heartbeat_disable(engine);
 385
 386                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 387                        struct intel_context *tmp;
 388
 389                        tmp = intel_context_create(engine);
 390                        if (IS_ERR(tmp)) {
 391                                err = PTR_ERR(tmp);
 392                                goto err_ce;
 393                        }
 394
 395                        err = intel_context_pin(tmp);
 396                        if (err) {
 397                                intel_context_put(tmp);
 398                                goto err_ce;
 399                        }
 400
 401                        memset32(tmp->ring->vaddr,
 402                                 0xdeadbeef, /* trigger a hang if executed */
 403                                 tmp->ring->vma->size / sizeof(u32));
 404
 405                        ce[n] = tmp;
 406                }
 407
 408                /* Create max prio spinner, followed by N low prio nops */
 409                rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 410                if (IS_ERR(rq)) {
 411                        err = PTR_ERR(rq);
 412                        goto err_ce;
 413                }
 414
 415                i915_request_get(rq);
 416                rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 417                i915_request_add(rq);
 418
 419                if (!igt_wait_for_spinner(&spin, rq)) {
 420                        intel_gt_set_wedged(gt);
 421                        i915_request_put(rq);
 422                        err = -ETIME;
 423                        goto err_ce;
 424                }
 425
  426                /* Fill the ring until we cause a wrap */
 427                n = 0;
 428                while (intel_ring_direction(ce[0]->ring,
 429                                            rq->wa_tail,
 430                                            ce[0]->ring->tail) <= 0) {
 431                        struct i915_request *tmp;
 432
 433                        tmp = intel_context_create_request(ce[0]);
 434                        if (IS_ERR(tmp)) {
 435                                err = PTR_ERR(tmp);
 436                                i915_request_put(rq);
 437                                goto err_ce;
 438                        }
 439
 440                        i915_request_add(tmp);
 441                        intel_engine_flush_submission(engine);
 442                        n++;
 443                }
 444                intel_engine_flush_submission(engine);
 445                pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
 446                         engine->name, n,
 447                         ce[0]->ring->size,
 448                         ce[0]->ring->tail,
 449                         ce[0]->ring->emit,
 450                         rq->tail);
 451                GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
 452                                                rq->tail,
 453                                                ce[0]->ring->tail) <= 0);
 454                i915_request_put(rq);
 455
 456                /* Create a second ring to preempt the first ring after rq[0] */
 457                rq = intel_context_create_request(ce[1]);
 458                if (IS_ERR(rq)) {
 459                        err = PTR_ERR(rq);
 460                        goto err_ce;
 461                }
 462
 463                rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 464                i915_request_get(rq);
 465                i915_request_add(rq);
 466
 467                err = wait_for_submit(engine, rq, HZ / 2);
 468                i915_request_put(rq);
 469                if (err) {
 470                        pr_err("%s: preemption request was not submitted\n",
 471                               engine->name);
 472                        err = -ETIME;
 473                }
 474
 475                pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
 476                         engine->name,
 477                         ce[0]->ring->tail, ce[0]->ring->emit,
 478                         ce[1]->ring->tail, ce[1]->ring->emit);
 479
 480err_ce:
 481                intel_engine_flush_submission(engine);
 482                igt_spinner_end(&spin);
 483                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 484                        if (IS_ERR_OR_NULL(ce[n]))
 485                                break;
 486
 487                        intel_context_unpin(ce[n]);
 488                        intel_context_put(ce[n]);
 489                }
 490                st_engine_heartbeat_enable(engine);
 491                if (igt_live_test_end(&t))
 492                        err = -EIO;
 493                if (err)
 494                        break;
 495        }
 496
 497        igt_spinner_fini(&spin);
 498        return err;
 499}
 500
 501static int live_pin_rewind(void *arg)
 502{
 503        struct intel_gt *gt = arg;
 504        struct intel_engine_cs *engine;
 505        enum intel_engine_id id;
 506        int err = 0;
 507
 508        /*
  509         * We have to be careful not to trust intel_ring too much; for example,
  510         * ring->head is updated upon retire, which is out of sync with pinning
 511         * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
 512         * or else we risk writing an older, stale value.
 513         *
  514         * To simulate this, let's apply a bit of deliberate sabotage.
 515         */
 516
 517        for_each_engine(engine, gt, id) {
 518                struct intel_context *ce;
 519                struct i915_request *rq;
 520                struct intel_ring *ring;
 521                struct igt_live_test t;
 522
 523                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 524                        err = -EIO;
 525                        break;
 526                }
 527
 528                ce = intel_context_create(engine);
 529                if (IS_ERR(ce)) {
 530                        err = PTR_ERR(ce);
 531                        break;
 532                }
 533
 534                err = intel_context_pin(ce);
 535                if (err) {
 536                        intel_context_put(ce);
 537                        break;
 538                }
 539
 540                /* Keep the context awake while we play games */
 541                err = i915_active_acquire(&ce->active);
 542                if (err) {
 543                        intel_context_unpin(ce);
 544                        intel_context_put(ce);
 545                        break;
 546                }
 547                ring = ce->ring;
 548
 549                /* Poison the ring, and offset the next request from HEAD */
 550                memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
 551                ring->emit = ring->size / 2;
 552                ring->tail = ring->emit;
 553                GEM_BUG_ON(ring->head);
 554
 555                intel_context_unpin(ce);
 556
 557                /* Submit a simple nop request */
 558                GEM_BUG_ON(intel_context_is_pinned(ce));
 559                rq = intel_context_create_request(ce);
 560                i915_active_release(&ce->active); /* e.g. async retire */
 561                intel_context_put(ce);
 562                if (IS_ERR(rq)) {
 563                        err = PTR_ERR(rq);
 564                        break;
 565                }
 566                GEM_BUG_ON(!rq->head);
 567                i915_request_add(rq);
 568
 569                /* Expect not to hang! */
 570                if (igt_live_test_end(&t)) {
 571                        err = -EIO;
 572                        break;
 573                }
 574        }
 575
 576        return err;
 577}
 578
 579static int live_hold_reset(void *arg)
 580{
 581        struct intel_gt *gt = arg;
 582        struct intel_engine_cs *engine;
 583        enum intel_engine_id id;
 584        struct igt_spinner spin;
 585        int err = 0;
 586
 587        /*
 588         * In order to support offline error capture for fast preempt reset,
 589         * we need to decouple the guilty request and ensure that it and its
  590         * descendants are not executed while the capture is in progress.
 591         */
 592
 593        if (!intel_has_reset_engine(gt))
 594                return 0;
 595
 596        if (igt_spinner_init(&spin, gt))
 597                return -ENOMEM;
 598
 599        for_each_engine(engine, gt, id) {
 600                struct intel_context *ce;
 601                struct i915_request *rq;
 602
 603                ce = intel_context_create(engine);
 604                if (IS_ERR(ce)) {
 605                        err = PTR_ERR(ce);
 606                        break;
 607                }
 608
 609                st_engine_heartbeat_disable(engine);
 610
 611                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 612                if (IS_ERR(rq)) {
 613                        err = PTR_ERR(rq);
 614                        goto out;
 615                }
 616                i915_request_add(rq);
 617
 618                if (!igt_wait_for_spinner(&spin, rq)) {
 619                        intel_gt_set_wedged(gt);
 620                        err = -ETIME;
 621                        goto out;
 622                }
 623
  624                /* We have our request executing; now remove it and reset */
 625
 626                if (test_and_set_bit(I915_RESET_ENGINE + id,
 627                                     &gt->reset.flags)) {
 628                        intel_gt_set_wedged(gt);
 629                        err = -EBUSY;
 630                        goto out;
 631                }
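                     /*
                      * Hold off the submission tasklet and run it once by hand
                      * so that any pending CSB events are processed and
                      * execlists_active() points at our spinner before we
                      * reset the engine.
                      */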
 632                tasklet_disable(&engine->execlists.tasklet);
 633
 634                engine->execlists.tasklet.func(engine->execlists.tasklet.data);
 635                GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 636
 637                i915_request_get(rq);
 638                execlists_hold(engine, rq);
 639                GEM_BUG_ON(!i915_request_on_hold(rq));
 640
 641                intel_engine_reset(engine, NULL);
 642                GEM_BUG_ON(rq->fence.error != -EIO);
 643
 644                tasklet_enable(&engine->execlists.tasklet);
 645                clear_and_wake_up_bit(I915_RESET_ENGINE + id,
 646                                      &gt->reset.flags);
 647
 648                /* Check that we do not resubmit the held request */
 649                if (!i915_request_wait(rq, 0, HZ / 5)) {
 650                        pr_err("%s: on hold request completed!\n",
 651                               engine->name);
 652                        i915_request_put(rq);
 653                        err = -EIO;
 654                        goto out;
 655                }
 656                GEM_BUG_ON(!i915_request_on_hold(rq));
 657
 658                /* But is resubmitted on release */
 659                execlists_unhold(engine, rq);
 660                if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 661                        pr_err("%s: held request did not complete!\n",
 662                               engine->name);
 663                        intel_gt_set_wedged(gt);
 664                        err = -ETIME;
 665                }
 666                i915_request_put(rq);
 667
 668out:
 669                st_engine_heartbeat_enable(engine);
 670                intel_context_put(ce);
 671                if (err)
 672                        break;
 673        }
 674
 675        igt_spinner_fini(&spin);
 676        return err;
 677}
 678
 679static const char *error_repr(int err)
 680{
 681        return err ? "bad" : "good";
 682}
 683
 684static int live_error_interrupt(void *arg)
 685{
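             /*
              * Each phase submits a pair of back-to-back requests; a BAD request
              * contains an invalid command that the CS error interrupt should
              * flag with -EIO, while a GOOD one is just a couple of NOPs.
              */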
 686        static const struct error_phase {
 687                enum { GOOD = 0, BAD = -EIO } error[2];
 688        } phases[] = {
 689                { { BAD,  GOOD } },
 690                { { BAD,  BAD  } },
 691                { { BAD,  GOOD } },
 692                { { GOOD, GOOD } }, /* sentinel */
 693        };
 694        struct intel_gt *gt = arg;
 695        struct intel_engine_cs *engine;
 696        enum intel_engine_id id;
 697
 698        /*
 699         * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
 700         * of invalid commands in user batches that will cause a GPU hang.
 701         * This is a faster mechanism than using hangcheck/heartbeats, but
 702         * only detects problems the HW knows about -- it will not warn when
 703         * we kill the HW!
 704         *
 705         * To verify our detection and reset, we throw some invalid commands
 706         * at the HW and wait for the interrupt.
 707         */
 708
 709        if (!intel_has_reset_engine(gt))
 710                return 0;
 711
 712        for_each_engine(engine, gt, id) {
 713                const struct error_phase *p;
 714                int err = 0;
 715
 716                st_engine_heartbeat_disable(engine);
 717
 718                for (p = phases; p->error[0] != GOOD; p++) {
 719                        struct i915_request *client[ARRAY_SIZE(phases->error)];
 720                        u32 *cs;
 721                        int i;
 722
  723                        memset(client, 0, sizeof(client)); /* zero both slots for the unwind below */
 724                        for (i = 0; i < ARRAY_SIZE(client); i++) {
 725                                struct intel_context *ce;
 726                                struct i915_request *rq;
 727
 728                                ce = intel_context_create(engine);
 729                                if (IS_ERR(ce)) {
 730                                        err = PTR_ERR(ce);
 731                                        goto out;
 732                                }
 733
 734                                rq = intel_context_create_request(ce);
 735                                intel_context_put(ce);
 736                                if (IS_ERR(rq)) {
 737                                        err = PTR_ERR(rq);
 738                                        goto out;
 739                                }
 740
 741                                if (rq->engine->emit_init_breadcrumb) {
 742                                        err = rq->engine->emit_init_breadcrumb(rq);
 743                                        if (err) {
 744                                                i915_request_add(rq);
 745                                                goto out;
 746                                        }
 747                                }
 748
 749                                cs = intel_ring_begin(rq, 2);
 750                                if (IS_ERR(cs)) {
 751                                        i915_request_add(rq);
 752                                        err = PTR_ERR(cs);
 753                                        goto out;
 754                                }
 755
 756                                if (p->error[i]) {
 757                                        *cs++ = 0xdeadbeef;
 758                                        *cs++ = 0xdeadbeef;
 759                                } else {
 760                                        *cs++ = MI_NOOP;
 761                                        *cs++ = MI_NOOP;
 762                                }
 763
 764                                client[i] = i915_request_get(rq);
 765                                i915_request_add(rq);
 766                        }
 767
 768                        err = wait_for_submit(engine, client[0], HZ / 2);
 769                        if (err) {
 770                                pr_err("%s: first request did not start within time!\n",
 771                                       engine->name);
 772                                err = -ETIME;
 773                                goto out;
 774                        }
 775
 776                        for (i = 0; i < ARRAY_SIZE(client); i++) {
 777                                if (i915_request_wait(client[i], 0, HZ / 5) < 0)
 778                                        pr_debug("%s: %s request incomplete!\n",
 779                                                 engine->name,
 780                                                 error_repr(p->error[i]));
 781
 782                                if (!i915_request_started(client[i])) {
 783                                        pr_err("%s: %s request not started!\n",
 784                                               engine->name,
 785                                               error_repr(p->error[i]));
 786                                        err = -ETIME;
 787                                        goto out;
 788                                }
 789
 790                                /* Kick the tasklet to process the error */
 791                                intel_engine_flush_submission(engine);
 792                                if (client[i]->fence.error != p->error[i]) {
 793                                        pr_err("%s: %s request (%s) with wrong error code: %d\n",
 794                                               engine->name,
 795                                               error_repr(p->error[i]),
 796                                               i915_request_completed(client[i]) ? "completed" : "running",
 797                                               client[i]->fence.error);
 798                                        err = -EINVAL;
 799                                        goto out;
 800                                }
 801                        }
 802
 803out:
 804                        for (i = 0; i < ARRAY_SIZE(client); i++)
 805                                if (client[i])
 806                                        i915_request_put(client[i]);
 807                        if (err) {
 808                                pr_err("%s: failed at phase[%zd] { %d, %d }\n",
 809                                       engine->name, p - phases,
 810                                       p->error[0], p->error[1]);
 811                                break;
 812                        }
 813                }
 814
 815                st_engine_heartbeat_enable(engine);
 816                if (err) {
 817                        intel_gt_set_wedged(gt);
 818                        return err;
 819                }
 820        }
 821
 822        return 0;
 823}
 824
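     /*
      * Emit a link in a semaphore chain: busy-wait (with arbitration enabled)
      * on dword @idx of @vma until it becomes non-zero, then release the
      * previous link by writing 1 into dword @idx - 1.
      */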
 825static int
 826emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 827{
 828        u32 *cs;
 829
 830        cs = intel_ring_begin(rq, 10);
 831        if (IS_ERR(cs))
 832                return PTR_ERR(cs);
 833
 834        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 835
 836        *cs++ = MI_SEMAPHORE_WAIT |
 837                MI_SEMAPHORE_GLOBAL_GTT |
 838                MI_SEMAPHORE_POLL |
 839                MI_SEMAPHORE_SAD_NEQ_SDD;
 840        *cs++ = 0;
 841        *cs++ = i915_ggtt_offset(vma) + 4 * idx;
 842        *cs++ = 0;
 843
 844        if (idx > 0) {
 845                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 846                *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 847                *cs++ = 0;
 848                *cs++ = 1;
 849        } else {
 850                *cs++ = MI_NOOP;
 851                *cs++ = MI_NOOP;
 852                *cs++ = MI_NOOP;
 853                *cs++ = MI_NOOP;
 854        }
 855
 856        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 857
 858        intel_ring_advance(rq, cs);
 859        return 0;
 860}
 861
 862static struct i915_request *
 863semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
 864{
 865        struct intel_context *ce;
 866        struct i915_request *rq;
 867        int err;
 868
 869        ce = intel_context_create(engine);
 870        if (IS_ERR(ce))
 871                return ERR_CAST(ce);
 872
 873        rq = intel_context_create_request(ce);
 874        if (IS_ERR(rq))
 875                goto out_ce;
 876
 877        err = 0;
 878        if (rq->engine->emit_init_breadcrumb)
 879                err = rq->engine->emit_init_breadcrumb(rq);
 880        if (err == 0)
 881                err = emit_semaphore_chain(rq, vma, idx);
 882        if (err == 0)
 883                i915_request_get(rq);
 884        i915_request_add(rq);
 885        if (err)
 886                rq = ERR_PTR(err);
 887
 888out_ce:
 889        intel_context_put(ce);
 890        return rq;
 891}
 892
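     /*
      * From the kernel context, write 1 into dword @idx - 1 of @vma to release
      * that link of the semaphore chain, then bump the request to @prio and
      * kick the tasklet so the priority change takes effect immediately.
      */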
 893static int
 894release_queue(struct intel_engine_cs *engine,
 895              struct i915_vma *vma,
 896              int idx, int prio)
 897{
 898        struct i915_sched_attr attr = {
 899                .priority = prio,
 900        };
 901        struct i915_request *rq;
 902        u32 *cs;
 903
 904        rq = intel_engine_create_kernel_request(engine);
 905        if (IS_ERR(rq))
 906                return PTR_ERR(rq);
 907
 908        cs = intel_ring_begin(rq, 4);
 909        if (IS_ERR(cs)) {
 910                i915_request_add(rq);
 911                return PTR_ERR(cs);
 912        }
 913
 914        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 915        *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 916        *cs++ = 0;
 917        *cs++ = 1;
 918
 919        intel_ring_advance(rq, cs);
 920
 921        i915_request_get(rq);
 922        i915_request_add(rq);
 923
 924        local_bh_disable();
 925        engine->schedule(rq, &attr);
 926        local_bh_enable(); /* kick tasklet */
 927
 928        i915_request_put(rq);
 929
 930        return 0;
 931}
 932
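     /*
      * Build a semaphore chain with its head on @outer and @count links on
      * every engine, then release the tail at maximum priority. The head can
      * only complete in time if the scheduler timeslices between the
      * busy-waiting contexts on each engine.
      */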
 933static int
 934slice_semaphore_queue(struct intel_engine_cs *outer,
 935                      struct i915_vma *vma,
 936                      int count)
 937{
 938        struct intel_engine_cs *engine;
 939        struct i915_request *head;
 940        enum intel_engine_id id;
 941        int err, i, n = 0;
 942
 943        head = semaphore_queue(outer, vma, n++);
 944        if (IS_ERR(head))
 945                return PTR_ERR(head);
 946
 947        for_each_engine(engine, outer->gt, id) {
 948                for (i = 0; i < count; i++) {
 949                        struct i915_request *rq;
 950
 951                        rq = semaphore_queue(engine, vma, n++);
 952                        if (IS_ERR(rq)) {
 953                                err = PTR_ERR(rq);
 954                                goto out;
 955                        }
 956
 957                        i915_request_put(rq);
 958                }
 959        }
 960
 961        err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
 962        if (err)
 963                goto out;
 964
 965        if (i915_request_wait(head, 0,
 966                              2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
 967                pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 968                       count, n);
 969                GEM_TRACE_DUMP();
 970                intel_gt_set_wedged(outer->gt);
 971                err = -EIO;
 972        }
 973
 974out:
 975        i915_request_put(head);
 976        return err;
 977}
 978
 979static int live_timeslice_preempt(void *arg)
 980{
 981        struct intel_gt *gt = arg;
 982        struct drm_i915_gem_object *obj;
 983        struct intel_engine_cs *engine;
 984        enum intel_engine_id id;
 985        struct i915_vma *vma;
 986        void *vaddr;
 987        int err = 0;
 988
 989        /*
 990         * If a request takes too long, we would like to give other users
 991         * a fair go on the GPU. In particular, users may create batches
 992         * that wait upon external input, where that input may even be
 993         * supplied by another GPU job. To avoid blocking forever, we
 994         * need to preempt the current task and replace it with another
 995         * ready task.
 996         */
 997        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 998                return 0;
 999
1000        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1001        if (IS_ERR(obj))
1002                return PTR_ERR(obj);
1003
1004        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1005        if (IS_ERR(vma)) {
1006                err = PTR_ERR(vma);
1007                goto err_obj;
1008        }
1009
1010        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1011        if (IS_ERR(vaddr)) {
1012                err = PTR_ERR(vaddr);
1013                goto err_obj;
1014        }
1015
1016        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1017        if (err)
1018                goto err_map;
1019
1020        err = i915_vma_sync(vma);
1021        if (err)
1022                goto err_pin;
1023
1024        for_each_engine(engine, gt, id) {
1025                if (!intel_engine_has_preemption(engine))
1026                        continue;
1027
1028                memset(vaddr, 0, PAGE_SIZE);
1029
1030                st_engine_heartbeat_disable(engine);
1031                err = slice_semaphore_queue(engine, vma, 5);
1032                st_engine_heartbeat_enable(engine);
1033                if (err)
1034                        goto err_pin;
1035
1036                if (igt_flush_test(gt->i915)) {
1037                        err = -EIO;
1038                        goto err_pin;
1039                }
1040        }
1041
1042err_pin:
1043        i915_vma_unpin(vma);
1044err_map:
1045        i915_gem_object_unpin_map(obj);
1046err_obj:
1047        i915_gem_object_put(obj);
1048        return err;
1049}
1050
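     /*
      * Emit a request that waits for the semaphore (the first dword of @slot)
      * to reach @idx, records RING_TIMESTAMP into slot[@idx], then bumps the
      * semaphore to @idx + 1 so the order of execution can be read back from
      * the timestamps.
      */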
1051static struct i915_request *
1052create_rewinder(struct intel_context *ce,
1053                struct i915_request *wait,
1054                void *slot, int idx)
1055{
1056        const u32 offset =
1057                i915_ggtt_offset(ce->engine->status_page.vma) +
1058                offset_in_page(slot);
1059        struct i915_request *rq;
1060        u32 *cs;
1061        int err;
1062
1063        rq = intel_context_create_request(ce);
1064        if (IS_ERR(rq))
1065                return rq;
1066
1067        if (wait) {
1068                err = i915_request_await_dma_fence(rq, &wait->fence);
1069                if (err)
1070                        goto err;
1071        }
1072
1073        cs = intel_ring_begin(rq, 14);
1074        if (IS_ERR(cs)) {
1075                err = PTR_ERR(cs);
1076                goto err;
1077        }
1078
1079        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1080        *cs++ = MI_NOOP;
1081
1082        *cs++ = MI_SEMAPHORE_WAIT |
1083                MI_SEMAPHORE_GLOBAL_GTT |
1084                MI_SEMAPHORE_POLL |
1085                MI_SEMAPHORE_SAD_GTE_SDD;
1086        *cs++ = idx;
1087        *cs++ = offset;
1088        *cs++ = 0;
1089
1090        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1091        *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1092        *cs++ = offset + idx * sizeof(u32);
1093        *cs++ = 0;
1094
1095        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1096        *cs++ = offset;
1097        *cs++ = 0;
1098        *cs++ = idx + 1;
1099
1100        intel_ring_advance(rq, cs);
1101
1102        rq->sched.attr.priority = I915_PRIORITY_MASK;
1103        err = 0;
1104err:
1105        i915_request_get(rq);
1106        i915_request_add(rq);
1107        if (err) {
1108                i915_request_put(rq);
1109                return ERR_PTR(err);
1110        }
1111
1112        return rq;
1113}
1114
1115static int live_timeslice_rewind(void *arg)
1116{
1117        struct intel_gt *gt = arg;
1118        struct intel_engine_cs *engine;
1119        enum intel_engine_id id;
1120
1121        /*
1122         * The usual presumption on timeslice expiration is that we replace
1123         * the active context with another. However, given a chain of
 1124         * dependencies we may end up replacing the context with itself, but
 1125         * with only a few of its requests, forcing us to rewind the
1126         * RING_TAIL of the original request.
1127         */
1128        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1129                return 0;
1130
1131        for_each_engine(engine, gt, id) {
1132                enum { A1, A2, B1 };
1133                enum { X = 1, Z, Y };
1134                struct i915_request *rq[3] = {};
1135                struct intel_context *ce;
1136                unsigned long timeslice;
1137                int i, err = 0;
1138                u32 *slot;
1139
1140                if (!intel_engine_has_timeslices(engine))
1141                        continue;
1142
1143                /*
1144                 * A:rq1 -- semaphore wait, timestamp X
1145                 * A:rq2 -- write timestamp Y
1146                 *
1147                 * B:rq1 [await A:rq1] -- write timestamp Z
1148                 *
1149                 * Force timeslice, release semaphore.
1150                 *
1151                 * Expect execution/evaluation order XZY
1152                 */
1153
1154                st_engine_heartbeat_disable(engine);
1155                timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1156
1157                slot = memset32(engine->status_page.addr + 1000, 0, 4);
1158
1159                ce = intel_context_create(engine);
1160                if (IS_ERR(ce)) {
1161                        err = PTR_ERR(ce);
1162                        goto err;
1163                }
1164
1165                rq[A1] = create_rewinder(ce, NULL, slot, X);
1166                if (IS_ERR(rq[A1])) {
1167                        intel_context_put(ce);
1168                        goto err;
1169                }
1170
1171                rq[A2] = create_rewinder(ce, NULL, slot, Y);
1172                intel_context_put(ce);
1173                if (IS_ERR(rq[A2]))
1174                        goto err;
1175
1176                err = wait_for_submit(engine, rq[A2], HZ / 2);
1177                if (err) {
1178                        pr_err("%s: failed to submit first context\n",
1179                               engine->name);
1180                        goto err;
1181                }
1182
1183                ce = intel_context_create(engine);
1184                if (IS_ERR(ce)) {
1185                        err = PTR_ERR(ce);
1186                        goto err;
1187                }
1188
1189                rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1190                intel_context_put(ce);
 1191                if (IS_ERR(rq[B1]))
1192                        goto err;
1193
1194                err = wait_for_submit(engine, rq[B1], HZ / 2);
1195                if (err) {
1196                        pr_err("%s: failed to submit second context\n",
1197                               engine->name);
1198                        goto err;
1199                }
1200
1201                /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1202                ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1203                if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1204                        /* Wait for the timeslice to kick in */
1205                        del_timer(&engine->execlists.timer);
1206                        tasklet_hi_schedule(&engine->execlists.tasklet);
1207                        intel_engine_flush_submission(engine);
1208                }
1209                /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1210                GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1211                GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1212                GEM_BUG_ON(i915_request_is_active(rq[A2]));
1213
1214                /* Release the hounds! */
1215                slot[0] = 1;
1216                wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1217
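                     /* Wait for each request to publish its RING_TIMESTAMP (slots X, Z, Y) */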
1218                for (i = 1; i <= 3; i++) {
1219                        unsigned long timeout = jiffies + HZ / 2;
1220
1221                        while (!READ_ONCE(slot[i]) &&
1222                               time_before(jiffies, timeout))
1223                                ;
1224
1225                        if (!time_before(jiffies, timeout)) {
1226                                pr_err("%s: rq[%d] timed out\n",
1227                                       engine->name, i - 1);
1228                                err = -ETIME;
1229                                goto err;
1230                        }
1231
1232                        pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1233                }
1234
1235                /* XZY: XZ < XY */
1236                if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1237                        pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1238                               engine->name,
1239                               slot[Z] - slot[X],
1240                               slot[Y] - slot[X]);
1241                        err = -EINVAL;
1242                }
1243
1244err:
1245                memset32(&slot[0], -1, 4);
1246                wmb();
1247
1248                engine->props.timeslice_duration_ms = timeslice;
1249                st_engine_heartbeat_enable(engine);
1250                for (i = 0; i < 3; i++)
1251                        i915_request_put(rq[i]);
1252                if (igt_flush_test(gt->i915))
1253                        err = -EIO;
1254                if (err)
1255                        return err;
1256        }
1257
1258        return 0;
1259}
1260
1261static struct i915_request *nop_request(struct intel_engine_cs *engine)
1262{
1263        struct i915_request *rq;
1264
1265        rq = intel_engine_create_kernel_request(engine);
1266        if (IS_ERR(rq))
1267                return rq;
1268
1269        i915_request_get(rq);
1270        i915_request_add(rq);
1271
1272        return rq;
1273}
1274
1275static long slice_timeout(struct intel_engine_cs *engine)
1276{
1277        long timeout;
1278
1279        /* Enough time for a timeslice to kick in, and kick out */
1280        timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1281
1282        /* Enough time for the nop request to complete */
1283        timeout += HZ / 5;
1284
1285        return timeout + 1;
1286}
1287
1288static int live_timeslice_queue(void *arg)
1289{
1290        struct intel_gt *gt = arg;
1291        struct drm_i915_gem_object *obj;
1292        struct intel_engine_cs *engine;
1293        enum intel_engine_id id;
1294        struct i915_vma *vma;
1295        void *vaddr;
1296        int err = 0;
1297
1298        /*
 1299         * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1300         * timeslicing between them disabled, we *do* enable timeslicing
1301         * if the queue demands it. (Normally, we do not submit if
1302         * ELSP[1] is already occupied, so must rely on timeslicing to
1303         * eject ELSP[0] in favour of the queue.)
1304         */
1305        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1306                return 0;
1307
1308        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1309        if (IS_ERR(obj))
1310                return PTR_ERR(obj);
1311
1312        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1313        if (IS_ERR(vma)) {
1314                err = PTR_ERR(vma);
1315                goto err_obj;
1316        }
1317
1318        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1319        if (IS_ERR(vaddr)) {
1320                err = PTR_ERR(vaddr);
1321                goto err_obj;
1322        }
1323
1324        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1325        if (err)
1326                goto err_map;
1327
1328        err = i915_vma_sync(vma);
1329        if (err)
1330                goto err_pin;
1331
1332        for_each_engine(engine, gt, id) {
1333                struct i915_sched_attr attr = {
1334                        .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1335                };
1336                struct i915_request *rq, *nop;
1337
1338                if (!intel_engine_has_preemption(engine))
1339                        continue;
1340
1341                st_engine_heartbeat_disable(engine);
1342                memset(vaddr, 0, PAGE_SIZE);
1343
1344                /* ELSP[0]: semaphore wait */
1345                rq = semaphore_queue(engine, vma, 0);
1346                if (IS_ERR(rq)) {
1347                        err = PTR_ERR(rq);
1348                        goto err_heartbeat;
1349                }
1350                engine->schedule(rq, &attr);
1351                err = wait_for_submit(engine, rq, HZ / 2);
1352                if (err) {
1353                        pr_err("%s: Timed out trying to submit semaphores\n",
1354                               engine->name);
1355                        goto err_rq;
1356                }
1357
1358                /* ELSP[1]: nop request */
1359                nop = nop_request(engine);
1360                if (IS_ERR(nop)) {
1361                        err = PTR_ERR(nop);
1362                        goto err_rq;
1363                }
1364                err = wait_for_submit(engine, nop, HZ / 2);
1365                i915_request_put(nop);
1366                if (err) {
1367                        pr_err("%s: Timed out trying to submit nop\n",
1368                               engine->name);
1369                        goto err_rq;
1370                }
1371
1372                GEM_BUG_ON(i915_request_completed(rq));
1373                GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1374
 1375                /* Queue: semaphore signal, at the same priority as the semaphore */
1376                err = release_queue(engine, vma, 1, effective_prio(rq));
1377                if (err)
1378                        goto err_rq;
1379
1380                /* Wait until we ack the release_queue and start timeslicing */
1381                do {
1382                        cond_resched();
1383                        intel_engine_flush_submission(engine);
1384                } while (READ_ONCE(engine->execlists.pending[0]));
1385
1386                /* Timeslice every jiffy, so within 2 we should signal */
1387                if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1388                        struct drm_printer p =
1389                                drm_info_printer(gt->i915->drm.dev);
1390
1391                        pr_err("%s: Failed to timeslice into queue\n",
1392                               engine->name);
1393                        intel_engine_dump(engine, &p,
1394                                          "%s\n", engine->name);
1395
1396                        memset(vaddr, 0xff, PAGE_SIZE);
1397                        err = -EIO;
1398                }
1399err_rq:
1400                i915_request_put(rq);
1401err_heartbeat:
1402                st_engine_heartbeat_enable(engine);
1403                if (err)
1404                        break;
1405        }
1406
1407err_pin:
1408        i915_vma_unpin(vma);
1409err_map:
1410        i915_gem_object_unpin_map(obj);
1411err_obj:
1412        i915_gem_object_put(obj);
1413        return err;
1414}
1415
1416static int live_timeslice_nopreempt(void *arg)
1417{
1418        struct intel_gt *gt = arg;
1419        struct intel_engine_cs *engine;
1420        enum intel_engine_id id;
1421        struct igt_spinner spin;
1422        int err = 0;
1423
1424        /*
1425         * We should not timeslice into a request that is marked with
1426         * I915_REQUEST_NOPREEMPT.
1427         */
1428        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1429                return 0;
1430
1431        if (igt_spinner_init(&spin, gt))
1432                return -ENOMEM;
1433
1434        for_each_engine(engine, gt, id) {
1435                struct intel_context *ce;
1436                struct i915_request *rq;
1437                unsigned long timeslice;
1438
1439                if (!intel_engine_has_preemption(engine))
1440                        continue;
1441
1442                ce = intel_context_create(engine);
1443                if (IS_ERR(ce)) {
1444                        err = PTR_ERR(ce);
1445                        break;
1446                }
1447
1448                st_engine_heartbeat_disable(engine);
1449                timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1450
1451                /* Create an unpreemptible spinner */
1452
1453                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1454                intel_context_put(ce);
1455                if (IS_ERR(rq)) {
1456                        err = PTR_ERR(rq);
1457                        goto out_heartbeat;
1458                }
1459
1460                i915_request_get(rq);
1461                i915_request_add(rq);
1462
1463                if (!igt_wait_for_spinner(&spin, rq)) {
1464                        i915_request_put(rq);
1465                        err = -ETIME;
1466                        goto out_spin;
1467                }
1468
1469                set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1470                i915_request_put(rq);
1471
1472                /* Followed by a maximum priority barrier (heartbeat) */
1473
1474                ce = intel_context_create(engine);
1475                if (IS_ERR(ce)) {
1476                        err = PTR_ERR(ce);
1477                        goto out_spin;
1478                }
1479
1480                rq = intel_context_create_request(ce);
1481                intel_context_put(ce);
1482                if (IS_ERR(rq)) {
1483                        err = PTR_ERR(rq);
1484                        goto out_spin;
1485                }
1486
1487                rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1488                i915_request_get(rq);
1489                i915_request_add(rq);
1490
1491                /*
1492                 * Wait until the barrier is in ELSP, and we know timeslicing
1493                 * will have been activated.
1494                 */
1495                if (wait_for_submit(engine, rq, HZ / 2)) {
1496                        i915_request_put(rq);
1497                        err = -ETIME;
1498                        goto out_spin;
1499                }
1500
1501                /*
1502                 * Since the ELSP[0] request is unpreemptible, it should not
1503                 * allow the maximum priority barrier through. Wait long
1504                 * enough to see if it is timesliced in by mistake.
1505                 */
1506                if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1507                        pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1508                               engine->name);
1509                        err = -EINVAL;
1510                }
1511                i915_request_put(rq);
1512
1513out_spin:
1514                igt_spinner_end(&spin);
1515out_heartbeat:
1516                xchg(&engine->props.timeslice_duration_ms, timeslice);
1517                st_engine_heartbeat_enable(engine);
1518                if (err)
1519                        break;
1520
1521                if (igt_flush_test(gt->i915)) {
1522                        err = -EIO;
1523                        break;
1524                }
1525        }
1526
1527        igt_spinner_fini(&spin);
1528        return err;
1529}
1530
1531static int live_busywait_preempt(void *arg)
1532{
1533        struct intel_gt *gt = arg;
1534        struct i915_gem_context *ctx_hi, *ctx_lo;
1535        struct intel_engine_cs *engine;
1536        struct drm_i915_gem_object *obj;
1537        struct i915_vma *vma;
1538        enum intel_engine_id id;
1539        int err = -ENOMEM;
1540        u32 *map;
1541
1542        /*
1543         * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1544         * preempt the busywaits used to synchronise between rings.
1545         */
1546
1547        ctx_hi = kernel_context(gt->i915);
1548        if (!ctx_hi)
1549                return -ENOMEM;
1550        ctx_hi->sched.priority =
1551                I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1552
1553        ctx_lo = kernel_context(gt->i915);
1554        if (!ctx_lo)
1555                goto err_ctx_hi;
1556        ctx_lo->sched.priority =
1557                I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1558
1559        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1560        if (IS_ERR(obj)) {
1561                err = PTR_ERR(obj);
1562                goto err_ctx_lo;
1563        }
1564
1565        map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1566        if (IS_ERR(map)) {
1567                err = PTR_ERR(map);
1568                goto err_obj;
1569        }
1570
1571        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1572        if (IS_ERR(vma)) {
1573                err = PTR_ERR(vma);
1574                goto err_map;
1575        }
1576
1577        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1578        if (err)
1579                goto err_map;
1580
1581        err = i915_vma_sync(vma);
1582        if (err)
1583                goto err_vma;
1584
1585        for_each_engine(engine, gt, id) {
1586                struct i915_request *lo, *hi;
1587                struct igt_live_test t;
1588                u32 *cs;
1589
1590                if (!intel_engine_has_preemption(engine))
1591                        continue;
1592
1593                if (!intel_engine_can_store_dword(engine))
1594                        continue;
1595
1596                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1597                        err = -EIO;
1598                        goto err_vma;
1599                }
1600
1601                /*
1602                 * We create two requests. The low priority request
1603                 * busywaits on a semaphore (inside the ringbuffer where
1604                 * it should be preemptible) and the high priority request
1605                 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1606                 * allowing the first request to complete. If preemption
1607                 * fails, we hang instead.
1608                 */
1609
1610                lo = igt_request_alloc(ctx_lo, engine);
1611                if (IS_ERR(lo)) {
1612                        err = PTR_ERR(lo);
1613                        goto err_vma;
1614                }
1615
1616                cs = intel_ring_begin(lo, 8);
1617                if (IS_ERR(cs)) {
1618                        err = PTR_ERR(cs);
1619                        i915_request_add(lo);
1620                        goto err_vma;
1621                }
1622
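                    /* Write 1 to the semaphore: signals the CPU we have started and arms the busywait below */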
1623                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1624                *cs++ = i915_ggtt_offset(vma);
1625                *cs++ = 0;
1626                *cs++ = 1;
1627
1628                /* XXX Do we need a flush + invalidate here? */
1629
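                    /* Busywait until the semaphore is reset to 0 by the high priority request */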
1630                *cs++ = MI_SEMAPHORE_WAIT |
1631                        MI_SEMAPHORE_GLOBAL_GTT |
1632                        MI_SEMAPHORE_POLL |
1633                        MI_SEMAPHORE_SAD_EQ_SDD;
1634                *cs++ = 0;
1635                *cs++ = i915_ggtt_offset(vma);
1636                *cs++ = 0;
1637
1638                intel_ring_advance(lo, cs);
1639
1640                i915_request_get(lo);
1641                i915_request_add(lo);
1642
1643                if (wait_for(READ_ONCE(*map), 10)) {
1644                        i915_request_put(lo);
1645                        err = -ETIMEDOUT;
1646                        goto err_vma;
1647                }
1648
1649                /* Low priority request should be busywaiting now */
1650                if (i915_request_wait(lo, 0, 1) != -ETIME) {
1651                        i915_request_put(lo);
1652                        pr_err("%s: Busywaiting request did not busywait!\n",
1653                               engine->name);
1654                        err = -EIO;
1655                        goto err_vma;
1656                }
1657
1658                hi = igt_request_alloc(ctx_hi, engine);
1659                if (IS_ERR(hi)) {
1660                        err = PTR_ERR(hi);
1661                        i915_request_put(lo);
1662                        goto err_vma;
1663                }
1664
1665                cs = intel_ring_begin(hi, 4);
1666                if (IS_ERR(cs)) {
1667                        err = PTR_ERR(cs);
1668                        i915_request_add(hi);
1669                        i915_request_put(lo);
1670                        goto err_vma;
1671                }
1672
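                    /* Reset the semaphore to 0, releasing the busywaiting low priority request */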
1673                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1674                *cs++ = i915_ggtt_offset(vma);
1675                *cs++ = 0;
1676                *cs++ = 0;
1677
1678                intel_ring_advance(hi, cs);
1679                i915_request_add(hi);
1680
1681                if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1682                        struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1683
1684                        pr_err("%s: Failed to preempt semaphore busywait!\n",
1685                               engine->name);
1686
1687                        intel_engine_dump(engine, &p, "%s\n", engine->name);
1688                        GEM_TRACE_DUMP();
1689
1690                        i915_request_put(lo);
1691                        intel_gt_set_wedged(gt);
1692                        err = -EIO;
1693                        goto err_vma;
1694                }
1695                GEM_BUG_ON(READ_ONCE(*map));
1696                i915_request_put(lo);
1697
1698                if (igt_live_test_end(&t)) {
1699                        err = -EIO;
1700                        goto err_vma;
1701                }
1702        }
1703
1704        err = 0;
1705err_vma:
1706        i915_vma_unpin(vma);
1707err_map:
1708        i915_gem_object_unpin_map(obj);
1709err_obj:
1710        i915_gem_object_put(obj);
1711err_ctx_lo:
1712        kernel_context_close(ctx_lo);
1713err_ctx_hi:
1714        kernel_context_close(ctx_hi);
1715        return err;
1716}
1717
1718static struct i915_request *
1719spinner_create_request(struct igt_spinner *spin,
1720                       struct i915_gem_context *ctx,
1721                       struct intel_engine_cs *engine,
1722                       u32 arb)
1723{
1724        struct intel_context *ce;
1725        struct i915_request *rq;
1726
1727        ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1728        if (IS_ERR(ce))
1729                return ERR_CAST(ce);
1730
1731        rq = igt_spinner_create_request(spin, ce, arb);
1732        intel_context_put(ce);
1733        return rq;
1734}
1735
1736static int live_preempt(void *arg)
1737{
1738        struct intel_gt *gt = arg;
1739        struct i915_gem_context *ctx_hi, *ctx_lo;
1740        struct igt_spinner spin_hi, spin_lo;
1741        struct intel_engine_cs *engine;
1742        enum intel_engine_id id;
1743        int err = -ENOMEM;
1744
1745        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1746                return 0;
1747
1748        if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1749                pr_err("Logical preemption supported, but not exposed\n");
1750
1751        if (igt_spinner_init(&spin_hi, gt))
1752                return -ENOMEM;
1753
1754        if (igt_spinner_init(&spin_lo, gt))
1755                goto err_spin_hi;
1756
1757        ctx_hi = kernel_context(gt->i915);
1758        if (!ctx_hi)
1759                goto err_spin_lo;
1760        ctx_hi->sched.priority =
1761                I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1762
1763        ctx_lo = kernel_context(gt->i915);
1764        if (!ctx_lo)
1765                goto err_ctx_hi;
1766        ctx_lo->sched.priority =
1767                I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1768
1769        for_each_engine(engine, gt, id) {
1770                struct igt_live_test t;
1771                struct i915_request *rq;
1772
1773                if (!intel_engine_has_preemption(engine))
1774                        continue;
1775
1776                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1777                        err = -EIO;
1778                        goto err_ctx_lo;
1779                }
1780
1781                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1782                                            MI_ARB_CHECK);
1783                if (IS_ERR(rq)) {
1784                        err = PTR_ERR(rq);
1785                        goto err_ctx_lo;
1786                }
1787
1788                i915_request_add(rq);
1789                if (!igt_wait_for_spinner(&spin_lo, rq)) {
1790                        GEM_TRACE("lo spinner failed to start\n");
1791                        GEM_TRACE_DUMP();
1792                        intel_gt_set_wedged(gt);
1793                        err = -EIO;
1794                        goto err_ctx_lo;
1795                }
1796
1797                rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1798                                            MI_ARB_CHECK);
1799                if (IS_ERR(rq)) {
1800                        igt_spinner_end(&spin_lo);
1801                        err = PTR_ERR(rq);
1802                        goto err_ctx_lo;
1803                }
1804
1805                i915_request_add(rq);
1806                if (!igt_wait_for_spinner(&spin_hi, rq)) {
1807                        GEM_TRACE("hi spinner failed to start\n");
1808                        GEM_TRACE_DUMP();
1809                        intel_gt_set_wedged(gt);
1810                        err = -EIO;
1811                        goto err_ctx_lo;
1812                }
1813
1814                igt_spinner_end(&spin_hi);
1815                igt_spinner_end(&spin_lo);
1816
1817                if (igt_live_test_end(&t)) {
1818                        err = -EIO;
1819                        goto err_ctx_lo;
1820                }
1821        }
1822
1823        err = 0;
1824err_ctx_lo:
1825        kernel_context_close(ctx_lo);
1826err_ctx_hi:
1827        kernel_context_close(ctx_hi);
1828err_spin_lo:
1829        igt_spinner_fini(&spin_lo);
1830err_spin_hi:
1831        igt_spinner_fini(&spin_hi);
1832        return err;
1833}
1834
1835static int live_late_preempt(void *arg)
1836{
1837        struct intel_gt *gt = arg;
1838        struct i915_gem_context *ctx_hi, *ctx_lo;
1839        struct igt_spinner spin_hi, spin_lo;
1840        struct intel_engine_cs *engine;
1841        struct i915_sched_attr attr = {};
1842        enum intel_engine_id id;
1843        int err = -ENOMEM;
1844
1845        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1846                return 0;
1847
1848        if (igt_spinner_init(&spin_hi, gt))
1849                return -ENOMEM;
1850
1851        if (igt_spinner_init(&spin_lo, gt))
1852                goto err_spin_hi;
1853
1854        ctx_hi = kernel_context(gt->i915);
1855        if (!ctx_hi)
1856                goto err_spin_lo;
1857
1858        ctx_lo = kernel_context(gt->i915);
1859        if (!ctx_lo)
1860                goto err_ctx_hi;
1861
1862        /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1863        ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1864
1865        for_each_engine(engine, gt, id) {
1866                struct igt_live_test t;
1867                struct i915_request *rq;
1868
1869                if (!intel_engine_has_preemption(engine))
1870                        continue;
1871
1872                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1873                        err = -EIO;
1874                        goto err_ctx_lo;
1875                }
1876
1877                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1878                                            MI_ARB_CHECK);
1879                if (IS_ERR(rq)) {
1880                        err = PTR_ERR(rq);
1881                        goto err_ctx_lo;
1882                }
1883
1884                i915_request_add(rq);
1885                if (!igt_wait_for_spinner(&spin_lo, rq)) {
1886                        pr_err("First context failed to start\n");
1887                        goto err_wedged;
1888                }
1889
1890                rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1891                                            MI_NOOP);
1892                if (IS_ERR(rq)) {
1893                        igt_spinner_end(&spin_lo);
1894                        err = PTR_ERR(rq);
1895                        goto err_ctx_lo;
1896                }
1897
1898                i915_request_add(rq);
1899                if (igt_wait_for_spinner(&spin_hi, rq)) {
1900                        pr_err("Second context overtook first?\n");
1901                        goto err_wedged;
1902                }
1903
1904                attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1905                engine->schedule(rq, &attr);
1906
1907                if (!igt_wait_for_spinner(&spin_hi, rq)) {
1908                        pr_err("High priority context failed to preempt the low priority context\n");
1909                        GEM_TRACE_DUMP();
1910                        goto err_wedged;
1911                }
1912
1913                igt_spinner_end(&spin_hi);
1914                igt_spinner_end(&spin_lo);
1915
1916                if (igt_live_test_end(&t)) {
1917                        err = -EIO;
1918                        goto err_ctx_lo;
1919                }
1920        }
1921
1922        err = 0;
1923err_ctx_lo:
1924        kernel_context_close(ctx_lo);
1925err_ctx_hi:
1926        kernel_context_close(ctx_hi);
1927err_spin_lo:
1928        igt_spinner_fini(&spin_lo);
1929err_spin_hi:
1930        igt_spinner_fini(&spin_hi);
1931        return err;
1932
1933err_wedged:
1934        igt_spinner_end(&spin_hi);
1935        igt_spinner_end(&spin_lo);
1936        intel_gt_set_wedged(gt);
1937        err = -EIO;
1938        goto err_ctx_lo;
1939}
1940
1941struct preempt_client {
1942        struct igt_spinner spin;
1943        struct i915_gem_context *ctx;
1944};
1945
1946static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1947{
1948        c->ctx = kernel_context(gt->i915);
1949        if (!c->ctx)
1950                return -ENOMEM;
1951
1952        if (igt_spinner_init(&c->spin, gt))
1953                goto err_ctx;
1954
1955        return 0;
1956
1957err_ctx:
1958        kernel_context_close(c->ctx);
1959        return -ENOMEM;
1960}
1961
1962static void preempt_client_fini(struct preempt_client *c)
1963{
1964        igt_spinner_fini(&c->spin);
1965        kernel_context_close(c->ctx);
1966}
1967
1968static int live_nopreempt(void *arg)
1969{
1970        struct intel_gt *gt = arg;
1971        struct intel_engine_cs *engine;
1972        struct preempt_client a, b;
1973        enum intel_engine_id id;
1974        int err = -ENOMEM;
1975
1976        /*
1977         * Verify that we can disable preemption for an individual request
1978         * that may be being observed and so must not be interrupted.
1979         */
1980
1981        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1982                return 0;
1983
1984        if (preempt_client_init(gt, &a))
1985                return -ENOMEM;
1986        if (preempt_client_init(gt, &b))
1987                goto err_client_a;
1988        b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1989
1990        for_each_engine(engine, gt, id) {
1991                struct i915_request *rq_a, *rq_b;
1992
1993                if (!intel_engine_has_preemption(engine))
1994                        continue;
1995
1996                engine->execlists.preempt_hang.count = 0;
1997
1998                rq_a = spinner_create_request(&a.spin,
1999                                              a.ctx, engine,
2000                                              MI_ARB_CHECK);
2001                if (IS_ERR(rq_a)) {
2002                        err = PTR_ERR(rq_a);
2003                        goto err_client_b;
2004                }
2005
2006                /* Low priority client, but unpreemptable! */
2007                __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
2008
2009                i915_request_add(rq_a);
2010                if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2011                        pr_err("First client failed to start\n");
2012                        goto err_wedged;
2013                }
2014
2015                rq_b = spinner_create_request(&b.spin,
2016                                              b.ctx, engine,
2017                                              MI_ARB_CHECK);
2018                if (IS_ERR(rq_b)) {
2019                        err = PTR_ERR(rq_b);
2020                        goto err_client_b;
2021                }
2022
2023                i915_request_add(rq_b);
2024
2025                /* B is much more important than A! (But A is unpreemptable.) */
2026                GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2027
2028                /* Wait long enough for preemption and timeslicing */
2029                if (igt_wait_for_spinner(&b.spin, rq_b)) {
2030                        pr_err("Second client started too early!\n");
2031                        goto err_wedged;
2032                }
2033
2034                igt_spinner_end(&a.spin);
2035
2036                if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2037                        pr_err("Second client failed to start\n");
2038                        goto err_wedged;
2039                }
2040
2041                igt_spinner_end(&b.spin);
2042
2043                if (engine->execlists.preempt_hang.count) {
2044                        pr_err("Preemption recorded x%d; should have been suppressed!\n",
2045                               engine->execlists.preempt_hang.count);
2046                        err = -EINVAL;
2047                        goto err_wedged;
2048                }
2049
2050                if (igt_flush_test(gt->i915))
2051                        goto err_wedged;
2052        }
2053
2054        err = 0;
2055err_client_b:
2056        preempt_client_fini(&b);
2057err_client_a:
2058        preempt_client_fini(&a);
2059        return err;
2060
2061err_wedged:
2062        igt_spinner_end(&b.spin);
2063        igt_spinner_end(&a.spin);
2064        intel_gt_set_wedged(gt);
2065        err = -EIO;
2066        goto err_client_b;
2067}
2068
2069struct live_preempt_cancel {
2070        struct intel_engine_cs *engine;
2071        struct preempt_client a, b;
2072};
2073
2074static int __cancel_active0(struct live_preempt_cancel *arg)
2075{
2076        struct i915_request *rq;
2077        struct igt_live_test t;
2078        int err;
2079
2080        /* Preempt cancel of ELSP0 */
2081        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2082        if (igt_live_test_begin(&t, arg->engine->i915,
2083                                __func__, arg->engine->name))
2084                return -EIO;
2085
2086        rq = spinner_create_request(&arg->a.spin,
2087                                    arg->a.ctx, arg->engine,
2088                                    MI_ARB_CHECK);
2089        if (IS_ERR(rq))
2090                return PTR_ERR(rq);
2091
2092        clear_bit(CONTEXT_BANNED, &rq->context->flags);
2093        i915_request_get(rq);
2094        i915_request_add(rq);
2095        if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2096                err = -EIO;
2097                goto out;
2098        }
2099
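            /*
             * Ban the spinning context and prod the engine with a pulse;
             * the cancelled request should be removed from ELSP[0] and
             * complete with an error.
             */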
2100        intel_context_set_banned(rq->context);
2101        err = intel_engine_pulse(arg->engine);
2102        if (err)
2103                goto out;
2104
2105        err = wait_for_reset(arg->engine, rq, HZ / 2);
2106        if (err) {
2107                pr_err("Cancelled inflight0 request did not reset\n");
2108                goto out;
2109        }
2110
2111out:
2112        i915_request_put(rq);
2113        if (igt_live_test_end(&t))
2114                err = -EIO;
2115        return err;
2116}
2117
2118static int __cancel_active1(struct live_preempt_cancel *arg)
2119{
2120        struct i915_request *rq[2] = {};
2121        struct igt_live_test t;
2122        int err;
2123
2124        /* Preempt cancel of ELSP1 */
2125        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2126        if (igt_live_test_begin(&t, arg->engine->i915,
2127                                __func__, arg->engine->name))
2128                return -EIO;
2129
2130        rq[0] = spinner_create_request(&arg->a.spin,
2131                                       arg->a.ctx, arg->engine,
2132                                       MI_NOOP); /* no preemption */
2133        if (IS_ERR(rq[0]))
2134                return PTR_ERR(rq[0]);
2135
2136        clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2137        i915_request_get(rq[0]);
2138        i915_request_add(rq[0]);
2139        if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2140                err = -EIO;
2141                goto out;
2142        }
2143
2144        rq[1] = spinner_create_request(&arg->b.spin,
2145                                       arg->b.ctx, arg->engine,
2146                                       MI_ARB_CHECK);
2147        if (IS_ERR(rq[1])) {
2148                err = PTR_ERR(rq[1]);
2149                goto out;
2150        }
2151
2152        clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2153        i915_request_get(rq[1]);
2154        err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2155        i915_request_add(rq[1]);
2156        if (err)
2157                goto out;
2158
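            /* Ban only the ELSP[1] context; rq[0] in ELSP[0] should complete normally */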
2159        intel_context_set_banned(rq[1]->context);
2160        err = intel_engine_pulse(arg->engine);
2161        if (err)
2162                goto out;
2163
2164        igt_spinner_end(&arg->a.spin);
2165        err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2166        if (err)
2167                goto out;
2168
2169        if (rq[0]->fence.error != 0) {
2170                pr_err("Normal inflight0 request did not complete\n");
2171                err = -EINVAL;
2172                goto out;
2173        }
2174
2175        if (rq[1]->fence.error != -EIO) {
2176                pr_err("Cancelled inflight1 request did not report -EIO\n");
2177                err = -EINVAL;
2178                goto out;
2179        }
2180
2181out:
2182        i915_request_put(rq[1]);
2183        i915_request_put(rq[0]);
2184        if (igt_live_test_end(&t))
2185                err = -EIO;
2186        return err;
2187}
2188
2189static int __cancel_queued(struct live_preempt_cancel *arg)
2190{
2191        struct i915_request *rq[3] = {};
2192        struct igt_live_test t;
2193        int err;
2194
2195        /* Full ELSP and one in the wings */
2196        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2197        if (igt_live_test_begin(&t, arg->engine->i915,
2198                                __func__, arg->engine->name))
2199                return -EIO;
2200
2201        rq[0] = spinner_create_request(&arg->a.spin,
2202                                       arg->a.ctx, arg->engine,
2203                                       MI_ARB_CHECK);
2204        if (IS_ERR(rq[0]))
2205                return PTR_ERR(rq[0]);
2206
2207        clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2208        i915_request_get(rq[0]);
2209        i915_request_add(rq[0]);
2210        if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2211                err = -EIO;
2212                goto out;
2213        }
2214
2215        rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2216        if (IS_ERR(rq[1])) {
2217                err = PTR_ERR(rq[1]);
2218                goto out;
2219        }
2220
2221        clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2222        i915_request_get(rq[1]);
2223        err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2224        i915_request_add(rq[1]);
2225        if (err)
2226                goto out;
2227
2228        rq[2] = spinner_create_request(&arg->b.spin,
2229                                       arg->a.ctx, arg->engine,
2230                                       MI_ARB_CHECK);
2231        if (IS_ERR(rq[2])) {
2232                err = PTR_ERR(rq[2]);
2233                goto out;
2234        }
2235
2236        i915_request_get(rq[2]);
2237        err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2238        i915_request_add(rq[2]);
2239        if (err)
2240                goto out;
2241
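            /* Ban the context shared by rq[0] and rq[2]; rq[1] (the other context) should complete normally */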
2242        intel_context_set_banned(rq[2]->context);
2243        err = intel_engine_pulse(arg->engine);
2244        if (err)
2245                goto out;
2246
2247        err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2248        if (err)
2249                goto out;
2250
2251        if (rq[0]->fence.error != -EIO) {
2252                pr_err("Cancelled inflight0 request did not report -EIO\n");
2253                err = -EINVAL;
2254                goto out;
2255        }
2256
2257        if (rq[1]->fence.error != 0) {
2258                pr_err("Normal inflight1 request did not complete\n");
2259                err = -EINVAL;
2260                goto out;
2261        }
2262
2263        if (rq[2]->fence.error != -EIO) {
2264                pr_err("Cancelled queued request did not report -EIO\n");
2265                err = -EINVAL;
2266                goto out;
2267        }
2268
2269out:
2270        i915_request_put(rq[2]);
2271        i915_request_put(rq[1]);
2272        i915_request_put(rq[0]);
2273        if (igt_live_test_end(&t))
2274                err = -EIO;
2275        return err;
2276}
2277
2278static int __cancel_hostile(struct live_preempt_cancel *arg)
2279{
2280        struct i915_request *rq;
2281        int err;
2282
2283        /* Preempt cancel non-preemptible spinner in ELSP0 */
2284        if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2285                return 0;
2286
2287        if (!intel_has_reset_engine(arg->engine->gt))
2288                return 0;
2289
2290        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2291        rq = spinner_create_request(&arg->a.spin,
2292                                    arg->a.ctx, arg->engine,
2293                                    MI_NOOP); /* preemption disabled */
2294        if (IS_ERR(rq))
2295                return PTR_ERR(rq);
2296
2297        clear_bit(CONTEXT_BANNED, &rq->context->flags);
2298        i915_request_get(rq);
2299        i915_request_add(rq);
2300        if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2301                err = -EIO;
2302                goto out;
2303        }
2304
2305        intel_context_set_banned(rq->context);
2306        err = intel_engine_pulse(arg->engine); /* force reset */
2307        if (err)
2308                goto out;
2309
2310        err = wait_for_reset(arg->engine, rq, HZ / 2);
2311        if (err) {
2312                pr_err("Cancelled inflight0 request did not reset\n");
2313                goto out;
2314        }
2315
2316out:
2317        i915_request_put(rq);
2318        if (igt_flush_test(arg->engine->i915))
2319                err = -EIO;
2320        return err;
2321}
2322
2323static int live_preempt_cancel(void *arg)
2324{
2325        struct intel_gt *gt = arg;
2326        struct live_preempt_cancel data;
2327        enum intel_engine_id id;
2328        int err = -ENOMEM;
2329
2330        /*
2331         * To cancel an inflight context, we need to first remove it from the
2332         * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2333         */
2334
2335        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2336                return 0;
2337
2338        if (preempt_client_init(gt, &data.a))
2339                return -ENOMEM;
2340        if (preempt_client_init(gt, &data.b))
2341                goto err_client_a;
2342
2343        for_each_engine(data.engine, gt, id) {
2344                if (!intel_engine_has_preemption(data.engine))
2345                        continue;
2346
2347                err = __cancel_active0(&data);
2348                if (err)
2349                        goto err_wedged;
2350
2351                err = __cancel_active1(&data);
2352                if (err)
2353                        goto err_wedged;
2354
2355                err = __cancel_queued(&data);
2356                if (err)
2357                        goto err_wedged;
2358
2359                err = __cancel_hostile(&data);
2360                if (err)
2361                        goto err_wedged;
2362        }
2363
2364        err = 0;
2365err_client_b:
2366        preempt_client_fini(&data.b);
2367err_client_a:
2368        preempt_client_fini(&data.a);
2369        return err;
2370
2371err_wedged:
2372        GEM_TRACE_DUMP();
2373        igt_spinner_end(&data.b.spin);
2374        igt_spinner_end(&data.a.spin);
2375        intel_gt_set_wedged(gt);
2376        goto err_client_b;
2377}
2378
2379static int live_suppress_self_preempt(void *arg)
2380{
2381        struct intel_gt *gt = arg;
2382        struct intel_engine_cs *engine;
2383        struct i915_sched_attr attr = {
2384                .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2385        };
2386        struct preempt_client a, b;
2387        enum intel_engine_id id;
2388        int err = -ENOMEM;
2389
2390        /*
2391         * Verify that if a preemption request does not cause a change in
2392         * the current execution order, the preempt-to-idle injection is
2393         * skipped and that we do not accidentally apply it after the CS
2394         * completion event.
2395         */
2396
2397        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2398                return 0;
2399
2400        if (intel_uc_uses_guc_submission(&gt->uc))
2401                return 0; /* presume black box */
2402
2403        if (intel_vgpu_active(gt->i915))
2404                return 0; /* GVT forces single port & request submission */
2405
2406        if (preempt_client_init(gt, &a))
2407                return -ENOMEM;
2408        if (preempt_client_init(gt, &b))
2409                goto err_client_a;
2410
2411        for_each_engine(engine, gt, id) {
2412                struct i915_request *rq_a, *rq_b;
2413                int depth;
2414
2415                if (!intel_engine_has_preemption(engine))
2416                        continue;
2417
2418                if (igt_flush_test(gt->i915))
2419                        goto err_wedged;
2420
2421                st_engine_heartbeat_disable(engine);
2422                engine->execlists.preempt_hang.count = 0;
2423
2424                rq_a = spinner_create_request(&a.spin,
2425                                              a.ctx, engine,
2426                                              MI_NOOP);
2427                if (IS_ERR(rq_a)) {
2428                        err = PTR_ERR(rq_a);
2429                        st_engine_heartbeat_enable(engine);
2430                        goto err_client_b;
2431                }
2432
2433                i915_request_add(rq_a);
2434                if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2435                        pr_err("First client failed to start\n");
2436                        st_engine_heartbeat_enable(engine);
2437                        goto err_wedged;
2438                }
2439
2440                /* Keep postponing the timer to avoid premature slicing */
2441                mod_timer(&engine->execlists.timer, jiffies + HZ);
2442                for (depth = 0; depth < 8; depth++) {
2443                        rq_b = spinner_create_request(&b.spin,
2444                                                      b.ctx, engine,
2445                                                      MI_NOOP);
2446                        if (IS_ERR(rq_b)) {
2447                                err = PTR_ERR(rq_b);
2448                                st_engine_heartbeat_enable(engine);
2449                                goto err_client_b;
2450                        }
2451                        i915_request_add(rq_b);
2452
2453                        GEM_BUG_ON(i915_request_completed(rq_a));
2454                        engine->schedule(rq_a, &attr);
2455                        igt_spinner_end(&a.spin);
2456
2457                        if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2458                                pr_err("Second client failed to start\n");
2459                                st_engine_heartbeat_enable(engine);
2460                                goto err_wedged;
2461                        }
2462
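                            /* Alternate clients so the next pass promotes the request that is already running */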
2463                        swap(a, b);
2464                        rq_a = rq_b;
2465                }
2466                igt_spinner_end(&a.spin);
2467
2468                if (engine->execlists.preempt_hang.count) {
2469                        pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2470                               engine->name,
2471                               engine->execlists.preempt_hang.count,
2472                               depth);
2473                        st_engine_heartbeat_enable(engine);
2474                        err = -EINVAL;
2475                        goto err_client_b;
2476                }
2477
2478                st_engine_heartbeat_enable(engine);
2479                if (igt_flush_test(gt->i915))
2480                        goto err_wedged;
2481        }
2482
2483        err = 0;
2484err_client_b:
2485        preempt_client_fini(&b);
2486err_client_a:
2487        preempt_client_fini(&a);
2488        return err;
2489
2490err_wedged:
2491        igt_spinner_end(&b.spin);
2492        igt_spinner_end(&a.spin);
2493        intel_gt_set_wedged(gt);
2494        err = -EIO;
2495        goto err_client_b;
2496}
2497
2498static int live_chain_preempt(void *arg)
2499{
2500        struct intel_gt *gt = arg;
2501        struct intel_engine_cs *engine;
2502        struct preempt_client hi, lo;
2503        enum intel_engine_id id;
2504        int err = -ENOMEM;
2505
2506        /*
2507         * Build a chain AB...BA between two contexts (A, B) and request
2508         * preemption of the last request. It should then complete before
2509         * the previously submitted spinner in B.
2510         */
2511
2512        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2513                return 0;
2514
2515        if (preempt_client_init(gt, &hi))
2516                return -ENOMEM;
2517
2518        if (preempt_client_init(gt, &lo))
2519                goto err_client_hi;
2520
2521        for_each_engine(engine, gt, id) {
2522                struct i915_sched_attr attr = {
2523                        .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2524                };
2525                struct igt_live_test t;
2526                struct i915_request *rq;
2527                int ring_size, count, i;
2528
2529                if (!intel_engine_has_preemption(engine))
2530                        continue;
2531
2532                rq = spinner_create_request(&lo.spin,
2533                                            lo.ctx, engine,
2534                                            MI_ARB_CHECK);
2535                if (IS_ERR(rq))
2536                        goto err_wedged;
2537
2538                i915_request_get(rq);
2539                i915_request_add(rq);
2540
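                    /* Estimate how many requests fit in the ring from the footprint of the first */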
2541                ring_size = rq->wa_tail - rq->head;
2542                if (ring_size < 0)
2543                        ring_size += rq->ring->size;
2544                ring_size = rq->ring->size / ring_size;
2545                pr_debug("%s(%s): Using maximum of %d requests\n",
2546                         __func__, engine->name, ring_size);
2547
2548                igt_spinner_end(&lo.spin);
2549                if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2550                        pr_err("Timed out waiting to flush %s\n", engine->name);
2551                        i915_request_put(rq);
2552                        goto err_wedged;
2553                }
2554                i915_request_put(rq);
2555
2556                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2557                        err = -EIO;
2558                        goto err_wedged;
2559                }
2560
2561                for_each_prime_number_from(count, 1, ring_size) {
2562                        rq = spinner_create_request(&hi.spin,
2563                                                    hi.ctx, engine,
2564                                                    MI_ARB_CHECK);
2565                        if (IS_ERR(rq))
2566                                goto err_wedged;
2567                        i915_request_add(rq);
2568                        if (!igt_wait_for_spinner(&hi.spin, rq))
2569                                goto err_wedged;
2570
2571                        rq = spinner_create_request(&lo.spin,
2572                                                    lo.ctx, engine,
2573                                                    MI_ARB_CHECK);
2574                        if (IS_ERR(rq))
2575                                goto err_wedged;
2576                        i915_request_add(rq);
2577
2578                        for (i = 0; i < count; i++) {
2579                                rq = igt_request_alloc(lo.ctx, engine);
2580                                if (IS_ERR(rq))
2581                                        goto err_wedged;
2582                                i915_request_add(rq);
2583                        }
2584
2585                        rq = igt_request_alloc(hi.ctx, engine);
2586                        if (IS_ERR(rq))
2587                                goto err_wedged;
2588
2589                        i915_request_get(rq);
2590                        i915_request_add(rq);
2591                        engine->schedule(rq, &attr);
2592
2593                        igt_spinner_end(&hi.spin);
2594                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2595                                struct drm_printer p =
2596                                        drm_info_printer(gt->i915->drm.dev);
2597
2598                                pr_err("Failed to preempt over chain of %d\n",
2599                                       count);
2600                                intel_engine_dump(engine, &p,
2601                                                  "%s\n", engine->name);
2602                                i915_request_put(rq);
2603                                goto err_wedged;
2604                        }
2605                        igt_spinner_end(&lo.spin);
2606                        i915_request_put(rq);
2607
2608                        rq = igt_request_alloc(lo.ctx, engine);
2609                        if (IS_ERR(rq))
2610                                goto err_wedged;
2611
2612                        i915_request_get(rq);
2613                        i915_request_add(rq);
2614
2615                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2616                                struct drm_printer p =
2617                                        drm_info_printer(gt->i915->drm.dev);
2618
2619                                pr_err("Failed to flush low priority chain of %d requests\n",
2620                                       count);
2621                                intel_engine_dump(engine, &p,
2622                                                  "%s\n", engine->name);
2623
2624                                i915_request_put(rq);
2625                                goto err_wedged;
2626                        }
2627                        i915_request_put(rq);
2628                }
2629
2630                if (igt_live_test_end(&t)) {
2631                        err = -EIO;
2632                        goto err_wedged;
2633                }
2634        }
2635
2636        err = 0;
2637err_client_lo:
2638        preempt_client_fini(&lo);
2639err_client_hi:
2640        preempt_client_fini(&hi);
2641        return err;
2642
2643err_wedged:
2644        igt_spinner_end(&hi.spin);
2645        igt_spinner_end(&lo.spin);
2646        intel_gt_set_wedged(gt);
2647        err = -EIO;
2648        goto err_client_lo;
2649}
2650
2651static int create_gang(struct intel_engine_cs *engine,
2652                       struct i915_request **prev)
2653{
2654        struct drm_i915_gem_object *obj;
2655        struct intel_context *ce;
2656        struct i915_request *rq;
2657        struct i915_vma *vma;
2658        u32 *cs;
2659        int err;
2660
2661        ce = intel_context_create(engine);
2662        if (IS_ERR(ce))
2663                return PTR_ERR(ce);
2664
2665        obj = i915_gem_object_create_internal(engine->i915, 4096);
2666        if (IS_ERR(obj)) {
2667                err = PTR_ERR(obj);
2668                goto err_ce;
2669        }
2670
2671        vma = i915_vma_instance(obj, ce->vm, NULL);
2672        if (IS_ERR(vma)) {
2673                err = PTR_ERR(vma);
2674                goto err_obj;
2675        }
2676
2677        err = i915_vma_pin(vma, 0, 0, PIN_USER);
2678        if (err)
2679                goto err_obj;
2680
2681        cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2682        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
2683                goto err_obj;
        }
2684
2685        /* Semaphore target: spin until zero */
2686        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2687
2688        *cs++ = MI_SEMAPHORE_WAIT |
2689                MI_SEMAPHORE_POLL |
2690                MI_SEMAPHORE_SAD_EQ_SDD;
2691        *cs++ = 0;
2692        *cs++ = lower_32_bits(vma->node.start);
2693        *cs++ = upper_32_bits(vma->node.start);
2694
2695        if (*prev) {
2696                u64 offset = (*prev)->batch->node.start;
2697
2698                /* Terminate the spinner in the next lower priority batch. */
2699                *cs++ = MI_STORE_DWORD_IMM_GEN4;
2700                *cs++ = lower_32_bits(offset);
2701                *cs++ = upper_32_bits(offset);
2702                *cs++ = 0;
2703        }
2704
2705        *cs++ = MI_BATCH_BUFFER_END;
2706        i915_gem_object_flush_map(obj);
2707        i915_gem_object_unpin_map(obj);
2708
2709        rq = intel_context_create_request(ce);
2710        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
2711                goto err_obj;
        }
2712
2713        rq->batch = i915_vma_get(vma);
2714        i915_request_get(rq);
2715
2716        i915_vma_lock(vma);
2717        err = i915_request_await_object(rq, vma->obj, false);
2718        if (!err)
2719                err = i915_vma_move_to_active(vma, rq, 0);
2720        if (!err)
2721                err = rq->engine->emit_bb_start(rq,
2722                                                vma->node.start,
2723                                                PAGE_SIZE, 0);
2724        i915_vma_unlock(vma);
2725        i915_request_add(rq);
2726        if (err)
2727                goto err_rq;
2728
2729        i915_gem_object_put(obj);
2730        intel_context_put(ce);
2731
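            /*
             * Link the requests through the otherwise unused mock.link so
             * the caller can walk the gang from newest to oldest; the link
             * of the first request resolves back to NULL.
             */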
2732        rq->mock.link.next = &(*prev)->mock.link;
2733        *prev = rq;
2734        return 0;
2735
2736err_rq:
2737        i915_vma_put(rq->batch);
2738        i915_request_put(rq);
2739err_obj:
2740        i915_gem_object_put(obj);
2741err_ce:
2742        intel_context_put(ce);
2743        return err;
2744}
2745
2746static int __live_preempt_ring(struct intel_engine_cs *engine,
2747                               struct igt_spinner *spin,
2748                               int queue_sz, int ring_sz)
2749{
2750        struct intel_context *ce[2] = {};
2751        struct i915_request *rq;
2752        struct igt_live_test t;
2753        int err = 0;
2754        int n;
2755
2756        if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2757                return -EIO;
2758
2759        for (n = 0; n < ARRAY_SIZE(ce); n++) {
2760                struct intel_context *tmp;
2761
2762                tmp = intel_context_create(engine);
2763                if (IS_ERR(tmp)) {
2764                        err = PTR_ERR(tmp);
2765                        goto err_ce;
2766                }
2767
2768                tmp->ring = __intel_context_ring_size(ring_sz);
2769
2770                err = intel_context_pin(tmp);
2771                if (err) {
2772                        intel_context_put(tmp);
2773                        goto err_ce;
2774                }
2775
2776                memset32(tmp->ring->vaddr,
2777                         0xdeadbeef, /* trigger a hang if executed */
2778                         tmp->ring->vma->size / sizeof(u32));
2779
2780                ce[n] = tmp;
2781        }
2782
2783        rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2784        if (IS_ERR(rq)) {
2785                err = PTR_ERR(rq);
2786                goto err_ce;
2787        }
2788
2789        i915_request_get(rq);
2790        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2791        i915_request_add(rq);
2792
2793        if (!igt_wait_for_spinner(spin, rq)) {
2794                intel_gt_set_wedged(engine->gt);
2795                i915_request_put(rq);
2796                err = -ETIME;
2797                goto err_ce;
2798        }
2799
2800        /* Fill the ring until we cause a wrap */
2801        n = 0;
2802        while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2803                struct i915_request *tmp;
2804
2805                tmp = intel_context_create_request(ce[0]);
2806                if (IS_ERR(tmp)) {
2807                        err = PTR_ERR(tmp);
2808                        i915_request_put(rq);
2809                        goto err_ce;
2810                }
2811
2812                i915_request_add(tmp);
2813                intel_engine_flush_submission(engine);
2814                n++;
2815        }
2816        intel_engine_flush_submission(engine);
2817        pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2818                 engine->name, queue_sz, n,
2819                 ce[0]->ring->size,
2820                 ce[0]->ring->tail,
2821                 ce[0]->ring->emit,
2822                 rq->tail);
2823        i915_request_put(rq);
2824
2825        /* Create a second request to preempt the first ring */
2826        rq = intel_context_create_request(ce[1]);
2827        if (IS_ERR(rq)) {
2828                err = PTR_ERR(rq);
2829                goto err_ce;
2830        }
2831
2832        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2833        i915_request_get(rq);
2834        i915_request_add(rq);
2835
2836        err = wait_for_submit(engine, rq, HZ / 2);
2837        i915_request_put(rq);
2838        if (err) {
2839                pr_err("%s: preemption request was not submitted\n",
2840                       engine->name);
2841                err = -ETIME;
2842        }
2843
2844        pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2845                 engine->name,
2846                 ce[0]->ring->tail, ce[0]->ring->emit,
2847                 ce[1]->ring->tail, ce[1]->ring->emit);
2848
2849err_ce:
2850        intel_engine_flush_submission(engine);
2851        igt_spinner_end(spin);
2852        for (n = 0; n < ARRAY_SIZE(ce); n++) {
2853                if (IS_ERR_OR_NULL(ce[n]))
2854                        break;
2855
2856                intel_context_unpin(ce[n]);
2857                intel_context_put(ce[n]);
2858        }
2859        if (igt_live_test_end(&t))
2860                err = -EIO;
2861        return err;
2862}
2863
2864static int live_preempt_ring(void *arg)
2865{
2866        struct intel_gt *gt = arg;
2867        struct intel_engine_cs *engine;
2868        struct igt_spinner spin;
2869        enum intel_engine_id id;
2870        int err = 0;
2871
2872        /*
2873         * Check that we roll back large chunks of a ring in order to do a
2874         * preemption event. Similar to live_unlite_ring, but looking at
2875         * ring size rather than the impact of intel_ring_direction().
2876         */
2877
2878        if (igt_spinner_init(&spin, gt))
2879                return -ENOMEM;
2880
2881        for_each_engine(engine, gt, id) {
2882                int n;
2883
2884                if (!intel_engine_has_preemption(engine))
2885                        continue;
2886
2887                if (!intel_engine_can_store_dword(engine))
2888                        continue;
2889
2890                st_engine_heartbeat_disable(engine);
2891
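                    /* Prefill 0, 1/4, 1/2 and 3/4 of a 4K ring before forcing the preemption */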
2892                for (n = 0; n <= 3; n++) {
2893                        err = __live_preempt_ring(engine, &spin,
2894                                                  n * SZ_4K / 4, SZ_4K);
2895                        if (err)
2896                                break;
2897                }
2898
2899                st_engine_heartbeat_enable(engine);
2900                if (err)
2901                        break;
2902        }
2903
2904        igt_spinner_fini(&spin);
2905        return err;
2906}
2907
2908static int live_preempt_gang(void *arg)
2909{
2910        struct intel_gt *gt = arg;
2911        struct intel_engine_cs *engine;
2912        enum intel_engine_id id;
2913
2914        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2915                return 0;
2916
2917        /*
2918         * Build as long a chain of preempters as we can, with each
2919         * request higher priority than the last. Once we are ready, we release
2920         * the last batch, which then percolates down the chain, each releasing
2921         * the next oldest in turn. The intent is to simply push as hard as we
2922         * can with the number of preemptions, trying to exceed narrow HW
2923         * limits. At a minimum, we insist that we can sort all the user
2924         * high priority levels into execution order.
2925         */
2926
2927        for_each_engine(engine, gt, id) {
2928                struct i915_request *rq = NULL;
2929                struct igt_live_test t;
2930                IGT_TIMEOUT(end_time);
2931                int prio = 0;
2932                int err = 0;
2933                u32 *cs;
2934
2935                if (!intel_engine_has_preemption(engine))
2936                        continue;
2937
2938                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2939                        return -EIO;
2940
2941                do {
2942                        struct i915_sched_attr attr = {
2943                                .priority = I915_USER_PRIORITY(prio++),
2944                        };
2945
2946                        err = create_gang(engine, &rq);
2947                        if (err)
2948                                break;
2949
2950                        /* Submit each spinner at increasing priority */
2951                        engine->schedule(rq, &attr);
2952                } while (prio <= I915_PRIORITY_MAX &&
2953                         !__igt_timeout(end_time, NULL));
2954                pr_debug("%s: Preempt chain of %d requests\n",
2955                         engine->name, prio);
2956
2957                /*
2958                 * The last spinner submitted is the highest priority and
2959                 * should execute first. When that spinner completes, it
2960                 * terminates the next lowest spinner, and so on, until
2961                 * there are no more spinners and the gang is complete.
2962                 */
2963                cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2964                if (!IS_ERR(cs)) {
2965                        *cs = 0;
2966                        i915_gem_object_unpin_map(rq->batch->obj);
2967                } else {
2968                        err = PTR_ERR(cs);
2969                        intel_gt_set_wedged(gt);
2970                }
2971
2972                while (rq) { /* wait for each rq from highest to lowest prio */
2973                        struct i915_request *n = list_next_entry(rq, mock.link);
2974
2975                        if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2976                                struct drm_printer p =
2977                                        drm_info_printer(engine->i915->drm.dev);
2978
2979                                pr_err("Failed to flush chain of %d requests, at %d\n",
2980                                       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2981                                intel_engine_dump(engine, &p,
2982                                                  "%s\n", engine->name);
2983
2984                                err = -ETIME;
2985                        }
2986
2987                        i915_vma_put(rq->batch);
2988                        i915_request_put(rq);
2989                        rq = n;
2990                }
2991
2992                if (igt_live_test_end(&t))
2993                        err = -EIO;
2994                if (err)
2995                        return err;
2996        }
2997
2998        return 0;
2999}
3000
3001static struct i915_vma *
3002create_gpr_user(struct intel_engine_cs *engine,
3003                struct i915_vma *result,
3004                unsigned int offset)
3005{
3006        struct drm_i915_gem_object *obj;
3007        struct i915_vma *vma;
3008        u32 *cs;
3009        int err;
3010        int i;
3011
3012        obj = i915_gem_object_create_internal(engine->i915, 4096);
3013        if (IS_ERR(obj))
3014                return ERR_CAST(obj);
3015
3016        vma = i915_vma_instance(obj, result->vm, NULL);
3017        if (IS_ERR(vma)) {
3018                i915_gem_object_put(obj);
3019                return vma;
3020        }
3021
3022        err = i915_vma_pin(vma, 0, 0, PIN_USER);
3023        if (err) {
3024                i915_vma_put(vma);
3025                return ERR_PTR(err);
3026        }
3027
3028        cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
3029        if (IS_ERR(cs)) {
3030                i915_vma_put(vma);
3031                return ERR_CAST(cs);
3032        }
3033
3034        /* All GPRs are cleared for new contexts. We use GPR(0) as a constant */
3035        *cs++ = MI_LOAD_REGISTER_IMM(1);
3036        *cs++ = CS_GPR(engine, 0);
3037        *cs++ = 1;
3038
3039        for (i = 1; i < NUM_GPR; i++) {
3040                u64 addr;
3041
3042                /*
3043                 * Perform: GPR[i]++
3044                 *
3045                 * As we read and write the context-saved GPR[i], if
3046                 * we restart this batch buffer from an earlier point, we
3047                 * will repeat the increment and store a value > 1.
3048                 */
3049                *cs++ = MI_MATH(4);
3050                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3051                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3052                *cs++ = MI_MATH_ADD;
3053                *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3054
3055                addr = result->node.start + offset + i * sizeof(*cs);
3056                *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3057                *cs++ = CS_GPR(engine, 2 * i);
3058                *cs++ = lower_32_bits(addr);
3059                *cs++ = upper_32_bits(addr);
3060
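                    /* Wait until the semaphore at the start of the result buffer reaches at least i */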
3061                *cs++ = MI_SEMAPHORE_WAIT |
3062                        MI_SEMAPHORE_POLL |
3063                        MI_SEMAPHORE_SAD_GTE_SDD;
3064                *cs++ = i;
3065                *cs++ = lower_32_bits(result->node.start);
3066                *cs++ = upper_32_bits(result->node.start);
3067        }
3068
3069        *cs++ = MI_BATCH_BUFFER_END;
3070        i915_gem_object_flush_map(obj);
3071        i915_gem_object_unpin_map(obj);
3072
3073        return vma;
3074}
3075
3076static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3077{
3078        struct drm_i915_gem_object *obj;
3079        struct i915_vma *vma;
3080        int err;
3081
3082        obj = i915_gem_object_create_internal(gt->i915, sz);
3083        if (IS_ERR(obj))
3084                return ERR_CAST(obj);
3085
3086        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3087        if (IS_ERR(vma)) {
3088                i915_gem_object_put(obj);
3089                return vma;
3090        }
3091
3092        err = i915_ggtt_pin(vma, NULL, 0, 0);
3093        if (err) {
3094                i915_vma_put(vma);
3095                return ERR_PTR(err);
3096        }
3097
3098        return vma;
3099}
3100
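    /*
     * Run the GPR user batch from a fresh context on @engine: bind the
     * shared result buffer into that context's VM, build the batch so it
     * writes into the client's slice at @offset, and submit it. On success
     * the request is returned with an extra reference for the caller.
     */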
3101static struct i915_request *
3102create_gpr_client(struct intel_engine_cs *engine,
3103                  struct i915_vma *global,
3104                  unsigned int offset)
3105{
3106        struct i915_vma *batch, *vma;
3107        struct intel_context *ce;
3108        struct i915_request *rq;
3109        int err;
3110
3111        ce = intel_context_create(engine);
3112        if (IS_ERR(ce))
3113                return ERR_CAST(ce);
3114
3115        vma = i915_vma_instance(global->obj, ce->vm, NULL);
3116        if (IS_ERR(vma)) {
3117                err = PTR_ERR(vma);
3118                goto out_ce;
3119        }
3120
3121        err = i915_vma_pin(vma, 0, 0, PIN_USER);
3122        if (err)
3123                goto out_ce;
3124
3125        batch = create_gpr_user(engine, vma, offset);
3126        if (IS_ERR(batch)) {
3127                err = PTR_ERR(batch);
3128                goto out_vma;
3129        }
3130
3131        rq = intel_context_create_request(ce);
3132        if (IS_ERR(rq)) {
3133                err = PTR_ERR(rq);
3134                goto out_batch;
3135        }
3136
3137        i915_vma_lock(vma);
3138        err = i915_request_await_object(rq, vma->obj, false);
3139        if (!err)
3140                err = i915_vma_move_to_active(vma, rq, 0);
3141        i915_vma_unlock(vma);
3142
3143        i915_vma_lock(batch);
3144        if (!err)
3145                err = i915_request_await_object(rq, batch->obj, false);
3146        if (!err)
3147                err = i915_vma_move_to_active(batch, rq, 0);
3148        if (!err)
3149                err = rq->engine->emit_bb_start(rq,
3150                                                batch->node.start,
3151                                                PAGE_SIZE, 0);
3152        i915_vma_unlock(batch);
3153        i915_vma_unpin(batch);
3154
3155        if (!err)
3156                i915_request_get(rq);
3157        i915_request_add(rq);
3158
3159out_batch:
3160        i915_vma_put(batch);
3161out_vma:
3162        i915_vma_unpin(vma);
3163out_ce:
3164        intel_context_put(ce);
3165        return err ? ERR_PTR(err) : rq;
3166}
3167
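    /*
     * Submit a maximum-priority kernel request that writes @id into the
     * first dword of @global. The priority bump preempts whichever GPR
     * client is currently executing, and the store releases each client's
     * MI_SEMAPHORE_WAIT once @id reaches the value it is polling for.
     */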
3168static int preempt_user(struct intel_engine_cs *engine,
3169                        struct i915_vma *global,
3170                        int id)
3171{
3172        struct i915_sched_attr attr = {
3173                .priority = I915_PRIORITY_MAX
3174        };
3175        struct i915_request *rq;
3176        int err = 0;
3177        u32 *cs;
3178
3179        rq = intel_engine_create_kernel_request(engine);
3180        if (IS_ERR(rq))
3181                return PTR_ERR(rq);
3182
3183        cs = intel_ring_begin(rq, 4);
3184        if (IS_ERR(cs)) {
3185                i915_request_add(rq);
3186                return PTR_ERR(cs);
3187        }
3188
3189        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3190        *cs++ = i915_ggtt_offset(global);
3191        *cs++ = 0;
3192        *cs++ = id;
3193
3194        intel_ring_advance(rq, cs);
3195
3196        i915_request_get(rq);
3197        i915_request_add(rq);
3198
3199        engine->schedule(rq, &attr);
3200
3201        if (i915_request_wait(rq, 0, HZ / 2) < 0)
3202                err = -ETIME;
3203        i915_request_put(rq);
3204
3205        return err;
3206}
3207
3208static int live_preempt_user(void *arg)
3209{
3210        struct intel_gt *gt = arg;
3211        struct intel_engine_cs *engine;
3212        struct i915_vma *global;
3213        enum intel_engine_id id;
3214        u32 *result;
3215        int err = 0;
3216
3217        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3218                return 0;
3219
3220        /*
3221         * In our other tests, we look at preemption in carefully
3222         * controlled conditions in the ringbuffer. Since most of the
3223         * time is spent in user batches, most of our preemptions naturally
3224         * occur there. We want to verify that when we preempt inside a batch
3225         * we continue on from the current instruction and do not roll back
3226         * to the start, or another earlier arbitration point.
3227         *
3228         * To verify this, we create a batch which is a mixture of
3229         * MI_MATH (gpr++), MI_SRM (gpr) and preemption points. Then with
3230         * a few preempting contexts thrown into the mix, we look for any
3231         * repeated instructions (which show up as incorrect values).
3232         */
3233
3234        global = create_global(gt, 4096);
3235        if (IS_ERR(global))
3236                return PTR_ERR(global);
3237
3238        result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3239        if (IS_ERR(result)) {
3240                i915_vma_unpin_and_release(&global, 0);
3241                return PTR_ERR(result);
3242        }
3243
3244        for_each_engine(engine, gt, id) {
3245                struct i915_request *client[3] = {};
3246                struct igt_live_test t;
3247                int i;
3248
3249                if (!intel_engine_has_preemption(engine))
3250                        continue;
3251
3252                if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3253                        continue; /* we need per-context GPR */
3254
3255                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3256                        err = -EIO;
3257                        break;
3258                }
3259
3260                memset(result, 0, 4096);
3261
3262                for (i = 0; i < ARRAY_SIZE(client); i++) {
3263                        struct i915_request *rq;
3264
3265                        rq = create_gpr_client(engine, global,
3266                                               NUM_GPR * i * sizeof(u32));
3267                        if (IS_ERR(rq)) {
                                    err = PTR_ERR(rq);
3268                                goto end_test;
                            }
3269
3270                        client[i] = rq;
3271                }
3272
3273                /* Continuously preempt the set of 3 running contexts */
3274                for (i = 1; i <= NUM_GPR; i++) {
3275                        err = preempt_user(engine, global, i);
3276                        if (err)
3277                                goto end_test;
3278                }
3279
3280                if (READ_ONCE(result[0]) != NUM_GPR) {
3281                        pr_err("%s: Failed to release semaphore\n",
3282                               engine->name);
3283                        err = -EIO;
3284                        goto end_test;
3285                }
3286
3287                for (i = 0; i < ARRAY_SIZE(client); i++) {
3288                        int gpr;
3289
3290                        if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3291                                err = -ETIME;
3292                                goto end_test;
3293                        }
3294
3295                        for (gpr = 1; gpr < NUM_GPR; gpr++) {
3296                                if (result[NUM_GPR * i + gpr] != 1) {
3297                                        pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3298                                               engine->name,
3299                                               i, gpr, result[NUM_GPR * i + gpr]);
3300                                        err = -EINVAL;
3301                                        goto end_test;
3302                                }
3303                        }
3304                }
3305
3306end_test:
3307                for (i = 0; i < ARRAY_SIZE(client); i++) {
3308                        if (!client[i])
3309                                break;
3310
3311                        i915_request_put(client[i]);
3312                }
3313
3314                /* Flush the semaphores on error */
3315                smp_store_mb(result[0], -1);
3316                if (igt_live_test_end(&t))
3317                        err = -EIO;
3318                if (err)
3319                        break;
3320        }
3321
3322        i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3323        return err;
3324}
3325
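    /*
     * Drop the engine's preempt_timeout_ms to a single jiffy so that a
     * low-priority spinner submitted without arbitration points is forcibly
     * reset when the high-priority request arrives, then restore the saved
     * timeout once the submission has been flushed to the HW.
     */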
3326static int live_preempt_timeout(void *arg)
3327{
3328        struct intel_gt *gt = arg;
3329        struct i915_gem_context *ctx_hi, *ctx_lo;
3330        struct igt_spinner spin_lo;
3331        struct intel_engine_cs *engine;
3332        enum intel_engine_id id;
3333        int err = -ENOMEM;
3334
3335        /*
3336         * Check that we force preemption to occur by cancelling the previous
3337         * context if it refuses to yield the GPU.
3338         */
3339        if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3340                return 0;
3341
3342        if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3343                return 0;
3344
3345        if (!intel_has_reset_engine(gt))
3346                return 0;
3347
3348        if (igt_spinner_init(&spin_lo, gt))
3349                return -ENOMEM;
3350
3351        ctx_hi = kernel_context(gt->i915);
3352        if (!ctx_hi)
3353                goto err_spin_lo;
3354        ctx_hi->sched.priority =
3355                I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3356
3357        ctx_lo = kernel_context(gt->i915);
3358        if (!ctx_lo)
3359                goto err_ctx_hi;
3360        ctx_lo->sched.priority =
3361                I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3362
3363        for_each_engine(engine, gt, id) {
3364                unsigned long saved_timeout;
3365                struct i915_request *rq;
3366
3367                if (!intel_engine_has_preemption(engine))
3368                        continue;
3369
3370                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3371                                            MI_NOOP); /* preemption disabled */
3372                if (IS_ERR(rq)) {
3373                        err = PTR_ERR(rq);
3374                        goto err_ctx_lo;
3375                }
3376
3377                i915_request_add(rq);
3378                if (!igt_wait_for_spinner(&spin_lo, rq)) {
3379                        intel_gt_set_wedged(gt);
3380                        err = -EIO;
3381                        goto err_ctx_lo;
3382                }
3383
3384                rq = igt_request_alloc(ctx_hi, engine);
3385                if (IS_ERR(rq)) {
3386                        igt_spinner_end(&spin_lo);
3387                        err = PTR_ERR(rq);
3388                        goto err_ctx_lo;
3389                }
3390
3391                /* Flush the previous CS ack before changing timeouts */
3392                while (READ_ONCE(engine->execlists.pending[0]))
3393                        cpu_relax();
3394
3395                saved_timeout = engine->props.preempt_timeout_ms;
3396                engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3397
3398                i915_request_get(rq);
3399                i915_request_add(rq);
3400
3401                intel_engine_flush_submission(engine);
3402                engine->props.preempt_timeout_ms = saved_timeout;
3403
3404                if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3405                        intel_gt_set_wedged(gt);
3406                        i915_request_put(rq);
3407                        err = -ETIME;
3408                        goto err_ctx_lo;
3409                }
3410
3411                igt_spinner_end(&spin_lo);
3412                i915_request_put(rq);
3413        }
3414
3415        err = 0;
3416err_ctx_lo:
3417        kernel_context_close(ctx_lo);
3418err_ctx_hi:
3419        kernel_context_close(ctx_hi);
3420err_spin_lo:
3421        igt_spinner_fini(&spin_lo);
3422        return err;
3423}
3424
3425static int random_range(struct rnd_state *rnd, int min, int max)
3426{
3427        return i915_prandom_u32_max_state(max - min, rnd) + min;
3428}
3429
3430static int random_priority(struct rnd_state *rnd)
3431{
3432        return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3433}
3434
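    /*
     * Shared state for the preemption smoketests: a pool of kernel contexts
     * to submit from, an optional one-page batch of MI_ARB_CHECK arbitration
     * points, and a PRNG for picking contexts and priorities. The engine and
     * request count are filled in per thread (smoke_crescendo) or per
     * iteration (smoke_random).
     */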
3435struct preempt_smoke {
3436        struct intel_gt *gt;
3437        struct i915_gem_context **contexts;
3438        struct intel_engine_cs *engine;
3439        struct drm_i915_gem_object *batch;
3440        unsigned int ncontext;
3441        struct rnd_state prng;
3442        unsigned long count;
3443};
3444
3445static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3446{
3447        return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3448                                                          &smoke->prng)];
3449}
3450
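    /*
     * Submit a single request from @ctx at priority @prio on smoke->engine,
     * optionally executing @batch (bound into the context's VM), which
     * live_preempt_smoke() fills with MI_ARB_CHECK arbitration points.
     */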
3451static int smoke_submit(struct preempt_smoke *smoke,
3452                        struct i915_gem_context *ctx, int prio,
3453                        struct drm_i915_gem_object *batch)
3454{
3455        struct i915_request *rq;
3456        struct i915_vma *vma = NULL;
3457        int err = 0;
3458
3459        if (batch) {
3460                struct i915_address_space *vm;
3461
3462                vm = i915_gem_context_get_vm_rcu(ctx);
3463                vma = i915_vma_instance(batch, vm, NULL);
3464                i915_vm_put(vm);
3465                if (IS_ERR(vma))
3466                        return PTR_ERR(vma);
3467
3468                err = i915_vma_pin(vma, 0, 0, PIN_USER);
3469                if (err)
3470                        return err;
3471        }
3472
3473        ctx->sched.priority = prio;
3474
3475        rq = igt_request_alloc(ctx, smoke->engine);
3476        if (IS_ERR(rq)) {
3477                err = PTR_ERR(rq);
3478                goto unpin;
3479        }
3480
3481        if (vma) {
3482                i915_vma_lock(vma);
3483                err = i915_request_await_object(rq, vma->obj, false);
3484                if (!err)
3485                        err = i915_vma_move_to_active(vma, rq, 0);
3486                if (!err)
3487                        err = rq->engine->emit_bb_start(rq,
3488                                                        vma->node.start,
3489                                                        PAGE_SIZE, 0);
3490                i915_vma_unlock(vma);
3491        }
3492
3493        i915_request_add(rq);
3494
3495unpin:
3496        if (vma)
3497                i915_vma_unpin(vma);
3498
3499        return err;
3500}
3501
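    /*
     * Per-engine worker: repeatedly pick a random context and submit at a
     * cycling priority (count % I915_PRIORITY_MAX) until either the timeout
     * expires or ncontext requests have been submitted.
     */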
3502static int smoke_crescendo_thread(void *arg)
3503{
3504        struct preempt_smoke *smoke = arg;
3505        IGT_TIMEOUT(end_time);
3506        unsigned long count;
3507
3508        count = 0;
3509        do {
3510                struct i915_gem_context *ctx = smoke_context(smoke);
3511                int err;
3512
3513                err = smoke_submit(smoke,
3514                                   ctx, count % I915_PRIORITY_MAX,
3515                                   smoke->batch);
3516                if (err)
3517                        return err;
3518
3519                count++;
3520        } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3521
3522        smoke->count = count;
3523        return 0;
3524}
3525
3526static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3527#define BATCH BIT(0)
3528{
3529        struct task_struct *tsk[I915_NUM_ENGINES] = {};
3530        struct preempt_smoke arg[I915_NUM_ENGINES];
3531        struct intel_engine_cs *engine;
3532        enum intel_engine_id id;
3533        unsigned long count;
3534        int err = 0;
3535
3536        for_each_engine(engine, smoke->gt, id) {
3537                arg[id] = *smoke;
3538                arg[id].engine = engine;
3539                if (!(flags & BATCH))
3540                        arg[id].batch = NULL;
3541                arg[id].count = 0;
3542
3543                tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3544                                      "igt/smoke:%d", id);
3545                if (IS_ERR(tsk[id])) {
3546                        err = PTR_ERR(tsk[id]);
3547                        break;
3548                }
3549                get_task_struct(tsk[id]);
3550        }
3551
3552        yield(); /* start all threads before we kthread_stop() */
3553
3554        count = 0;
3555        for_each_engine(engine, smoke->gt, id) {
3556                int status;
3557
3558                if (IS_ERR_OR_NULL(tsk[id]))
3559                        continue;
3560
3561                status = kthread_stop(tsk[id]);
3562                if (status && !err)
3563                        err = status;
3564
3565                count += arg[id].count;
3566
3567                put_task_struct(tsk[id]);
3568        }
3569
3570        pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3571                count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3572        return err;
3573}
3574
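    /*
     * Single-threaded variant: walk every engine in turn, submitting from a
     * random context at a random priority, until the timeout expires or
     * ncontext requests have been submitted.
     */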
3575static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3576{
3577        enum intel_engine_id id;
3578        IGT_TIMEOUT(end_time);
3579        unsigned long count;
3580
3581        count = 0;
3582        do {
3583                for_each_engine(smoke->engine, smoke->gt, id) {
3584                        struct i915_gem_context *ctx = smoke_context(smoke);
3585                        int err;
3586
3587                        err = smoke_submit(smoke,
3588                                           ctx, random_priority(&smoke->prng),
3589                                           flags & BATCH ? smoke->batch : NULL);
3590                        if (err)
3591                                return err;
3592
3593                        count++;
3594                }
3595        } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3596
3597        pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3598                count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3599        return 0;
3600}
3601
3602static int live_preempt_smoke(void *arg)
3603{
3604        struct preempt_smoke smoke = {
3605                .gt = arg,
3606                .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3607                .ncontext = 256,
3608        };
3609        const unsigned int phase[] = { 0, BATCH };
3610        struct igt_live_test t;
3611        int err = -ENOMEM;
3612        u32 *cs;
3613        int n;
3614
3615        if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3616                return 0;
3617
3618        smoke.contexts = kmalloc_array(smoke.ncontext,
3619                                       sizeof(*smoke.contexts),
3620                                       GFP_KERNEL);
3621        if (!smoke.contexts)
3622                return -ENOMEM;
3623
3624        smoke.batch =
3625                i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3626        if (IS_ERR(smoke.batch)) {
3627                err = PTR_ERR(smoke.batch);
3628                goto err_free;
3629        }
3630
3631        cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3632        if (IS_ERR(cs)) {
3633                err = PTR_ERR(cs);
3634                goto err_batch;
3635        }
3636        for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3637                cs[n] = MI_ARB_CHECK;
3638        cs[n] = MI_BATCH_BUFFER_END;
3639        i915_gem_object_flush_map(smoke.batch);
3640        i915_gem_object_unpin_map(smoke.batch);
3641
3642        if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3643                err = -EIO;
3644                goto err_batch;
3645        }
3646
3647        for (n = 0; n < smoke.ncontext; n++) {
3648                smoke.contexts[n] = kernel_context(smoke.gt->i915);
3649                if (!smoke.contexts[n])
3650                        goto err_ctx;
3651        }
3652
3653        for (n = 0; n < ARRAY_SIZE(phase); n++) {
3654                err = smoke_crescendo(&smoke, phase[n]);
3655                if (err)
3656                        goto err_ctx;
3657
3658                err = smoke_random(&smoke, phase[n]);
3659                if (err)
3660                        goto err_ctx;
3661        }
3662
3663err_ctx:
3664        if (igt_live_test_end(&t))
3665                err = -EIO;
3666
3667        for (n = 0; n < smoke.ncontext; n++) {
3668                if (!smoke.contexts[n])
3669                        break;
3670                kernel_context_close(smoke.contexts[n]);
3671        }
3672
3673err_batch:
3674        i915_gem_object_put(smoke.batch);
3675err_free:
3676        kfree(smoke.contexts);
3677
3678        return err;
3679}
3680
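    /*
     * Measure the submission latency of empty requests through @nctx virtual
     * engines built over the same set of siblings. For each prime-sized
     * batch we either fill one context at a time (CHAIN) or interleave the
     * contexts, wait for the last request of each, and then report the
     * single-request latency against the amortised per-request latency.
     */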
3681static int nop_virtual_engine(struct intel_gt *gt,
3682                              struct intel_engine_cs **siblings,
3683                              unsigned int nsibling,
3684                              unsigned int nctx,
3685                              unsigned int flags)
3686#define CHAIN BIT(0)
3687{
3688        IGT_TIMEOUT(end_time);
3689        struct i915_request *request[16] = {};
3690        struct intel_context *ve[16];
3691        unsigned long n, prime, nc;
3692        struct igt_live_test t;
3693        ktime_t times[2] = {};
3694        int err;
3695
3696        GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3697
3698        for (n = 0; n < nctx; n++) {
3699                ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3700                if (IS_ERR(ve[n])) {
3701                        err = PTR_ERR(ve[n]);
3702                        nctx = n;
3703                        goto out;
3704                }
3705
3706                err = intel_context_pin(ve[n]);
3707                if (err) {
3708                        intel_context_put(ve[n]);
3709                        nctx = n;
3710                        goto out;
3711                }
3712        }
3713
3714        err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3715        if (err)
3716                goto out;
3717
3718        for_each_prime_number_from(prime, 1, 8192) {
3719                times[1] = ktime_get_raw();
3720
3721                if (flags & CHAIN) {
3722                        for (nc = 0; nc < nctx; nc++) {
3723                                for (n = 0; n < prime; n++) {
3724                                        struct i915_request *rq;
3725
3726                                        rq = i915_request_create(ve[nc]);
3727                                        if (IS_ERR(rq)) {
3728                                                err = PTR_ERR(rq);
3729                                                goto out;
3730                                        }
3731
3732                                        if (request[nc])
3733                                                i915_request_put(request[nc]);
3734                                        request[nc] = i915_request_get(rq);
3735                                        i915_request_add(rq);
3736                                }
3737                        }
3738                } else {
3739                        for (n = 0; n < prime; n++) {
3740                                for (nc = 0; nc < nctx; nc++) {
3741                                        struct i915_request *rq;
3742
3743                                        rq = i915_request_create(ve[nc]);
3744                                        if (IS_ERR(rq)) {
3745                                                err = PTR_ERR(rq);
3746                                                goto out;
3747                                        }
3748
3749                                        if (request[nc])
3750                                                i915_request_put(request[nc]);
3751                                        request[nc] = i915_request_get(rq);
3752                                        i915_request_add(rq);
3753                                }
3754                        }
3755                }
3756
3757                for (nc = 0; nc < nctx; nc++) {
3758                        if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3759                                pr_err("%s(%s): wait for %llx:%lld timed out\n",
3760                                       __func__, ve[0]->engine->name,
3761                                       request[nc]->fence.context,
3762                                       request[nc]->fence.seqno);
3763
3764                                GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3765                                          __func__, ve[0]->engine->name,
3766                                          request[nc]->fence.context,
3767                                          request[nc]->fence.seqno);
3768                                GEM_TRACE_DUMP();
3769                                intel_gt_set_wedged(gt);
3770                                break;
3771                        }
3772                }
3773
3774                times[1] = ktime_sub(ktime_get_raw(), times[1]);
3775                if (prime == 1)
3776                        times[0] = times[1];
3777
3778                for (nc = 0; nc < nctx; nc++) {
3779                        i915_request_put(request[nc]);
3780                        request[nc] = NULL;
3781                }
3782
3783                if (__igt_timeout(end_time, NULL))
3784                        break;
3785        }
3786
3787        err = igt_live_test_end(&t);
3788        if (err)
3789                goto out;
3790
3791        pr_info("Request x%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3792                nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3793                prime, div64_u64(ktime_to_ns(times[1]), prime));
3794
3795out:
3796        if (igt_flush_test(gt->i915))
3797                err = -EIO;
3798
3799        for (nc = 0; nc < nctx; nc++) {
3800                i915_request_put(request[nc]);
3801                intel_context_unpin(ve[nc]);
3802                intel_context_put(ve[nc]);
3803        }
3804        return err;
3805}
3806
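    /*
     * Collect every engine instance of @class on @gt, optionally filtered,
     * into @siblings[], returning the number found.
     */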
3807static unsigned int
3808__select_siblings(struct intel_gt *gt,
3809                  unsigned int class,
3810                  struct intel_engine_cs **siblings,
3811                  bool (*filter)(const struct intel_engine_cs *))
3812{
3813        unsigned int n = 0;
3814        unsigned int inst;
3815
3816        for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3817                if (!gt->engine_class[class][inst])
3818                        continue;
3819
3820                if (filter && !filter(gt->engine_class[class][inst]))
3821                        continue;
3822
3823                siblings[n++] = gt->engine_class[class][inst];
3824        }
3825
3826        return n;
3827}
3828
3829static unsigned int
3830select_siblings(struct intel_gt *gt,
3831                unsigned int class,
3832                struct intel_engine_cs **siblings)
3833{
3834        return __select_siblings(gt, class, siblings, NULL);
3835}
3836
3837static int live_virtual_engine(void *arg)
3838{
3839        struct intel_gt *gt = arg;
3840        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3841        struct intel_engine_cs *engine;
3842        enum intel_engine_id id;
3843        unsigned int class;
3844        int err;
3845
3846        if (intel_uc_uses_guc_submission(&gt->uc))
3847                return 0;
3848
3849        for_each_engine(engine, gt, id) {
3850                err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3851                if (err) {
3852                        pr_err("Failed to wrap engine %s: err=%d\n",
3853                               engine->name, err);
3854                        return err;
3855                }
3856        }
3857
3858        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3859                int nsibling, n;
3860
3861                nsibling = select_siblings(gt, class, siblings);
3862                if (nsibling < 2)
3863                        continue;
3864
3865                for (n = 1; n <= nsibling + 1; n++) {
3866                        err = nop_virtual_engine(gt, siblings, nsibling,
3867                                                 n, 0);
3868                        if (err)
3869                                return err;
3870                }
3871
3872                err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3873                if (err)
3874                        return err;
3875        }
3876
3877        return 0;
3878}
3879
3880static int mask_virtual_engine(struct intel_gt *gt,
3881                               struct intel_engine_cs **siblings,
3882                               unsigned int nsibling)
3883{
3884        struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3885        struct intel_context *ve;
3886        struct igt_live_test t;
3887        unsigned int n;
3888        int err;
3889
3890        /*
3891         * Check that by setting the execution mask on a request, we can
3892         * restrict it to our desired engine within the virtual engine.
3893         */
3894
3895        ve = intel_execlists_create_virtual(siblings, nsibling);
3896        if (IS_ERR(ve)) {
3897                err = PTR_ERR(ve);
3898                goto out_close;
3899        }
3900
3901        err = intel_context_pin(ve);
3902        if (err)
3903                goto out_put;
3904
3905        err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3906        if (err)
3907                goto out_unpin;
3908
3909        for (n = 0; n < nsibling; n++) {
3910                request[n] = i915_request_create(ve);
3911                if (IS_ERR(request[n])) {
3912                        err = PTR_ERR(request[n]);
3913                        nsibling = n;
3914                        goto out;
3915                }
3916
3917                /* Reverse order as it's more likely to be unnatural */
3918                request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3919
3920                i915_request_get(request[n]);
3921                i915_request_add(request[n]);
3922        }
3923
3924        for (n = 0; n < nsibling; n++) {
3925                if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3926                        pr_err("%s(%s): wait for %llx:%lld timed out\n",
3927                               __func__, ve->engine->name,
3928                               request[n]->fence.context,
3929                               request[n]->fence.seqno);
3930
3931                        GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3932                                  __func__, ve->engine->name,
3933                                  request[n]->fence.context,
3934                                  request[n]->fence.seqno);
3935                        GEM_TRACE_DUMP();
3936                        intel_gt_set_wedged(gt);
3937                        err = -EIO;
3938                        goto out;
3939                }
3940
3941                if (request[n]->engine != siblings[nsibling - n - 1]) {
3942                        pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3943                               request[n]->engine->name,
3944                               siblings[nsibling - n - 1]->name);
3945                        err = -EINVAL;
3946                        goto out;
3947                }
3948        }
3949
3950        err = igt_live_test_end(&t);
3951out:
3952        if (igt_flush_test(gt->i915))
3953                err = -EIO;
3954
3955        for (n = 0; n < nsibling; n++)
3956                i915_request_put(request[n]);
3957
3958out_unpin:
3959        intel_context_unpin(ve);
3960out_put:
3961        intel_context_put(ve);
3962out_close:
3963        return err;
3964}
3965
3966static int live_virtual_mask(void *arg)
3967{
3968        struct intel_gt *gt = arg;
3969        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3970        unsigned int class;
3971        int err;
3972
3973        if (intel_uc_uses_guc_submission(&gt->uc))
3974                return 0;
3975
3976        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3977                unsigned int nsibling;
3978
3979                nsibling = select_siblings(gt, class, siblings);
3980                if (nsibling < 2)
3981                        continue;
3982
3983                err = mask_virtual_engine(gt, siblings, nsibling);
3984                if (err)
3985                        return err;
3986        }
3987
3988        return 0;
3989}
3990
3991static int slicein_virtual_engine(struct intel_gt *gt,
3992                                  struct intel_engine_cs **siblings,
3993                                  unsigned int nsibling)
3994{
3995        const long timeout = slice_timeout(siblings[0]);
3996        struct intel_context *ce;
3997        struct i915_request *rq;
3998        struct igt_spinner spin;
3999        unsigned int n;
4000        int err = 0;
4001
4002        /*
4003         * Virtual requests must take part in timeslicing on the target engines.
4004         */
4005
4006        if (igt_spinner_init(&spin, gt))
4007                return -ENOMEM;
4008
4009        for (n = 0; n < nsibling; n++) {
4010                ce = intel_context_create(siblings[n]);
4011                if (IS_ERR(ce)) {
4012                        err = PTR_ERR(ce);
4013                        goto out;
4014                }
4015
4016                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4017                intel_context_put(ce);
4018                if (IS_ERR(rq)) {
4019                        err = PTR_ERR(rq);
4020                        goto out;
4021                }
4022
4023                i915_request_add(rq);
4024        }
4025
4026        ce = intel_execlists_create_virtual(siblings, nsibling);
4027        if (IS_ERR(ce)) {
4028                err = PTR_ERR(ce);
4029                goto out;
4030        }
4031
4032        rq = intel_context_create_request(ce);
4033        intel_context_put(ce);
4034        if (IS_ERR(rq)) {
4035                err = PTR_ERR(rq);
4036                goto out;
4037        }
4038
4039        i915_request_get(rq);
4040        i915_request_add(rq);
4041        if (i915_request_wait(rq, 0, timeout) < 0) {
4042                GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4043                              __func__, rq->engine->name);
4044                GEM_TRACE_DUMP();
4045                intel_gt_set_wedged(gt);
4046                err = -EIO;
4047        }
4048        i915_request_put(rq);
4049
4050out:
4051        igt_spinner_end(&spin);
4052        if (igt_flush_test(gt->i915))
4053                err = -EIO;
4054        igt_spinner_fini(&spin);
4055        return err;
4056}
4057
4058static int sliceout_virtual_engine(struct intel_gt *gt,
4059                                   struct intel_engine_cs **siblings,
4060                                   unsigned int nsibling)
4061{
4062        const long timeout = slice_timeout(siblings[0]);
4063        struct intel_context *ce;
4064        struct i915_request *rq;
4065        struct igt_spinner spin;
4066        unsigned int n;
4067        int err = 0;
4068
4069        /*
4070         * Virtual requests must allow others a fair timeslice.
4071         */
4072
4073        if (igt_spinner_init(&spin, gt))
4074                return -ENOMEM;
4075
4076        /* XXX We do not handle oversubscription and fairness with normal rq */
4077        for (n = 0; n < nsibling; n++) {
4078                ce = intel_execlists_create_virtual(siblings, nsibling);
4079                if (IS_ERR(ce)) {
4080                        err = PTR_ERR(ce);
4081                        goto out;
4082                }
4083
4084                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4085                intel_context_put(ce);
4086                if (IS_ERR(rq)) {
4087                        err = PTR_ERR(rq);
4088                        goto out;
4089                }
4090
4091                i915_request_add(rq);
4092        }
4093
4094        for (n = 0; !err && n < nsibling; n++) {
4095                ce = intel_context_create(siblings[n]);
4096                if (IS_ERR(ce)) {
4097                        err = PTR_ERR(ce);
4098                        goto out;
4099                }
4100
4101                rq = intel_context_create_request(ce);
4102                intel_context_put(ce);
4103                if (IS_ERR(rq)) {
4104                        err = PTR_ERR(rq);
4105                        goto out;
4106                }
4107
4108                i915_request_get(rq);
4109                i915_request_add(rq);
4110                if (i915_request_wait(rq, 0, timeout) < 0) {
4111                        GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4112                                      __func__, siblings[n]->name);
4113                        GEM_TRACE_DUMP();
4114                        intel_gt_set_wedged(gt);
4115                        err = -EIO;
4116                }
4117                i915_request_put(rq);
4118        }
4119
4120out:
4121        igt_spinner_end(&spin);
4122        if (igt_flush_test(gt->i915))
4123                err = -EIO;
4124        igt_spinner_fini(&spin);
4125        return err;
4126}
4127
4128static int live_virtual_slice(void *arg)
4129{
4130        struct intel_gt *gt = arg;
4131        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4132        unsigned int class;
4133        int err;
4134
4135        if (intel_uc_uses_guc_submission(&gt->uc))
4136                return 0;
4137
4138        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4139                unsigned int nsibling;
4140
4141                nsibling = __select_siblings(gt, class, siblings,
4142                                             intel_engine_has_timeslices);
4143                if (nsibling < 2)
4144                        continue;
4145
4146                err = slicein_virtual_engine(gt, siblings, nsibling);
4147                if (err)
4148                        return err;
4149
4150                err = sliceout_virtual_engine(gt, siblings, nsibling);
4151                if (err)
4152                        return err;
4153        }
4154
4155        return 0;
4156}
4157
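    /*
     * Bounce one context across the physical siblings of a virtual engine,
     * one request per GPR dword: each request stores CS_GPR(n) to scratch
     * and then loads n + 1 into the next GPR, with its execution_mask
     * pinning it to a specific sibling. scratch[n] == n on readback proves
     * the GPR written on one engine was carried over in the context image
     * when it ran on the next.
     */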
4158static int preserved_virtual_engine(struct intel_gt *gt,
4159                                    struct intel_engine_cs **siblings,
4160                                    unsigned int nsibling)
4161{
4162        struct i915_request *last = NULL;
4163        struct intel_context *ve;
4164        struct i915_vma *scratch;
4165        struct igt_live_test t;
4166        unsigned int n;
4167        int err = 0;
4168        u32 *cs;
4169
4170        scratch = create_scratch(siblings[0]->gt);
4171        if (IS_ERR(scratch))
4172                return PTR_ERR(scratch);
4173
4174        err = i915_vma_sync(scratch);
4175        if (err)
4176                goto out_scratch;
4177
4178        ve = intel_execlists_create_virtual(siblings, nsibling);
4179        if (IS_ERR(ve)) {
4180                err = PTR_ERR(ve);
4181                goto out_scratch;
4182        }
4183
4184        err = intel_context_pin(ve);
4185        if (err)
4186                goto out_put;
4187
4188        err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4189        if (err)
4190                goto out_unpin;
4191
4192        for (n = 0; n < NUM_GPR_DW; n++) {
4193                struct intel_engine_cs *engine = siblings[n % nsibling];
4194                struct i915_request *rq;
4195
4196                rq = i915_request_create(ve);
4197                if (IS_ERR(rq)) {
4198                        err = PTR_ERR(rq);
4199                        goto out_end;
4200                }
4201
4202                i915_request_put(last);
4203                last = i915_request_get(rq);
4204
4205                cs = intel_ring_begin(rq, 8);
4206                if (IS_ERR(cs)) {
4207                        i915_request_add(rq);
4208                        err = PTR_ERR(cs);
4209                        goto out_end;
4210                }
4211
4212                *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4213                *cs++ = CS_GPR(engine, n);
4214                *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4215                *cs++ = 0;
4216
4217                *cs++ = MI_LOAD_REGISTER_IMM(1);
4218                *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4219                *cs++ = n + 1;
4220
4221                *cs++ = MI_NOOP;
4222                intel_ring_advance(rq, cs);
4223
4224                /* Restrict this request to run on a particular engine */
4225                rq->execution_mask = engine->mask;
4226                i915_request_add(rq);
4227        }
4228
4229        if (i915_request_wait(last, 0, HZ / 5) < 0) {
4230                err = -ETIME;
4231                goto out_end;
4232        }
4233
4234        cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4235        if (IS_ERR(cs)) {
4236                err = PTR_ERR(cs);
4237                goto out_end;
4238        }
4239
4240        for (n = 0; n < NUM_GPR_DW; n++) {
4241                if (cs[n] != n) {
4242                        pr_err("Incorrect value[%d] found for GPR[%d]\n",
4243                               cs[n], n);
4244                        err = -EINVAL;
4245                        break;
4246                }
4247        }
4248
4249        i915_gem_object_unpin_map(scratch->obj);
4250
4251out_end:
4252        if (igt_live_test_end(&t))
4253                err = -EIO;
4254        i915_request_put(last);
4255out_unpin:
4256        intel_context_unpin(ve);
4257out_put:
4258        intel_context_put(ve);
4259out_scratch:
4260        i915_vma_unpin_and_release(&scratch, 0);
4261        return err;
4262}
4263
4264static int live_virtual_preserved(void *arg)
4265{
4266        struct intel_gt *gt = arg;
4267        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4268        unsigned int class;
4269
4270        /*
4271         * Check that the context image retains non-privileged (user) registers
4272         * from one engine to the next. For this we check that the CS_GPR
4273         * are preserved.
4274         */
4275
4276        if (intel_uc_uses_guc_submission(&gt->uc))
4277                return 0;
4278
4279        /* As we use CS_GPR we cannot run before they existed on all engines. */
4280        if (INTEL_GEN(gt->i915) < 9)
4281                return 0;
4282
4283        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4284                int nsibling, err;
4285
4286                nsibling = select_siblings(gt, class, siblings);
4287                if (nsibling < 2)
4288                        continue;
4289
4290                err = preserved_virtual_engine(gt, siblings, nsibling);
4291                if (err)
4292                        return err;
4293        }
4294
4295        return 0;
4296}
4297
4298static int bond_virtual_engine(struct intel_gt *gt,
4299                               unsigned int class,
4300                               struct intel_engine_cs **siblings,
4301                               unsigned int nsibling,
4302                               unsigned int flags)
4303#define BOND_SCHEDULE BIT(0)
4304{
4305        struct intel_engine_cs *master;
4306        struct i915_request *rq[16];
4307        enum intel_engine_id id;
4308        struct igt_spinner spin;
4309        unsigned long n;
4310        int err;
4311
4312        /*
4313         * A set of bonded requests is intended to be run concurrently
4314         * across a number of engines. We use one request per-engine
4315         * and a magic fence to schedule each of the bonded requests
4316         * at the same time. A consequence of our current scheduler is that
4317         * we only move requests to the HW ready queue when the request
4318         * becomes ready, that is when all of its prerequisite fences have
4319         * been signaled. As one of those fences is the master submit fence,
4320         * there is a delay on all secondary fences as the HW may be
4321         * currently busy. Equally, as all the requests are independent,
4322         * they may have other fences that delay individual request
4323         * submission to HW. Ergo, we do not guarantee that all requests are
4324         * immediately submitted to HW at the same time, just that if the
4325         * rules are abided by, they are ready at the same time as the
4326         * first is submitted. Userspace can embed semaphores in its batch
4327         * to ensure parallel execution of its phases as it requires.
4328         * Naturally, it does get requested that the scheduler should take
4329         * care of parallel execution itself, even across preemption events
4330         * on different HW. (The proper answer is of course "lalalala".)
4331         *
4332         * With the submit-fence, we have identified three possible phases
4333         * of synchronisation depending on the master fence: queued (not
4334         * ready), executing, and signaled. The first two are quite simple
4335         * and checked below. However, the signaled master fence handling is
4336         * contentious. Currently we do not distinguish between a signaled
4337         * fence and an expired fence, as once signaled it does not convey
4338         * any information about the previous execution. It may even be freed
4339         * and hence checking later it may not exist at all. Ergo we currently
4340         * do not apply the bonding constraint for an already signaled fence,
4341         * as our expectation is that it should not constrain the secondaries
4342         * and is outside of the scope of the bonded request API (i.e. all
4343         * userspace requests are meant to be running in parallel). As
4344         * it imposes no constraint, and is effectively a no-op, we do not
4345         * check below as normal execution flows are checked extensively above.
4346         *
4347         * XXX Is the degenerate handling of signaled submit fences the
4348         * expected behaviour for userspace?
4349         */
4350
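            /*
             * For each master engine outside the target class: start a
             * spinner (optionally gated behind an onstack submit fence for
             * BOND_SCHEDULE), attach a bond for each sibling to a fresh
             * virtual engine, tie the bonded request to the master with
             * i915_request_await_execution(), then release everything and
             * check each bonded request ran on its designated sibling.
             */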
4351        GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4352
4353        if (igt_spinner_init(&spin, gt))
4354                return -ENOMEM;
4355
4356        err = 0;
4357        rq[0] = ERR_PTR(-ENOMEM);
4358        for_each_engine(master, gt, id) {
4359                struct i915_sw_fence fence = {};
4360                struct intel_context *ce;
4361
4362                if (master->class == class)
4363                        continue;
4364
4365                ce = intel_context_create(master);
4366                if (IS_ERR(ce)) {
4367                        err = PTR_ERR(ce);
4368                        goto out;
4369                }
4370
4371                memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4372
4373                rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4374                intel_context_put(ce);
4375                if (IS_ERR(rq[0])) {
4376                        err = PTR_ERR(rq[0]);
4377                        goto out;
4378                }
4379                i915_request_get(rq[0]);
4380
4381                if (flags & BOND_SCHEDULE) {
4382                        onstack_fence_init(&fence);
4383                        err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4384                                                               &fence,
4385                                                               GFP_KERNEL);
4386                }
4387
4388                i915_request_add(rq[0]);
4389                if (err < 0)
4390                        goto out;
4391
4392                if (!(flags & BOND_SCHEDULE) &&
4393                    !igt_wait_for_spinner(&spin, rq[0])) {
4394                        err = -EIO;
4395                        goto out;
4396                }
4397
4398                for (n = 0; n < nsibling; n++) {
4399                        struct intel_context *ve;
4400
4401                        ve = intel_execlists_create_virtual(siblings, nsibling);
4402                        if (IS_ERR(ve)) {
4403                                err = PTR_ERR(ve);
4404                                onstack_fence_fini(&fence);
4405                                goto out;
4406                        }
4407
4408                        err = intel_virtual_engine_attach_bond(ve->engine,
4409                                                               master,
4410                                                               siblings[n]);
4411                        if (err) {
4412                                intel_context_put(ve);
4413                                onstack_fence_fini(&fence);
4414                                goto out;
4415                        }
4416
4417                        err = intel_context_pin(ve);
4418                        intel_context_put(ve);
4419                        if (err) {
4420                                onstack_fence_fini(&fence);
4421                                goto out;
4422                        }
4423
4424                        rq[n + 1] = i915_request_create(ve);
4425                        intel_context_unpin(ve);
4426                        if (IS_ERR(rq[n + 1])) {
4427                                err = PTR_ERR(rq[n + 1]);
4428                                onstack_fence_fini(&fence);
4429                                goto out;
4430                        }
4431                        i915_request_get(rq[n + 1]);
4432
4433                        err = i915_request_await_execution(rq[n + 1],
4434                                                           &rq[0]->fence,
4435                                                           ve->engine->bond_execute);
4436                        i915_request_add(rq[n + 1]);
4437                        if (err < 0) {
4438                                onstack_fence_fini(&fence);
4439                                goto out;
4440                        }
4441                }
4442                onstack_fence_fini(&fence);
4443                intel_engine_flush_submission(master);
4444                igt_spinner_end(&spin);
4445
4446                if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4447                        pr_err("Master request did not execute (on %s)!\n",
4448                               rq[0]->engine->name);
4449                        err = -EIO;
4450                        goto out;
4451                }
4452
4453                for (n = 0; n < nsibling; n++) {
4454                        if (i915_request_wait(rq[n + 1], 0,
4455                                              MAX_SCHEDULE_TIMEOUT) < 0) {
4456                                err = -EIO;
4457                                goto out;
4458                        }
4459
4460                        if (rq[n + 1]->engine != siblings[n]) {
4461                                pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4462                                       siblings[n]->name,
4463                                       rq[n + 1]->engine->name,
4464                                       rq[0]->engine->name);
4465                                err = -EINVAL;
4466                                goto out;
4467                        }
4468                }
4469
4470                for (n = 0; !IS_ERR(rq[n]); n++)
4471                        i915_request_put(rq[n]);
4472                rq[0] = ERR_PTR(-ENOMEM);
4473        }
4474
4475out:
4476        for (n = 0; !IS_ERR(rq[n]); n++)
4477                i915_request_put(rq[n]);
4478        if (igt_flush_test(gt->i915))
4479                err = -EIO;
4480
4481        igt_spinner_fini(&spin);
4482        return err;
4483}
4484
4485static int live_virtual_bond(void *arg)
4486{
4487        static const struct phase {
4488                const char *name;
4489                unsigned int flags;
4490        } phases[] = {
4491                { "", 0 },
4492                { "schedule", BOND_SCHEDULE },
4493                { },
4494        };
4495        struct intel_gt *gt = arg;
4496        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4497        unsigned int class;
4498        int err;
4499
4500        if (intel_uc_uses_guc_submission(&gt->uc))
4501                return 0;
4502
4503        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4504                const struct phase *p;
4505                int nsibling;
4506
4507                nsibling = select_siblings(gt, class, siblings);
4508                if (nsibling < 2)
4509                        continue;
4510
4511                for (p = phases; p->name; p++) {
4512                        err = bond_virtual_engine(gt,
4513                                                  class, siblings, nsibling,
4514                                                  p->flags);
4515                        if (err) {
4516                                pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4517                                       __func__, p->name, class, nsibling, err);
4518                                return err;
4519                        }
4520                }
4521        }
4522
4523        return 0;
4524}
4525
4526static int reset_virtual_engine(struct intel_gt *gt,
4527                                struct intel_engine_cs **siblings,
4528                                unsigned int nsibling)
4529{
4530        struct intel_engine_cs *engine;
4531        struct intel_context *ve;
4532        struct igt_spinner spin;
4533        struct i915_request *rq;
4534        unsigned int n;
4535        int err = 0;
4536
4537        /*
4538         * In order to support offline error capture for fast preempt reset,
4539         * we need to decouple the guilty request and ensure that it and its
4540         * descendants are not executed while the capture is in progress.
4541         */
4542
4543        if (igt_spinner_init(&spin, gt))
4544                return -ENOMEM;
4545
4546        ve = intel_execlists_create_virtual(siblings, nsibling);
4547        if (IS_ERR(ve)) {
4548                err = PTR_ERR(ve);
4549                goto out_spin;
4550        }
4551
4552        for (n = 0; n < nsibling; n++)
4553                st_engine_heartbeat_disable(siblings[n]);
4554
4555        rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4556        if (IS_ERR(rq)) {
4557                err = PTR_ERR(rq);
4558                goto out_heartbeat;
4559        }
4560        i915_request_add(rq);
4561
4562        if (!igt_wait_for_spinner(&spin, rq)) {
4563                intel_gt_set_wedged(gt);
4564                err = -ETIME;
4565                goto out_heartbeat;
4566        }
4567
4568        engine = rq->engine;
4569        GEM_BUG_ON(engine == ve->engine);
4570
4571        /* Take ownership of the reset and tasklet */
4572        if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4573                             &gt->reset.flags)) {
4574                intel_gt_set_wedged(gt);
4575                err = -EBUSY;
4576                goto out_heartbeat;
4577        }
4578        tasklet_disable(&engine->execlists.tasklet);
4579
4580        engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4581        GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4582
4583        /* Fake a preemption event; failed of course */
4584        spin_lock_irq(&engine->active.lock);
4585        __unwind_incomplete_requests(engine);
4586        spin_unlock_irq(&engine->active.lock);
4587        GEM_BUG_ON(rq->engine != ve->engine);
4588
4589        /* Reset the engine while keeping our active request on hold */
4590        execlists_hold(engine, rq);
4591        GEM_BUG_ON(!i915_request_on_hold(rq));
4592
4593        intel_engine_reset(engine, NULL);
4594        GEM_BUG_ON(rq->fence.error != -EIO);
4595
4596        /* Release our grasp on the engine, letting CS flow again */
4597        tasklet_enable(&engine->execlists.tasklet);
4598        clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4599
4600        /* Check that we do not resubmit the held request */
4601        i915_request_get(rq);
4602        if (!i915_request_wait(rq, 0, HZ / 5)) {
4603                pr_err("%s: on hold request completed!\n",
4604                       engine->name);
4605                intel_gt_set_wedged(gt);
4606                err = -EIO;
4607                goto out_rq;
4608        }
4609        GEM_BUG_ON(!i915_request_on_hold(rq));
4610
4611        /* But it is resubmitted on release */
4612        execlists_unhold(engine, rq);
4613        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4614                pr_err("%s: held request did not complete!\n",
4615                       engine->name);
4616                intel_gt_set_wedged(gt);
4617                err = -ETIME;
4618        }
4619
4620out_rq:
4621        i915_request_put(rq);
4622out_heartbeat:
4623        for (n = 0; n < nsibling; n++)
4624                st_engine_heartbeat_enable(siblings[n]);
4625
4626        intel_context_put(ve);
4627out_spin:
4628        igt_spinner_fini(&spin);
4629        return err;
4630}
4631
4632static int live_virtual_reset(void *arg)
4633{
4634        struct intel_gt *gt = arg;
4635        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4636        unsigned int class;
4637
4638        /*
4639         * Check that we handle a reset event within a virtual engine.
4640         * Only the physical engine is reset, but we have to check the flow
4641         * of the virtual requests around the reset, and make sure none of
4642         * them are forgotten.
4643         */
4644
4645        if (intel_uc_uses_guc_submission(&gt->uc))
4646                return 0;
4647
4648        if (!intel_has_reset_engine(gt))
4649                return 0;
4650
4651        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4652                int nsibling, err;
4653
4654                nsibling = select_siblings(gt, class, siblings);
4655                if (nsibling < 2)
4656                        continue;
4657
4658                err = reset_virtual_engine(gt, siblings, nsibling);
4659                if (err)
4660                        return err;
4661        }
4662
4663        return 0;
4664}
4665
4666int intel_execlists_live_selftests(struct drm_i915_private *i915)
4667{
4668        static const struct i915_subtest tests[] = {
4669                SUBTEST(live_sanitycheck),
4670                SUBTEST(live_unlite_switch),
4671                SUBTEST(live_unlite_preempt),
4672                SUBTEST(live_unlite_ring),
4673                SUBTEST(live_pin_rewind),
4674                SUBTEST(live_hold_reset),
4675                SUBTEST(live_error_interrupt),
4676                SUBTEST(live_timeslice_preempt),
4677                SUBTEST(live_timeslice_rewind),
4678                SUBTEST(live_timeslice_queue),
4679                SUBTEST(live_timeslice_nopreempt),
4680                SUBTEST(live_busywait_preempt),
4681                SUBTEST(live_preempt),
4682                SUBTEST(live_late_preempt),
4683                SUBTEST(live_nopreempt),
4684                SUBTEST(live_preempt_cancel),
4685                SUBTEST(live_suppress_self_preempt),
4686                SUBTEST(live_chain_preempt),
4687                SUBTEST(live_preempt_ring),
4688                SUBTEST(live_preempt_gang),
4689                SUBTEST(live_preempt_timeout),
4690                SUBTEST(live_preempt_user),
4691                SUBTEST(live_preempt_smoke),
4692                SUBTEST(live_virtual_engine),
4693                SUBTEST(live_virtual_mask),
4694                SUBTEST(live_virtual_preserved),
4695                SUBTEST(live_virtual_slice),
4696                SUBTEST(live_virtual_bond),
4697                SUBTEST(live_virtual_reset),
4698        };
4699
4700        if (!HAS_EXECLISTS(i915))
4701                return 0;
4702
4703        if (intel_gt_is_wedged(&i915->gt))
4704                return 0;
4705
4706        return intel_gt_live_subtests(tests, &i915->gt);
4707}
4708
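    /*
     * Submit a barrier-priority request on @ce that writes 1 into the given
     * status page @slot, releasing any semaphore polling on that address.
     */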
4709static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4710{
4711        const u32 offset =
4712                i915_ggtt_offset(ce->engine->status_page.vma) +
4713                offset_in_page(slot);
4714        struct i915_request *rq;
4715        u32 *cs;
4716
4717        rq = intel_context_create_request(ce);
4718        if (IS_ERR(rq))
4719                return PTR_ERR(rq);
4720
4721        cs = intel_ring_begin(rq, 4);
4722        if (IS_ERR(cs)) {
4723                i915_request_add(rq);
4724                return PTR_ERR(cs);
4725        }
4726
4727        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4728        *cs++ = offset;
4729        *cs++ = 0;
4730        *cs++ = 1;
4731
4732        intel_ring_advance(rq, cs);
4733
4734        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4735        i915_request_add(rq);
4736        return 0;
4737}
4738
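    /*
     * Submit a kernel context request ordered after the last request on
     * @ce's timeline and wait for it, so that @ce is switched out and its
     * context image written back before the caller inspects it.
     */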
4739static int context_flush(struct intel_context *ce, long timeout)
4740{
4741        struct i915_request *rq;
4742        struct dma_fence *fence;
4743        int err = 0;
4744
4745        rq = intel_engine_create_kernel_request(ce->engine);
4746        if (IS_ERR(rq))
4747                return PTR_ERR(rq);
4748
4749        fence = i915_active_fence_get(&ce->timeline->last_request);
4750        if (fence) {
4751                i915_request_await_dma_fence(rq, fence);
4752                dma_fence_put(fence);
4753        }
4754
4755        rq = i915_request_get(rq);
4756        i915_request_add(rq);
4757        if (i915_request_wait(rq, 0, timeout) < 0)
4758                err = -ETIME;
4759        i915_request_put(rq);
4760
4761        rmb(); /* We know the request is written, make sure all state is too! */
4762        return err;
4763}
4764
4765static int live_lrc_layout(void *arg)
4766{
4767        struct intel_gt *gt = arg;
4768        struct intel_engine_cs *engine;
4769        enum intel_engine_id id;
4770        u32 *lrc;
4771        int err;
4772
4773        /*
4774         * Check that the register offsets we use to create the initial reg state
4775         * match the layout saved by HW.
4776         */
4777
4778        lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4779        if (!lrc)
4780                return -ENOMEM;
4781
4782        err = 0;
4783        for_each_engine(engine, gt, id) {
4784                u32 *hw;
4785                int dw;
4786
4787                if (!engine->default_state)
4788                        continue;
4789
4790                hw = shmem_pin_map(engine->default_state);
4791                if (IS_ERR(hw)) {
4792                        err = PTR_ERR(hw);
4793                        break;
4794                }
4795                hw += LRC_STATE_OFFSET / sizeof(*hw);
4796
4797                execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4798                                         engine->kernel_context,
4799                                         engine,
4800                                         engine->kernel_context->ring,
4801                                         true);
4802
4803                dw = 0;
4804                do {
4805                        u32 lri = hw[dw];
4806
4807                        if (lri == 0) {
4808                                dw++;
4809                                continue;
4810                        }
4811
4812                        if (lrc[dw] == 0) {
4813                                pr_debug("%s: skipped instruction %x at dword %d\n",
4814                                         engine->name, lri, dw);
4815                                dw++;
4816                                continue;
4817                        }
4818
4819                        if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4820                                pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4821                                       engine->name, dw, lri);
4822                                err = -EINVAL;
4823                                break;
4824                        }
4825
4826                        if (lrc[dw] != lri) {
4827                                pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4828                                       engine->name, dw, lri, lrc[dw]);
4829                                err = -EINVAL;
4830                                break;
4831                        }
4832
4833                        lri &= 0x7f;
4834                        lri++;
4835                        dw++;
4836
4837                        while (lri) {
4838                                if (hw[dw] != lrc[dw]) {
4839                                        pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4840                                               engine->name, dw, hw[dw], lrc[dw]);
4841                                        err = -EINVAL;
4842                                        break;
4843                                }
4844
4845                                /*
4846                                 * Skip over the actual register value as we
4847                                 * expect that to differ.
4848                                 */
4849                                dw += 2;
4850                                lri -= 2;
4851                        }
4852                } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4853
4854                if (err) {
4855                        pr_info("%s: HW register image:\n", engine->name);
4856                        igt_hexdump(hw, PAGE_SIZE);
4857
4858                        pr_info("%s: SW register image:\n", engine->name);
4859                        igt_hexdump(lrc, PAGE_SIZE);
4860                }
4861
4862                shmem_unpin_map(engine->default_state, hw);
4863                if (err)
4864                        break;
4865        }
4866
4867        kfree(lrc);
4868        return err;
4869}
4870
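    /*
     * Return the index of the first dword equal to @offset in @lri, or -1
     * if it is not found.
     */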
4871static int find_offset(const u32 *lri, u32 offset)
4872{
4873        int i;
4874
4875        for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4876                if (lri[i] == offset)
4877                        return i;
4878
4879        return -1;
4880}
4881
4882static int live_lrc_fixed(void *arg)
4883{
4884        struct intel_gt *gt = arg;
4885        struct intel_engine_cs *engine;
4886        enum intel_engine_id id;
4887        int err = 0;
4888
4889        /*
4890         * Check the assumed register offsets match the actual locations in
4891         * the context image.
4892         */
4893
4894        for_each_engine(engine, gt, id) {
4895                const struct {
4896                        u32 reg;
4897                        u32 offset;
4898                        const char *name;
4899                } tbl[] = {
4900                        {
4901                                i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4902                                CTX_RING_START - 1,
4903                                "RING_START"
4904                        },
4905                        {
4906                                i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4907                                CTX_RING_CTL - 1,
4908                                "RING_CTL"
4909                        },
4910                        {
4911                                i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4912                                CTX_RING_HEAD - 1,
4913                                "RING_HEAD"
4914                        },
4915                        {
4916                                i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4917                                CTX_RING_TAIL - 1,
4918                                "RING_TAIL"
4919                        },
4920                        {
4921                                i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4922                                lrc_ring_mi_mode(engine),
4923                                "RING_MI_MODE"
4924                        },
4925                        {
4926                                i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4927                                CTX_BB_STATE - 1,
4928                                "BB_STATE"
4929                        },
4930                        {
4931                                i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4932                                lrc_ring_wa_bb_per_ctx(engine),
4933                                "RING_BB_PER_CTX_PTR"
4934                        },
4935                        {
4936                                i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4937                                lrc_ring_indirect_ptr(engine),
4938                                "RING_INDIRECT_CTX_PTR"
4939                        },
4940                        {
4941                                i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4942                                lrc_ring_indirect_offset(engine),
4943                                "RING_INDIRECT_CTX_OFFSET"
4944                        },
4945                        {
4946                                i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4947                                CTX_TIMESTAMP - 1,
4948                                "RING_CTX_TIMESTAMP"
4949                        },
4950                        {
4951                                i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4952                                lrc_ring_gpr0(engine),
4953                                "RING_CS_GPR0"
4954                        },
4955                        {
4956                                i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4957                                lrc_ring_cmd_buf_cctl(engine),
4958                                "RING_CMD_BUF_CCTL"
4959                        },
4960                        { },
4961                }, *t;
4962                u32 *hw;
4963
4964                if (!engine->default_state)
4965                        continue;
4966
4967                hw = shmem_pin_map(engine->default_state);
4968                if (IS_ERR(hw)) {
4969                        err = PTR_ERR(hw);
4970                        break;
4971                }
4972                hw += LRC_STATE_OFFSET / sizeof(*hw);
4973
4974                for (t = tbl; t->name; t++) {
4975                        int dw = find_offset(hw, t->reg);
4976
4977                        if (dw != t->offset) {
4978                                pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4979                                       engine->name,
4980                                       t->name,
4981                                       t->reg,
4982                                       dw,
4983                                       t->offset);
4984                                err = -EINVAL;
4985                        }
4986                }
4987
4988                shmem_unpin_map(engine->default_state, hw);
4989        }
4990
4991        return err;
4992}
4993
4994static int __live_lrc_state(struct intel_engine_cs *engine,
4995                            struct i915_vma *scratch)
4996{
4997        struct intel_context *ce;
4998        struct i915_request *rq;
4999        struct i915_gem_ww_ctx ww;
5000        enum {
5001                RING_START_IDX = 0,
5002                RING_TAIL_IDX,
5003                MAX_IDX
5004        };
5005        u32 expected[MAX_IDX];
5006        u32 *cs;
5007        int err;
5008        int n;
5009
5010        ce = intel_context_create(engine);
5011        if (IS_ERR(ce))
5012                return PTR_ERR(ce);
5013
5014        i915_gem_ww_ctx_init(&ww, false);
5015retry:
5016        err = i915_gem_object_lock(scratch->obj, &ww);
5017        if (!err)
5018                err = intel_context_pin_ww(ce, &ww);
5019        if (err)
5020                goto err_put;
5021
5022        rq = i915_request_create(ce);
5023        if (IS_ERR(rq)) {
5024                err = PTR_ERR(rq);
5025                goto err_unpin;
5026        }
5027
5028        cs = intel_ring_begin(rq, 4 * MAX_IDX);
5029        if (IS_ERR(cs)) {
5030                err = PTR_ERR(cs);
5031                i915_request_add(rq);
5032                goto err_unpin;
5033        }
5034
5035        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5036        *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
5037        *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
5038        *cs++ = 0;
5039
5040        expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
5041
5042        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5043        *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
5044        *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
5045        *cs++ = 0;
5046
5047        err = i915_request_await_object(rq, scratch->obj, true);
5048        if (!err)
5049                err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5050
5051        i915_request_get(rq);
5052        i915_request_add(rq);
5053        if (err)
5054                goto err_rq;
5055
5056        intel_engine_flush_submission(engine);
5057        expected[RING_TAIL_IDX] = ce->ring->tail;
5058
5059        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5060                err = -ETIME;
5061                goto err_rq;
5062        }
5063
5064        cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5065        if (IS_ERR(cs)) {
5066                err = PTR_ERR(cs);
5067                goto err_rq;
5068        }
5069
5070        for (n = 0; n < MAX_IDX; n++) {
5071                if (cs[n] != expected[n]) {
5072                        pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
5073                               engine->name, n, cs[n], expected[n]);
5074                        err = -EINVAL;
5075                        break;
5076                }
5077        }
5078
5079        i915_gem_object_unpin_map(scratch->obj);
5080
5081err_rq:
5082        i915_request_put(rq);
5083err_unpin:
5084        intel_context_unpin(ce);
5085err_put:
5086        if (err == -EDEADLK) {
5087                err = i915_gem_ww_ctx_backoff(&ww);
5088                if (!err)
5089                        goto retry;
5090        }
5091        i915_gem_ww_ctx_fini(&ww);
5092        intel_context_put(ce);
5093        return err;
5094}
5095
5096static int live_lrc_state(void *arg)
5097{
5098        struct intel_gt *gt = arg;
5099        struct intel_engine_cs *engine;
5100        struct i915_vma *scratch;
5101        enum intel_engine_id id;
5102        int err = 0;
5103
5104        /*
5105         * Check the live register state matches what we expect for this
5106         * intel_context.
5107         */
5108
5109        scratch = create_scratch(gt);
5110        if (IS_ERR(scratch))
5111                return PTR_ERR(scratch);
5112
5113        for_each_engine(engine, gt, id) {
5114                err = __live_lrc_state(engine, scratch);
5115                if (err)
5116                        break;
5117        }
5118
5119        if (igt_flush_test(gt->i915))
5120                err = -EIO;
5121
5122        i915_vma_unpin_and_release(&scratch, 0);
5123        return err;
5124}
5125
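    /*
     * Use a single LRI on @ce to fill every CS_GPR with a non-zero value
     * (STACK_MAGIC), so a later context can verify the GPRs are cleared.
     */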
5126static int gpr_make_dirty(struct intel_context *ce)
5127{
5128        struct i915_request *rq;
5129        u32 *cs;
5130        int n;
5131
5132        rq = intel_context_create_request(ce);
5133        if (IS_ERR(rq))
5134                return PTR_ERR(rq);
5135
5136        cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
5137        if (IS_ERR(cs)) {
5138                i915_request_add(rq);
5139                return PTR_ERR(cs);
5140        }
5141
5142        *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
5143        for (n = 0; n < NUM_GPR_DW; n++) {
5144                *cs++ = CS_GPR(ce->engine, n);
5145                *cs++ = STACK_MAGIC;
5146        }
5147        *cs++ = MI_NOOP;
5148
5149        intel_ring_advance(rq, cs);
5150
5151        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5152        i915_request_add(rq);
5153
5154        return 0;
5155}
5156
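    /*
     * Build a request on @ce that waits on the status page semaphore at
     * @slot and then copies every CS_GPR into @scratch using SRM.
     */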
5157static struct i915_request *
5158__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
5159{
5160        const u32 offset =
5161                i915_ggtt_offset(ce->engine->status_page.vma) +
5162                offset_in_page(slot);
5163        struct i915_request *rq;
5164        u32 *cs;
5165        int err;
5166        int n;
5167
5168        rq = intel_context_create_request(ce);
5169        if (IS_ERR(rq))
5170                return rq;
5171
5172        cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
5173        if (IS_ERR(cs)) {
5174                i915_request_add(rq);
5175                return ERR_CAST(cs);
5176        }
5177
5178        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5179        *cs++ = MI_NOOP;
5180
5181        *cs++ = MI_SEMAPHORE_WAIT |
5182                MI_SEMAPHORE_GLOBAL_GTT |
5183                MI_SEMAPHORE_POLL |
5184                MI_SEMAPHORE_SAD_NEQ_SDD;
5185        *cs++ = 0;
5186        *cs++ = offset;
5187        *cs++ = 0;
5188
5189        for (n = 0; n < NUM_GPR_DW; n++) {
5190                *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5191                *cs++ = CS_GPR(ce->engine, n);
5192                *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
5193                *cs++ = 0;
5194        }
5195
5196        i915_vma_lock(scratch);
5197        err = i915_request_await_object(rq, scratch->obj, true);
5198        if (!err)
5199                err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5200        i915_vma_unlock(scratch);
5201
5202        i915_request_get(rq);
5203        i915_request_add(rq);
5204        if (err) {
5205                i915_request_put(rq);
5206                rq = ERR_PTR(err);
5207        }
5208
5209        return rq;
5210}
5211
5212static int __live_lrc_gpr(struct intel_engine_cs *engine,
5213                          struct i915_vma *scratch,
5214                          bool preempt)
5215{
5216        u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
5217        struct intel_context *ce;
5218        struct i915_request *rq;
5219        u32 *cs;
5220        int err;
5221        int n;
5222
5223        if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
5224                return 0; /* GPR only on rcs0 for gen8 */
5225
5226        err = gpr_make_dirty(engine->kernel_context);
5227        if (err)
5228                return err;
5229
5230        ce = intel_context_create(engine);
5231        if (IS_ERR(ce))
5232                return PTR_ERR(ce);
5233
5234        rq = __gpr_read(ce, scratch, slot);
5235        if (IS_ERR(rq)) {
5236                err = PTR_ERR(rq);
5237                goto err_put;
5238        }
5239
5240        err = wait_for_submit(engine, rq, HZ / 2);
5241        if (err)
5242                goto err_rq;
5243
5244        if (preempt) {
5245                err = gpr_make_dirty(engine->kernel_context);
5246                if (err)
5247                        goto err_rq;
5248
5249                err = emit_semaphore_signal(engine->kernel_context, slot);
5250                if (err)
5251                        goto err_rq;
5252        } else {
5253                slot[0] = 1;
5254                wmb();
5255        }
5256
5257        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5258                err = -ETIME;
5259                goto err_rq;
5260        }
5261
5262        cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5263        if (IS_ERR(cs)) {
5264                err = PTR_ERR(cs);
5265                goto err_rq;
5266        }
5267
5268        for (n = 0; n < NUM_GPR_DW; n++) {
5269                if (cs[n]) {
5270                        pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
5271                               engine->name,
5272                               n / 2, n & 1 ? "udw" : "ldw",
5273                               cs[n]);
5274                        err = -EINVAL;
5275                        break;
5276                }
5277        }
5278
5279        i915_gem_object_unpin_map(scratch->obj);
5280
5281err_rq:
5282        memset32(&slot[0], -1, 4);
5283        wmb();
5284        i915_request_put(rq);
5285err_put:
5286        intel_context_put(ce);
5287        return err;
5288}
5289
5290static int live_lrc_gpr(void *arg)
5291{
5292        struct intel_gt *gt = arg;
5293        struct intel_engine_cs *engine;
5294        struct i915_vma *scratch;
5295        enum intel_engine_id id;
5296        int err = 0;
5297
5298        /*
5299         * Check that GPR registers are cleared in new contexts as we need
5300         * to avoid leaking any information from previous contexts.
5301         */
5302
5303        scratch = create_scratch(gt);
5304        if (IS_ERR(scratch))
5305                return PTR_ERR(scratch);
5306
5307        for_each_engine(engine, gt, id) {
5308                st_engine_heartbeat_disable(engine);
5309
5310                err = __live_lrc_gpr(engine, scratch, false);
5311                if (err)
5312                        goto err;
5313
5314                err = __live_lrc_gpr(engine, scratch, true);
5315                if (err)
5316                        goto err;
5317
5318err:
5319                st_engine_heartbeat_enable(engine);
5320                if (igt_flush_test(gt->i915))
5321                        err = -EIO;
5322                if (err)
5323                        break;
5324        }
5325
5326        i915_vma_unpin_and_release(&scratch, 0);
5327        return err;
5328}
5329
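    /*
     * Build a request on @ce that waits on the status page semaphore at
     * @slot and then samples RING_CTX_TIMESTAMP into slot[idx].
     */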
5330static struct i915_request *
5331create_timestamp(struct intel_context *ce, void *slot, int idx)
5332{
5333        const u32 offset =
5334                i915_ggtt_offset(ce->engine->status_page.vma) +
5335                offset_in_page(slot);
5336        struct i915_request *rq;
5337        u32 *cs;
5338        int err;
5339
5340        rq = intel_context_create_request(ce);
5341        if (IS_ERR(rq))
5342                return rq;
5343
5344        cs = intel_ring_begin(rq, 10);
5345        if (IS_ERR(cs)) {
5346                err = PTR_ERR(cs);
5347                goto err;
5348        }
5349
5350        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5351        *cs++ = MI_NOOP;
5352
5353        *cs++ = MI_SEMAPHORE_WAIT |
5354                MI_SEMAPHORE_GLOBAL_GTT |
5355                MI_SEMAPHORE_POLL |
5356                MI_SEMAPHORE_SAD_NEQ_SDD;
5357        *cs++ = 0;
5358        *cs++ = offset;
5359        *cs++ = 0;
5360
5361        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5362        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5363        *cs++ = offset + idx * sizeof(u32);
5364        *cs++ = 0;
5365
5366        intel_ring_advance(rq, cs);
5367
5368        rq->sched.attr.priority = I915_PRIORITY_MASK;
5369        err = 0;
5370err:
5371        i915_request_get(rq);
5372        i915_request_add(rq);
5373        if (err) {
5374                i915_request_put(rq);
5375                return ERR_PTR(err);
5376        }
5377
5378        return rq;
5379}
5380
5381struct lrc_timestamp {
5382        struct intel_engine_cs *engine;
5383        struct intel_context *ce[2];
5384        u32 poison;
5385};
5386
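    /* Use a signed delta so the comparison copes with timestamp wraparound. */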
5387static bool timestamp_advanced(u32 start, u32 end)
5388{
5389        return (s32)(end - start) > 0;
5390}
5391
5392static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5393{
5394        u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5395        struct i915_request *rq;
5396        u32 timestamp;
5397        int err = 0;
5398
5399        arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5400        rq = create_timestamp(arg->ce[0], slot, 1);
5401        if (IS_ERR(rq))
5402                return PTR_ERR(rq);
5403
5404        err = wait_for_submit(rq->engine, rq, HZ / 2);
5405        if (err)
5406                goto err;
5407
5408        if (preempt) {
5409                arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5410                err = emit_semaphore_signal(arg->ce[1], slot);
5411                if (err)
5412                        goto err;
5413        } else {
5414                slot[0] = 1;
5415                wmb();
5416        }
5417
5418        /* And wait for switch to kernel (to save our context to memory) */
5419        err = context_flush(arg->ce[0], HZ / 2);
5420        if (err)
5421                goto err;
5422
5423        if (!timestamp_advanced(arg->poison, slot[1])) {
5424                pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5425                       arg->engine->name, preempt ? "preempt" : "simple",
5426                       arg->poison, slot[1]);
5427                err = -EINVAL;
5428        }
5429
5430        timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5431        if (!timestamp_advanced(slot[1], timestamp)) {
5432                pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5433                       arg->engine->name, preempt ? "preempt" : "simple",
5434                       slot[1], timestamp);
5435                err = -EINVAL;
5436        }
5437
5438err:
5439        memset32(slot, -1, 4);
5440        i915_request_put(rq);
5441        return err;
5442}
5443
5444static int live_lrc_timestamp(void *arg)
5445{
5446        struct lrc_timestamp data = {};
5447        struct intel_gt *gt = arg;
5448        enum intel_engine_id id;
5449        const u32 poison[] = {
5450                0,
5451                S32_MAX,
5452                (u32)S32_MAX + 1,
5453                U32_MAX,
5454        };
5455
5456        /*
5457         * We want to verify that the timestamp is saved and restored across
5458         * context switches and is monotonic.
5459         *
5460         * So we do this with a little bit of LRC poisoning to check various
5461         * boundary conditions, and see what happens if we preempt the context
5462         * with a second request (carrying more poison into the timestamp).
5463         */
5464
5465        for_each_engine(data.engine, gt, id) {
5466                int i, err = 0;
5467
5468                st_engine_heartbeat_disable(data.engine);
5469
5470                for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5471                        struct intel_context *tmp;
5472
5473                        tmp = intel_context_create(data.engine);
5474                        if (IS_ERR(tmp)) {
5475                                err = PTR_ERR(tmp);
5476                                goto err;
5477                        }
5478
5479                        err = intel_context_pin(tmp);
5480                        if (err) {
5481                                intel_context_put(tmp);
5482                                goto err;
5483                        }
5484
5485                        data.ce[i] = tmp;
5486                }
5487
5488                for (i = 0; i < ARRAY_SIZE(poison); i++) {
5489                        data.poison = poison[i];
5490
5491                        err = __lrc_timestamp(&data, false);
5492                        if (err)
5493                                break;
5494
5495                        err = __lrc_timestamp(&data, true);
5496                        if (err)
5497                                break;
5498                }
5499
5500err:
5501                st_engine_heartbeat_enable(data.engine);
5502                for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5503                        if (!data.ce[i])
5504                                break;
5505
5506                        intel_context_unpin(data.ce[i]);
5507                        intel_context_put(data.ce[i]);
5508                }
5509
5510                if (igt_flush_test(gt->i915))
5511                        err = -EIO;
5512                if (err)
5513                        return err;
5514        }
5515
5516        return 0;
5517}
5518
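    /*
     * Create and pin an internal object of @size into @vm, for use as a
     * user batch or scratch buffer.
     */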
5519static struct i915_vma *
5520create_user_vma(struct i915_address_space *vm, unsigned long size)
5521{
5522        struct drm_i915_gem_object *obj;
5523        struct i915_vma *vma;
5524        int err;
5525
5526        obj = i915_gem_object_create_internal(vm->i915, size);
5527        if (IS_ERR(obj))
5528                return ERR_CAST(obj);
5529
5530        vma = i915_vma_instance(obj, vm, NULL);
5531        if (IS_ERR(vma)) {
5532                i915_gem_object_put(obj);
5533                return vma;
5534        }
5535
5536        err = i915_vma_pin(vma, 0, 0, PIN_USER);
5537        if (err) {
5538                i915_gem_object_put(obj);
5539                return ERR_PTR(err);
5540        }
5541
5542        return vma;
5543}
5544
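    /*
     * Build a user batch that walks the LRI list in the engine's default
     * context image and emits an SRM for each listed register, dumping the
     * live register values into @scratch.
     */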
5545static struct i915_vma *
5546store_context(struct intel_context *ce, struct i915_vma *scratch)
5547{
5548        struct i915_vma *batch;
5549        u32 dw, x, *cs, *hw;
5550        u32 *defaults;
5551
5552        batch = create_user_vma(ce->vm, SZ_64K);
5553        if (IS_ERR(batch))
5554                return batch;
5555
5556        cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5557        if (IS_ERR(cs)) {
5558                i915_vma_put(batch);
5559                return ERR_CAST(cs);
5560        }
5561
5562        defaults = shmem_pin_map(ce->engine->default_state);
5563        if (!defaults) {
5564                i915_gem_object_unpin_map(batch->obj);
5565                i915_vma_put(batch);
5566                return ERR_PTR(-ENOMEM);
5567        }
5568
5569        x = 0;
5570        dw = 0;
5571        hw = defaults;
5572        hw += LRC_STATE_OFFSET / sizeof(*hw);
5573        do {
5574                u32 len = hw[dw] & 0x7f;
5575
5576                if (hw[dw] == 0) {
5577                        dw++;
5578                        continue;
5579                }
5580
5581                if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5582                        dw += len + 2;
5583                        continue;
5584                }
5585
5586                dw++;
5587                len = (len + 1) / 2;
5588                while (len--) {
5589                        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
5590                        *cs++ = hw[dw];
5591                        *cs++ = lower_32_bits(scratch->node.start + x);
5592                        *cs++ = upper_32_bits(scratch->node.start + x);
5593
5594                        dw += 2;
5595                        x += 4;
5596                }
5597        } while (dw < PAGE_SIZE / sizeof(u32) &&
5598                 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5599
5600        *cs++ = MI_BATCH_BUFFER_END;
5601
5602        shmem_unpin_map(ce->engine->default_state, defaults);
5603
5604        i915_gem_object_flush_map(batch->obj);
5605        i915_gem_object_unpin_map(batch->obj);
5606
5607        return batch;
5608}
5609
5610static int move_to_active(struct i915_request *rq,
5611                          struct i915_vma *vma,
5612                          unsigned int flags)
5613{
5614        int err;
5615
5616        i915_vma_lock(vma);
5617        err = i915_request_await_object(rq, vma->obj, flags);
5618        if (!err)
5619                err = i915_vma_move_to_active(vma, rq, flags);
5620        i915_vma_unlock(vma);
5621
5622        return err;
5623}
5624
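    /*
     * Submit a request on @ce that dumps the context registers into @before,
     * waits on @sema, then dumps them again into @after, bracketing whatever
     * runs while we block on the semaphore.
     */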
5625static struct i915_request *
5626record_registers(struct intel_context *ce,
5627                 struct i915_vma *before,
5628                 struct i915_vma *after,
5629                 u32 *sema)
5630{
5631        struct i915_vma *b_before, *b_after;
5632        struct i915_request *rq;
5633        u32 *cs;
5634        int err;
5635
5636        b_before = store_context(ce, before);
5637        if (IS_ERR(b_before))
5638                return ERR_CAST(b_before);
5639
5640        b_after = store_context(ce, after);
5641        if (IS_ERR(b_after)) {
5642                rq = ERR_CAST(b_after);
5643                goto err_before;
5644        }
5645
5646        rq = intel_context_create_request(ce);
5647        if (IS_ERR(rq))
5648                goto err_after;
5649
5650        err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5651        if (err)
5652                goto err_rq;
5653
5654        err = move_to_active(rq, b_before, 0);
5655        if (err)
5656                goto err_rq;
5657
5658        err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5659        if (err)
5660                goto err_rq;
5661
5662        err = move_to_active(rq, b_after, 0);
5663        if (err)
5664                goto err_rq;
5665
5666        cs = intel_ring_begin(rq, 14);
5667        if (IS_ERR(cs)) {
5668                err = PTR_ERR(cs);
5669                goto err_rq;
5670        }
5671
5672        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5673        *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5674        *cs++ = lower_32_bits(b_before->node.start);
5675        *cs++ = upper_32_bits(b_before->node.start);
5676
5677        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5678        *cs++ = MI_SEMAPHORE_WAIT |
5679                MI_SEMAPHORE_GLOBAL_GTT |
5680                MI_SEMAPHORE_POLL |
5681                MI_SEMAPHORE_SAD_NEQ_SDD;
5682        *cs++ = 0;
5683        *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5684                offset_in_page(sema);
5685        *cs++ = 0;
5686        *cs++ = MI_NOOP;
5687
5688        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5689        *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5690        *cs++ = lower_32_bits(b_after->node.start);
5691        *cs++ = upper_32_bits(b_after->node.start);
5692
5693        intel_ring_advance(rq, cs);
5694
5695        WRITE_ONCE(*sema, 0);
5696        i915_request_get(rq);
5697        i915_request_add(rq);
5698err_after:
5699        i915_vma_put(b_after);
5700err_before:
5701        i915_vma_put(b_before);
5702        return rq;
5703
5704err_rq:
5705        i915_request_add(rq);
5706        rq = ERR_PTR(err);
5707        goto err_after;
5708}
5709
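    /*
     * Build a user batch that replays the LRI list from the engine's default
     * context image, writing @poison into every listed register.
     */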
5710static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5711{
5712        struct i915_vma *batch;
5713        u32 dw, *cs, *hw;
5714        u32 *defaults;
5715
5716        batch = create_user_vma(ce->vm, SZ_64K);
5717        if (IS_ERR(batch))
5718                return batch;
5719
5720        cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5721        if (IS_ERR(cs)) {
5722                i915_vma_put(batch);
5723                return ERR_CAST(cs);
5724        }
5725
5726        defaults = shmem_pin_map(ce->engine->default_state);
5727        if (!defaults) {
5728                i915_gem_object_unpin_map(batch->obj);
5729                i915_vma_put(batch);
5730                return ERR_PTR(-ENOMEM);
5731        }
5732
5733        dw = 0;
5734        hw = defaults;
5735        hw += LRC_STATE_OFFSET / sizeof(*hw);
5736        do {
5737                u32 len = hw[dw] & 0x7f;
5738
5739                if (hw[dw] == 0) {
5740                        dw++;
5741                        continue;
5742                }
5743
5744                if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5745                        dw += len + 2;
5746                        continue;
5747                }
5748
5749                dw++;
5750                len = (len + 1) / 2;
5751                *cs++ = MI_LOAD_REGISTER_IMM(len);
5752                while (len--) {
5753                        *cs++ = hw[dw];
5754                        *cs++ = poison;
5755                        dw += 2;
5756                }
5757        } while (dw < PAGE_SIZE / sizeof(u32) &&
5758                 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5759
5760        *cs++ = MI_BATCH_BUFFER_END;
5761
5762        shmem_unpin_map(ce->engine->default_state, defaults);
5763
5764        i915_gem_object_flush_map(batch->obj);
5765        i915_gem_object_unpin_map(batch->obj);
5766
5767        return batch;
5768}
5769
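    /*
     * Run the poison batch on @ce, then write 1 to @sema from the ring so
     * the recording context is released once the poisoning has executed.
     */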
5770static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5771{
5772        struct i915_request *rq;
5773        struct i915_vma *batch;
5774        u32 *cs;
5775        int err;
5776
5777        batch = load_context(ce, poison);
5778        if (IS_ERR(batch))
5779                return PTR_ERR(batch);
5780
5781        rq = intel_context_create_request(ce);
5782        if (IS_ERR(rq)) {
5783                err = PTR_ERR(rq);
5784                goto err_batch;
5785        }
5786
5787        err = move_to_active(rq, batch, 0);
5788        if (err)
5789                goto err_rq;
5790
5791        cs = intel_ring_begin(rq, 8);
5792        if (IS_ERR(cs)) {
5793                err = PTR_ERR(cs);
5794                goto err_rq;
5795        }
5796
5797        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5798        *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5799        *cs++ = lower_32_bits(batch->node.start);
5800        *cs++ = upper_32_bits(batch->node.start);
5801
5802        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5803        *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5804                offset_in_page(sema);
5805        *cs++ = 0;
5806        *cs++ = 1;
5807
5808        intel_ring_advance(rq, cs);
5809
5810        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5811err_rq:
5812        i915_request_add(rq);
5813err_batch:
5814        i915_vma_put(batch);
5815        return err;
5816}
5817
5818static bool is_moving(u32 a, u32 b)
5819{
5820        return a != b;
5821}
5822
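    /*
     * Walk the LRI list and compare the reference and result dumps: any
     * register that was stable across the reference run but differs in the
     * result run (other than RING_HEAD/RING_TAIL) indicates state leaking
     * in from the poisoning context.
     */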
5823static int compare_isolation(struct intel_engine_cs *engine,
5824                             struct i915_vma *ref[2],
5825                             struct i915_vma *result[2],
5826                             struct intel_context *ce,
5827                             u32 poison)
5828{
5829        u32 x, dw, *hw, *lrc;
5830        u32 *A[2], *B[2];
5831        u32 *defaults;
5832        int err = 0;
5833
5834        A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5835        if (IS_ERR(A[0]))
5836                return PTR_ERR(A[0]);
5837
5838        A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5839        if (IS_ERR(A[1])) {
5840                err = PTR_ERR(A[1]);
5841                goto err_A0;
5842        }
5843
5844        B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5845        if (IS_ERR(B[0])) {
5846                err = PTR_ERR(B[0]);
5847                goto err_A1;
5848        }
5849
5850        B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5851        if (IS_ERR(B[1])) {
5852                err = PTR_ERR(B[1]);
5853                goto err_B0;
5854        }
5855
5856        lrc = i915_gem_object_pin_map(ce->state->obj,
5857                                      i915_coherent_map_type(engine->i915));
5858        if (IS_ERR(lrc)) {
5859                err = PTR_ERR(lrc);
5860                goto err_B1;
5861        }
5862        lrc += LRC_STATE_OFFSET / sizeof(*hw);
5863
5864        defaults = shmem_pin_map(ce->engine->default_state);
5865        if (!defaults) {
5866                err = -ENOMEM;
5867                goto err_lrc;
5868        }
5869
5870        x = 0;
5871        dw = 0;
5872        hw = defaults;
5873        hw += LRC_STATE_OFFSET / sizeof(*hw);
5874        do {
5875                u32 len = hw[dw] & 0x7f;
5876
5877                if (hw[dw] == 0) {
5878                        dw++;
5879                        continue;
5880                }
5881
5882                if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5883                        dw += len + 2;
5884                        continue;
5885                }
5886
5887                dw++;
5888                len = (len + 1) / 2;
5889                while (len--) {
5890                        if (!is_moving(A[0][x], A[1][x]) &&
5891                            (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5892                                switch (hw[dw] & 4095) {
5893                                case 0x30: /* RING_HEAD */
5894                                case 0x34: /* RING_TAIL */
5895                                        break;
5896
5897                                default:
5898                                        pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5899                                               engine->name, dw,
5900                                               hw[dw], hw[dw + 1],
5901                                               A[0][x], B[0][x], B[1][x],
5902                                               poison, lrc[dw + 1]);
5903                                        err = -EINVAL;
5904                                }
5905                        }
5906                        dw += 2;
5907                        x++;
5908                }
5909        } while (dw < PAGE_SIZE / sizeof(u32) &&
5910                 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5911
5912        shmem_unpin_map(ce->engine->default_state, defaults);
5913err_lrc:
5914        i915_gem_object_unpin_map(ce->state->obj);
5915err_B1:
5916        i915_gem_object_unpin_map(result[1]->obj);
5917err_B0:
5918        i915_gem_object_unpin_map(result[0]->obj);
5919err_A1:
5920        i915_gem_object_unpin_map(ref[1]->obj);
5921err_A0:
5922        i915_gem_object_unpin_map(ref[0]->obj);
5923        return err;
5924}
5925
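    /*
     * Take a reference dump of context A's registers, then repeat the dump
     * while context B poisons its own register state, and compare the two
     * to check that A was unaffected.
     */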
5926static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5927{
5928        u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5929        struct i915_vma *ref[2], *result[2];
5930        struct intel_context *A, *B;
5931        struct i915_request *rq;
5932        int err;
5933
5934        A = intel_context_create(engine);
5935        if (IS_ERR(A))
5936                return PTR_ERR(A);
5937
5938        B = intel_context_create(engine);
5939        if (IS_ERR(B)) {
5940                err = PTR_ERR(B);
5941                goto err_A;
5942        }
5943
5944        ref[0] = create_user_vma(A->vm, SZ_64K);
5945        if (IS_ERR(ref[0])) {
5946                err = PTR_ERR(ref[0]);
5947                goto err_B;
5948        }
5949
5950        ref[1] = create_user_vma(A->vm, SZ_64K);
5951        if (IS_ERR(ref[1])) {
5952                err = PTR_ERR(ref[1]);
5953                goto err_ref0;
5954        }
5955
5956        rq = record_registers(A, ref[0], ref[1], sema);
5957        if (IS_ERR(rq)) {
5958                err = PTR_ERR(rq);
5959                goto err_ref1;
5960        }
5961
5962        WRITE_ONCE(*sema, 1);
5963        wmb();
5964
5965        if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5966                i915_request_put(rq);
5967                err = -ETIME;
5968                goto err_ref1;
5969        }
5970        i915_request_put(rq);
5971
5972        result[0] = create_user_vma(A->vm, SZ_64K);
5973        if (IS_ERR(result[0])) {
5974                err = PTR_ERR(result[0]);
5975                goto err_ref1;
5976        }
5977
5978        result[1] = create_user_vma(A->vm, SZ_64K);
5979        if (IS_ERR(result[1])) {
5980                err = PTR_ERR(result[1]);
5981                goto err_result0;
5982        }
5983
5984        rq = record_registers(A, result[0], result[1], sema);
5985        if (IS_ERR(rq)) {
5986                err = PTR_ERR(rq);
5987                goto err_result1;
5988        }
5989
5990        err = poison_registers(B, poison, sema);
5991        if (err) {
5992                WRITE_ONCE(*sema, -1);
5993                i915_request_put(rq);
5994                goto err_result1;
5995        }
5996
5997        if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5998                i915_request_put(rq);
5999                err = -ETIME;
6000                goto err_result1;
6001        }
6002        i915_request_put(rq);
6003
6004        err = compare_isolation(engine, ref, result, A, poison);
6005
6006err_result1:
6007        i915_vma_put(result[1]);
6008err_result0:
6009        i915_vma_put(result[0]);
6010err_ref1:
6011        i915_vma_put(ref[1]);
6012err_ref0:
6013        i915_vma_put(ref[0]);
6014err_B:
6015        intel_context_put(B);
6016err_A:
6017        intel_context_put(A);
6018        return err;
6019}
6020
6021static bool skip_isolation(const struct intel_engine_cs *engine)
6022{
6023        if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
6024                return true;
6025
6026        if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
6027                return true;
6028
6029        return false;
6030}
6031
6032static int live_lrc_isolation(void *arg)
6033{
6034        struct intel_gt *gt = arg;
6035        struct intel_engine_cs *engine;
6036        enum intel_engine_id id;
6037        const u32 poison[] = {
6038                STACK_MAGIC,
6039                0x3a3a3a3a,
6040                0x5c5c5c5c,
6041                0xffffffff,
6042                0xffff0000,
6043        };
6044        int err = 0;
6045
6046        /*
6047         * Our goal is to try and verify that per-context state cannot be
6048         * tampered with by another non-privileged client.
6049         *
6050         * We take the list of context registers from the LRI in the default
6051         * context image and attempt to modify that list from a remote context.
6052         */
6053
6054        for_each_engine(engine, gt, id) {
6055                int i;
6056
6057                /* Just don't even ask */
6058                if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
6059                    skip_isolation(engine))
6060                        continue;
6061
6062                intel_engine_pm_get(engine);
6063                for (i = 0; i < ARRAY_SIZE(poison); i++) {
6064                        int result;
6065
6066                        result = __lrc_isolation(engine, poison[i]);
6067                        if (result && !err)
6068                                err = result;
6069
6070                        result = __lrc_isolation(engine, ~poison[i]);
6071                        if (result && !err)
6072                                err = result;
6073                }
6074                intel_engine_pm_put(engine);
6075                if (igt_flush_test(gt->i915)) {
6076                        err = -EIO;
6077                        break;
6078                }
6079        }
6080
6081        return err;
6082}
6083
6084static int indirect_ctx_submit_req(struct intel_context *ce)
6085{
6086        struct i915_request *rq;
6087        int err = 0;
6088
6089        rq = intel_context_create_request(ce);
6090        if (IS_ERR(rq))
6091                return PTR_ERR(rq);
6092
6093        i915_request_get(rq);
6094        i915_request_add(rq);
6095
6096        if (i915_request_wait(rq, 0, HZ / 5) < 0)
6097                err = -ETIME;
6098
6099        i915_request_put(rq);
6100
6101        return err;
6102}
6103
6104#define CTX_BB_CANARY_OFFSET (3 * 1024)
6105#define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
6106
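    /*
     * Emit an SRM of the engine-local RING_START (via CS mmio relative
     * addressing) into the canary slot of this context's wa_bb page.
     */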
6107static u32 *
6108emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
6109{
6110        *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
6111                MI_SRM_LRM_GLOBAL_GTT |
6112                MI_LRI_LRM_CS_MMIO;
6113        *cs++ = i915_mmio_reg_offset(RING_START(0));
6114        *cs++ = i915_ggtt_offset(ce->state) +
6115                context_wa_bb_offset(ce) +
6116                CTX_BB_CANARY_OFFSET;
6117        *cs++ = 0;
6118
6119        return cs;
6120}
6121
6122static void
6123indirect_ctx_bb_setup(struct intel_context *ce)
6124{
6125        u32 *cs = context_indirect_bb(ce);
6126
6127        cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
6128
6129        setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
6130}
6131
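    /*
     * If the indirect ctx bb ran for this context, the canary it wrote must
     * match the RING_START value held in the context's register state.
     */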
6132static bool check_ring_start(struct intel_context *ce)
6133{
6134        const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
6135                LRC_STATE_OFFSET + context_wa_bb_offset(ce);
6136
6137        if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
6138                return true;
6139
6140        pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
6141               ctx_bb[CTX_BB_CANARY_INDEX],
6142               ce->lrc_reg_state[CTX_RING_START]);
6143
6144        return false;
6145}
6146
6147static int indirect_ctx_bb_check(struct intel_context *ce)
6148{
6149        int err;
6150
6151        err = indirect_ctx_submit_req(ce);
6152        if (err)
6153                return err;
6154
6155        if (!check_ring_start(ce))
6156                return -EINVAL;
6157
6158        return 0;
6159}
6160
6161static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
6162{
6163        struct intel_context *a, *b;
6164        int err;
6165
6166        a = intel_context_create(engine);
6167        if (IS_ERR(a))
6168                return PTR_ERR(a);
6169        err = intel_context_pin(a);
6170        if (err)
6171                goto put_a;
6172
6173        b = intel_context_create(engine);
6174        if (IS_ERR(b)) {
6175                err = PTR_ERR(b);
6176                goto unpin_a;
6177        }
6178        err = intel_context_pin(b);
6179        if (err)
6180                goto put_b;
6181
6182        /* We use the already reserved extra page in context state */
6183        if (!a->wa_bb_page) {
6184                GEM_BUG_ON(b->wa_bb_page);
6185                GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
6186                goto unpin_b;
6187        }
6188
6189        /*
6190         * In order to test that our per-context bb is truly per context,
6191         * and executes at the intended spot in the context restore process,
6192         * make the batch store the ring start value to memory.
6193         * As the ring start is restored prior to running the indirect ctx bb,
6194         * and as it will be different for each context, it fits this purpose.
6195         */
6196        indirect_ctx_bb_setup(a);
6197        indirect_ctx_bb_setup(b);
6198
6199        err = indirect_ctx_bb_check(a);
6200        if (err)
6201                goto unpin_b;
6202
6203        err = indirect_ctx_bb_check(b);
6204
6205unpin_b:
6206        intel_context_unpin(b);
6207put_b:
6208        intel_context_put(b);
6209unpin_a:
6210        intel_context_unpin(a);
6211put_a:
6212        intel_context_put(a);
6213
6214        return err;
6215}
6216
6217static int live_lrc_indirect_ctx_bb(void *arg)
6218{
6219        struct intel_gt *gt = arg;
6220        struct intel_engine_cs *engine;
6221        enum intel_engine_id id;
6222        int err = 0;
6223
6224        for_each_engine(engine, gt, id) {
6225                intel_engine_pm_get(engine);
6226                err = __live_lrc_indirect_ctx_bb(engine);
6227                intel_engine_pm_put(engine);
6228
6229                if (igt_flush_test(gt->i915))
6230                        err = -EIO;
6231
6232                if (err)
6233                        break;
6234        }
6235
6236        return err;
6237}
6238
6239static void garbage_reset(struct intel_engine_cs *engine,
6240                          struct i915_request *rq)
6241{
6242        const unsigned int bit = I915_RESET_ENGINE + engine->id;
6243        unsigned long *lock = &engine->gt->reset.flags;
6244
6245        if (test_and_set_bit(bit, lock))
6246                return;
6247
6248        tasklet_disable(&engine->execlists.tasklet);
6249
6250        if (!rq->fence.error)
6251                intel_engine_reset(engine, NULL);
6252
6253        tasklet_enable(&engine->execlists.tasklet);
6254        clear_and_wake_up_bit(bit, lock);
6255}
6256
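    /*
     * Scribble random bytes over the pinned context register state and
     * submit a request on the now-corrupted context.
     */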
6257static struct i915_request *garbage(struct intel_context *ce,
6258                                    struct rnd_state *prng)
6259{
6260        struct i915_request *rq;
6261        int err;
6262
6263        err = intel_context_pin(ce);
6264        if (err)
6265                return ERR_PTR(err);
6266
6267        prandom_bytes_state(prng,
6268                            ce->lrc_reg_state,
6269                            ce->engine->context_size -
6270                            LRC_STATE_OFFSET);
6271
6272        rq = intel_context_create_request(ce);
6273        if (IS_ERR(rq)) {
6274                err = PTR_ERR(rq);
6275                goto err_unpin;
6276        }
6277
6278        i915_request_get(rq);
6279        i915_request_add(rq);
6280        return rq;
6281
6282err_unpin:
6283        intel_context_unpin(ce);
6284        return ERR_PTR(err);
6285}
6286
6287static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6288{
6289        struct intel_context *ce;
6290        struct i915_request *hang;
6291        int err = 0;
6292
6293        ce = intel_context_create(engine);
6294        if (IS_ERR(ce))
6295                return PTR_ERR(ce);
6296
6297        hang = garbage(ce, prng);
6298        if (IS_ERR(hang)) {
6299                err = PTR_ERR(hang);
6300                goto err_ce;
6301        }
6302
6303        if (wait_for_submit(engine, hang, HZ / 2)) {
6304                i915_request_put(hang);
6305                err = -ETIME;
6306                goto err_ce;
6307        }
6308
6309        intel_context_set_banned(ce);
6310        garbage_reset(engine, hang);
6311
6312        intel_engine_flush_submission(engine);
6313        if (!hang->fence.error) {
6314                i915_request_put(hang);
6315                pr_err("%s: corrupted context was not reset\n",
6316                       engine->name);
6317                err = -EINVAL;
6318                goto err_ce;
6319        }
6320
6321        if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6322                pr_err("%s: corrupted context did not recover\n",
6323                       engine->name);
6324                i915_request_put(hang);
6325                err = -EIO;
6326                goto err_ce;
6327        }
6328        i915_request_put(hang);
6329
6330err_ce:
6331        intel_context_put(ce);
6332        return err;
6333}
6334
6335static int live_lrc_garbage(void *arg)
6336{
6337        struct intel_gt *gt = arg;
6338        struct intel_engine_cs *engine;
6339        enum intel_engine_id id;
6340
6341        /*
6342         * Verify that we can recover if one context's state is completely
6343         * corrupted.
6344         */
6345
6346        if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6347                return 0;
6348
6349        for_each_engine(engine, gt, id) {
6350                I915_RND_STATE(prng);
6351                int err = 0, i;
6352
6353                if (!intel_has_reset_engine(engine->gt))
6354                        continue;
6355
6356                intel_engine_pm_get(engine);
6357                for (i = 0; i < 3; i++) {
6358                        err = __lrc_garbage(engine, &prng);
6359                        if (err)
6360                                break;
6361                }
6362                intel_engine_pm_put(engine);
6363
6364                if (igt_flush_test(gt->i915))
6365                        err = -EIO;
6366                if (err)
6367                        return err;
6368        }
6369
6370        return 0;
6371}
6372
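/*
 * The hardware accumulates a per-context runtime in the per-process HWSP
 * (pphwsp[16], per the comment in live_pphwsp_runtime() below), which the
 * driver samples into ce->runtime and exposes through
 * intel_context_get_total_runtime_ns() and
 * intel_context_get_avg_runtime_ns(). The test below floods a single
 * context with empty requests and then checks that the sampled value only
 * ever increased: any decrease is recorded by the driver in
 * ce->runtime.num_underflow / ce->runtime.max_underflow and fails the
 * test with -EOVERFLOW.
 */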
6373static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6374{
6375        struct intel_context *ce;
6376        struct i915_request *rq;
6377        IGT_TIMEOUT(end_time);
6378        int err;
6379
6380        ce = intel_context_create(engine);
6381        if (IS_ERR(ce))
6382                return PTR_ERR(ce);
6383
6384        ce->runtime.num_underflow = 0;
6385        ce->runtime.max_underflow = 0;
6386
6387        do {
6388                unsigned int loop = 1024;
6389
6390                while (loop) {
6391                        rq = intel_context_create_request(ce);
6392                        if (IS_ERR(rq)) {
6393                                err = PTR_ERR(rq);
6394                                goto err_rq;
6395                        }
6396
6397                        if (--loop == 0)
6398                                i915_request_get(rq);
6399
6400                        i915_request_add(rq);
6401                }
6402
6403                if (__igt_timeout(end_time, NULL))
6404                        break;
6405
6406                i915_request_put(rq);
6407        } while (1);
6408
6409        err = i915_request_wait(rq, 0, HZ / 5);
6410        if (err < 0) {
6411                pr_err("%s: request not completed!\n", engine->name);
6412                goto err_wait;
6413        }
6414
6415        igt_flush_test(engine->i915);
6416
6417        pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6418                engine->name,
6419                intel_context_get_total_runtime_ns(ce),
6420                intel_context_get_avg_runtime_ns(ce));
6421
6422        err = 0;
6423        if (ce->runtime.num_underflow) {
6424                pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6425                       engine->name,
6426                       ce->runtime.num_underflow,
6427                       ce->runtime.max_underflow);
6428                GEM_TRACE_DUMP();
6429                err = -EOVERFLOW;
6430        }
6431
6432err_wait:
6433        i915_request_put(rq);
6434err_rq:
6435        intel_context_put(ce);
6436        return err;
6437}
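
/*
 * Illustrative sketch only -- this is not how the driver maintains the
 * counters. One way the underflow statistics consumed above
 * (ce->runtime.num_underflow, ce->runtime.max_underflow) could be kept
 * when sampling a 32-bit accumulating runtime: count every backwards
 * step and remember the largest one. The sample_runtime() helper is
 * hypothetical.
 */
static void sample_runtime(u32 *last, u32 now,
                           u32 *num_underflow, u32 *max_underflow)
{
        if (now < *last) {
                u32 dt = *last - now;

                (*num_underflow)++;
                if (dt > *max_underflow)
                        *max_underflow = dt;
        }

        *last = now;
}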
6438
6439static int live_pphwsp_runtime(void *arg)
6440{
6441        struct intel_gt *gt = arg;
6442        struct intel_engine_cs *engine;
6443        enum intel_engine_id id;
6444        int err = 0;
6445
6446        /*
6447         * Check that the cumulative context runtime, as stored in the
6448         * pphwsp[16], is monotonic.
6449         */
6450
6451        for_each_engine(engine, gt, id) {
6452                err = __live_pphwsp_runtime(engine);
6453                if (err)
6454                        break;
6455        }
6456
6457        if (igt_flush_test(gt->i915))
6458                err = -EIO;
6459
6460        return err;
6461}
6462
6463int intel_lrc_live_selftests(struct drm_i915_private *i915)
6464{
6465        static const struct i915_subtest tests[] = {
6466                SUBTEST(live_lrc_layout),
6467                SUBTEST(live_lrc_fixed),
6468                SUBTEST(live_lrc_state),
6469                SUBTEST(live_lrc_gpr),
6470                SUBTEST(live_lrc_isolation),
6471                SUBTEST(live_lrc_timestamp),
6472                SUBTEST(live_lrc_garbage),
6473                SUBTEST(live_pphwsp_runtime),
6474                SUBTEST(live_lrc_indirect_ctx_bb),
6475        };
6476
6477        if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6478                return 0;
6479
6480        return intel_gt_live_subtests(tests, &i915->gt);
6481}
6482