linux/drivers/gpu/drm/i915/gt/selftest_execlists.c
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2018 Intel Corporation
   4 */
   5
   6#include <linux/prime_numbers.h>
   7
   8#include "gem/i915_gem_pm.h"
   9#include "gt/intel_engine_heartbeat.h"
  10#include "gt/intel_reset.h"
  11#include "gt/selftest_engine_heartbeat.h"
  12
  13#include "i915_selftest.h"
  14#include "selftests/i915_random.h"
  15#include "selftests/igt_flush_test.h"
  16#include "selftests/igt_live_test.h"
  17#include "selftests/igt_spinner.h"
  18#include "selftests/lib_sw_fence.h"
  19
  20#include "gem/selftests/igt_gem_utils.h"
  21#include "gem/selftests/mock_context.h"
  22
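     /*
      * The CS general-purpose registers live at mmio_base + 0x600. Each GPR
      * is 64 bits wide, i.e. two dwords, so CS_GPR() indexes dwords and
      * NUM_GPR_DW counts them.
      */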
  23#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
  24#define NUM_GPR 16
  25#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
  26
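     /*
      * "Active" here means the request has reached the HW in some form: it
      * is in the ELSP, parked on the hold list, or has already started
      * executing (its initial breadcrumb has been seen).
      */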
  27static bool is_active(struct i915_request *rq)
  28{
  29        if (i915_request_is_active(rq))
  30                return true;
  31
  32        if (i915_request_on_hold(rq))
  33                return true;
  34
  35        if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
  36                return true;
  37
  38        return false;
  39}
  40
  41static int wait_for_submit(struct intel_engine_cs *engine,
  42                           struct i915_request *rq,
  43                           unsigned long timeout)
  44{
  45        /* Ignore our own attempts to suppress excess tasklets */
  46        tasklet_hi_schedule(&engine->execlists.tasklet);
  47
  48        timeout += jiffies;
  49        do {
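                     /*
                      * Sample the deadline first so that we always make one
                      * final check after the timeout has expired.
                      */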
  50                bool done = time_after(jiffies, timeout);
  51
  52                if (i915_request_completed(rq)) /* that was quick! */
  53                        return 0;
  54
   55                /* Wait until the HW has acknowledged the submission (or err) */
  56                intel_engine_flush_submission(engine);
  57                if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
  58                        return 0;
  59
  60                if (done)
  61                        return -ETIME;
  62
  63                cond_resched();
  64        } while (1);
  65}
  66
  67static int wait_for_reset(struct intel_engine_cs *engine,
  68                          struct i915_request *rq,
  69                          unsigned long timeout)
  70{
  71        timeout += jiffies;
  72
  73        do {
  74                cond_resched();
  75                intel_engine_flush_submission(engine);
  76
  77                if (READ_ONCE(engine->execlists.pending[0]))
  78                        continue;
  79
  80                if (i915_request_completed(rq))
  81                        break;
  82
  83                if (READ_ONCE(rq->fence.error))
  84                        break;
  85        } while (time_before(jiffies, timeout));
  86
  87        flush_scheduled_work();
  88
  89        if (rq->fence.error != -EIO) {
  90                pr_err("%s: hanging request %llx:%lld not reset\n",
  91                       engine->name,
  92                       rq->fence.context,
  93                       rq->fence.seqno);
  94                return -EINVAL;
  95        }
  96
   97        /* Give the request a jiffy to complete after flushing the worker */
  98        if (i915_request_wait(rq, 0,
  99                              max(0l, (long)(timeout - jiffies)) + 1) < 0) {
 100                pr_err("%s: hanging request %llx:%lld did not complete\n",
 101                       engine->name,
 102                       rq->fence.context,
 103                       rq->fence.seqno);
 104                return -ETIME;
 105        }
 106
 107        return 0;
 108}
 109
 110static int live_sanitycheck(void *arg)
 111{
 112        struct intel_gt *gt = arg;
 113        struct intel_engine_cs *engine;
 114        enum intel_engine_id id;
 115        struct igt_spinner spin;
 116        int err = 0;
 117
 118        if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
 119                return 0;
 120
 121        if (igt_spinner_init(&spin, gt))
 122                return -ENOMEM;
 123
 124        for_each_engine(engine, gt, id) {
 125                struct intel_context *ce;
 126                struct i915_request *rq;
 127
 128                ce = intel_context_create(engine);
 129                if (IS_ERR(ce)) {
 130                        err = PTR_ERR(ce);
 131                        break;
 132                }
 133
 134                rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 135                if (IS_ERR(rq)) {
 136                        err = PTR_ERR(rq);
 137                        goto out_ctx;
 138                }
 139
 140                i915_request_add(rq);
 141                if (!igt_wait_for_spinner(&spin, rq)) {
 142                        GEM_TRACE("spinner failed to start\n");
 143                        GEM_TRACE_DUMP();
 144                        intel_gt_set_wedged(gt);
 145                        err = -EIO;
 146                        goto out_ctx;
 147                }
 148
 149                igt_spinner_end(&spin);
 150                if (igt_flush_test(gt->i915)) {
 151                        err = -EIO;
 152                        goto out_ctx;
 153                }
 154
 155out_ctx:
 156                intel_context_put(ce);
 157                if (err)
 158                        break;
 159        }
 160
 161        igt_spinner_fini(&spin);
 162        return err;
 163}
 164
 165static int live_unlite_restore(struct intel_gt *gt, int prio)
 166{
 167        struct intel_engine_cs *engine;
 168        enum intel_engine_id id;
 169        struct igt_spinner spin;
 170        int err = -ENOMEM;
 171
 172        /*
 173         * Check that we can correctly context switch between 2 instances
 174         * on the same engine from the same parent context.
 175         */
 176
 177        if (igt_spinner_init(&spin, gt))
 178                return err;
 179
 180        err = 0;
 181        for_each_engine(engine, gt, id) {
 182                struct intel_context *ce[2] = {};
 183                struct i915_request *rq[2];
 184                struct igt_live_test t;
 185                int n;
 186
 187                if (prio && !intel_engine_has_preemption(engine))
 188                        continue;
 189
 190                if (!intel_engine_can_store_dword(engine))
 191                        continue;
 192
 193                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 194                        err = -EIO;
 195                        break;
 196                }
 197                st_engine_heartbeat_disable(engine);
 198
 199                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 200                        struct intel_context *tmp;
 201
 202                        tmp = intel_context_create(engine);
 203                        if (IS_ERR(tmp)) {
 204                                err = PTR_ERR(tmp);
 205                                goto err_ce;
 206                        }
 207
 208                        err = intel_context_pin(tmp);
 209                        if (err) {
 210                                intel_context_put(tmp);
 211                                goto err_ce;
 212                        }
 213
 214                        /*
  215                         * Set up the pair of contexts such that if we
 216                         * lite-restore using the RING_TAIL from ce[1] it
 217                         * will execute garbage from ce[0]->ring.
 218                         */
 219                        memset(tmp->ring->vaddr,
 220                               POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
 221                               tmp->ring->vma->size);
 222
 223                        ce[n] = tmp;
 224                }
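                     /*
                      * Move ce[1]'s ring to its midpoint so that an erroneous
                      * lite-restore of ce[0] using ce[1]'s RING_TAIL runs
                      * straight into the poison above and hangs.
                      */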
 225                GEM_BUG_ON(!ce[1]->ring->size);
 226                intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
 227                lrc_update_regs(ce[1], engine, ce[1]->ring->head);
 228
 229                rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 230                if (IS_ERR(rq[0])) {
 231                        err = PTR_ERR(rq[0]);
 232                        goto err_ce;
 233                }
 234
 235                i915_request_get(rq[0]);
 236                i915_request_add(rq[0]);
 237                GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
 238
 239                if (!igt_wait_for_spinner(&spin, rq[0])) {
 240                        i915_request_put(rq[0]);
 241                        goto err_ce;
 242                }
 243
 244                rq[1] = i915_request_create(ce[1]);
 245                if (IS_ERR(rq[1])) {
 246                        err = PTR_ERR(rq[1]);
 247                        i915_request_put(rq[0]);
 248                        goto err_ce;
 249                }
 250
 251                if (!prio) {
 252                        /*
 253                         * Ensure we do the switch to ce[1] on completion.
 254                         *
 255                         * rq[0] is already submitted, so this should reduce
 256                         * to a no-op (a wait on a request on the same engine
 257                         * uses the submit fence, not the completion fence),
 258                         * but it will install a dependency on rq[1] for rq[0]
 259                         * that will prevent the pair being reordered by
 260                         * timeslicing.
 261                         */
 262                        i915_request_await_dma_fence(rq[1], &rq[0]->fence);
 263                }
 264
 265                i915_request_get(rq[1]);
 266                i915_request_add(rq[1]);
 267                GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
 268                i915_request_put(rq[0]);
 269
 270                if (prio) {
 271                        struct i915_sched_attr attr = {
 272                                .priority = prio,
 273                        };
 274
 275                        /* Alternatively preempt the spinner with ce[1] */
 276                        engine->schedule(rq[1], &attr);
 277                }
 278
 279                /* And switch back to ce[0] for good measure */
 280                rq[0] = i915_request_create(ce[0]);
 281                if (IS_ERR(rq[0])) {
 282                        err = PTR_ERR(rq[0]);
 283                        i915_request_put(rq[1]);
 284                        goto err_ce;
 285                }
 286
 287                i915_request_await_dma_fence(rq[0], &rq[1]->fence);
 288                i915_request_get(rq[0]);
 289                i915_request_add(rq[0]);
 290                GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
 291                i915_request_put(rq[1]);
 292                i915_request_put(rq[0]);
 293
 294err_ce:
 295                intel_engine_flush_submission(engine);
 296                igt_spinner_end(&spin);
 297                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 298                        if (IS_ERR_OR_NULL(ce[n]))
 299                                break;
 300
 301                        intel_context_unpin(ce[n]);
 302                        intel_context_put(ce[n]);
 303                }
 304
 305                st_engine_heartbeat_enable(engine);
 306                if (igt_live_test_end(&t))
 307                        err = -EIO;
 308                if (err)
 309                        break;
 310        }
 311
 312        igt_spinner_fini(&spin);
 313        return err;
 314}
 315
 316static int live_unlite_switch(void *arg)
 317{
 318        return live_unlite_restore(arg, 0);
 319}
 320
 321static int live_unlite_preempt(void *arg)
 322{
 323        return live_unlite_restore(arg, I915_PRIORITY_MAX);
 324}
 325
 326static int live_unlite_ring(void *arg)
 327{
 328        struct intel_gt *gt = arg;
 329        struct intel_engine_cs *engine;
 330        struct igt_spinner spin;
 331        enum intel_engine_id id;
 332        int err = 0;
 333
 334        /*
  335         * Set up a preemption event that will cause almost the entire ring
 336         * to be unwound, potentially fooling our intel_ring_direction()
 337         * into emitting a forward lite-restore instead of the rollback.
 338         */
 339
 340        if (igt_spinner_init(&spin, gt))
 341                return -ENOMEM;
 342
 343        for_each_engine(engine, gt, id) {
 344                struct intel_context *ce[2] = {};
 345                struct i915_request *rq;
 346                struct igt_live_test t;
 347                int n;
 348
 349                if (!intel_engine_has_preemption(engine))
 350                        continue;
 351
 352                if (!intel_engine_can_store_dword(engine))
 353                        continue;
 354
 355                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 356                        err = -EIO;
 357                        break;
 358                }
 359                st_engine_heartbeat_disable(engine);
 360
 361                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 362                        struct intel_context *tmp;
 363
 364                        tmp = intel_context_create(engine);
 365                        if (IS_ERR(tmp)) {
 366                                err = PTR_ERR(tmp);
 367                                goto err_ce;
 368                        }
 369
 370                        err = intel_context_pin(tmp);
 371                        if (err) {
 372                                intel_context_put(tmp);
 373                                goto err_ce;
 374                        }
 375
 376                        memset32(tmp->ring->vaddr,
 377                                 0xdeadbeef, /* trigger a hang if executed */
 378                                 tmp->ring->vma->size / sizeof(u32));
 379
 380                        ce[n] = tmp;
 381                }
 382
 383                /* Create max prio spinner, followed by N low prio nops */
 384                rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 385                if (IS_ERR(rq)) {
 386                        err = PTR_ERR(rq);
 387                        goto err_ce;
 388                }
 389
 390                i915_request_get(rq);
 391                rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 392                i915_request_add(rq);
 393
 394                if (!igt_wait_for_spinner(&spin, rq)) {
 395                        intel_gt_set_wedged(gt);
 396                        i915_request_put(rq);
 397                        err = -ETIME;
 398                        goto err_ce;
 399                }
 400
  401                /* Fill the ring until we cause the tail to wrap */
 402                n = 0;
 403                while (intel_ring_direction(ce[0]->ring,
 404                                            rq->wa_tail,
 405                                            ce[0]->ring->tail) <= 0) {
 406                        struct i915_request *tmp;
 407
 408                        tmp = intel_context_create_request(ce[0]);
 409                        if (IS_ERR(tmp)) {
 410                                err = PTR_ERR(tmp);
 411                                i915_request_put(rq);
 412                                goto err_ce;
 413                        }
 414
 415                        i915_request_add(tmp);
 416                        intel_engine_flush_submission(engine);
 417                        n++;
 418                }
 419                intel_engine_flush_submission(engine);
 420                pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
 421                         engine->name, n,
 422                         ce[0]->ring->size,
 423                         ce[0]->ring->tail,
 424                         ce[0]->ring->emit,
 425                         rq->tail);
 426                GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
 427                                                rq->tail,
 428                                                ce[0]->ring->tail) <= 0);
 429                i915_request_put(rq);
 430
  431                /* Use the second context to preempt the first ring after the spinner */
 432                rq = intel_context_create_request(ce[1]);
 433                if (IS_ERR(rq)) {
 434                        err = PTR_ERR(rq);
 435                        goto err_ce;
 436                }
 437
 438                rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 439                i915_request_get(rq);
 440                i915_request_add(rq);
 441
 442                err = wait_for_submit(engine, rq, HZ / 2);
 443                i915_request_put(rq);
 444                if (err) {
 445                        pr_err("%s: preemption request was not submitted\n",
 446                               engine->name);
 447                        err = -ETIME;
 448                }
 449
 450                pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
 451                         engine->name,
 452                         ce[0]->ring->tail, ce[0]->ring->emit,
 453                         ce[1]->ring->tail, ce[1]->ring->emit);
 454
 455err_ce:
 456                intel_engine_flush_submission(engine);
 457                igt_spinner_end(&spin);
 458                for (n = 0; n < ARRAY_SIZE(ce); n++) {
 459                        if (IS_ERR_OR_NULL(ce[n]))
 460                                break;
 461
 462                        intel_context_unpin(ce[n]);
 463                        intel_context_put(ce[n]);
 464                }
 465                st_engine_heartbeat_enable(engine);
 466                if (igt_live_test_end(&t))
 467                        err = -EIO;
 468                if (err)
 469                        break;
 470        }
 471
 472        igt_spinner_fini(&spin);
 473        return err;
 474}
 475
 476static int live_pin_rewind(void *arg)
 477{
 478        struct intel_gt *gt = arg;
 479        struct intel_engine_cs *engine;
 480        enum intel_engine_id id;
 481        int err = 0;
 482
 483        /*
  484         * We have to be careful not to trust intel_ring too much; for example,
  485         * ring->head is updated upon retire, which is out of sync with pinning
  486         * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
  487         * or else we risk writing an older, stale value.
  488         *
  489         * To simulate this, let's apply a bit of deliberate sabotage.
 490         */
 491
 492        for_each_engine(engine, gt, id) {
 493                struct intel_context *ce;
 494                struct i915_request *rq;
 495                struct intel_ring *ring;
 496                struct igt_live_test t;
 497
 498                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 499                        err = -EIO;
 500                        break;
 501                }
 502
 503                ce = intel_context_create(engine);
 504                if (IS_ERR(ce)) {
 505                        err = PTR_ERR(ce);
 506                        break;
 507                }
 508
 509                err = intel_context_pin(ce);
 510                if (err) {
 511                        intel_context_put(ce);
 512                        break;
 513                }
 514
 515                /* Keep the context awake while we play games */
 516                err = i915_active_acquire(&ce->active);
 517                if (err) {
 518                        intel_context_unpin(ce);
 519                        intel_context_put(ce);
 520                        break;
 521                }
 522                ring = ce->ring;
 523
 524                /* Poison the ring, and offset the next request from HEAD */
 525                memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
 526                ring->emit = ring->size / 2;
 527                ring->tail = ring->emit;
 528                GEM_BUG_ON(ring->head);
 529
 530                intel_context_unpin(ce);
 531
 532                /* Submit a simple nop request */
 533                GEM_BUG_ON(intel_context_is_pinned(ce));
 534                rq = intel_context_create_request(ce);
 535                i915_active_release(&ce->active); /* e.g. async retire */
 536                intel_context_put(ce);
 537                if (IS_ERR(rq)) {
 538                        err = PTR_ERR(rq);
 539                        break;
 540                }
 541                GEM_BUG_ON(!rq->head);
 542                i915_request_add(rq);
 543
 544                /* Expect not to hang! */
 545                if (igt_live_test_end(&t)) {
 546                        err = -EIO;
 547                        break;
 548                }
 549        }
 550
 551        return err;
 552}
 553
 554static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
 555{
 556        tasklet_disable(&engine->execlists.tasklet);
 557        local_bh_disable();
 558
 559        if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
 560                             &engine->gt->reset.flags)) {
 561                local_bh_enable();
 562                tasklet_enable(&engine->execlists.tasklet);
 563
 564                intel_gt_set_wedged(engine->gt);
 565                return -EBUSY;
 566        }
 567
 568        return 0;
 569}
 570
 571static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
 572{
 573        clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
 574                              &engine->gt->reset.flags);
 575
 576        local_bh_enable();
 577        tasklet_enable(&engine->execlists.tasklet);
 578}
 579
 580static int live_hold_reset(void *arg)
 581{
 582        struct intel_gt *gt = arg;
 583        struct intel_engine_cs *engine;
 584        enum intel_engine_id id;
 585        struct igt_spinner spin;
 586        int err = 0;
 587
 588        /*
 589         * In order to support offline error capture for fast preempt reset,
 590         * we need to decouple the guilty request and ensure that it and its
  591         * descendants are not executed while the capture is in progress.
 592         */
 593
 594        if (!intel_has_reset_engine(gt))
 595                return 0;
 596
 597        if (igt_spinner_init(&spin, gt))
 598                return -ENOMEM;
 599
 600        for_each_engine(engine, gt, id) {
 601                struct intel_context *ce;
 602                struct i915_request *rq;
 603
 604                ce = intel_context_create(engine);
 605                if (IS_ERR(ce)) {
 606                        err = PTR_ERR(ce);
 607                        break;
 608                }
 609
 610                st_engine_heartbeat_disable(engine);
 611
 612                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 613                if (IS_ERR(rq)) {
 614                        err = PTR_ERR(rq);
 615                        goto out;
 616                }
 617                i915_request_add(rq);
 618
 619                if (!igt_wait_for_spinner(&spin, rq)) {
 620                        intel_gt_set_wedged(gt);
 621                        err = -ETIME;
 622                        goto out;
 623                }
 624
 625                /* We have our request executing, now remove it and reset */
 626
 627                err = engine_lock_reset_tasklet(engine);
 628                if (err)
 629                        goto out;
 630
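                     /*
                      * Run the submission tasklet by hand (it is disabled) so
                      * that the spinner is promoted into ELSP[0] before we
                      * pluck it off and hold it.
                      */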
 631                engine->execlists.tasklet.callback(&engine->execlists.tasklet);
 632                GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 633
 634                i915_request_get(rq);
 635                execlists_hold(engine, rq);
 636                GEM_BUG_ON(!i915_request_on_hold(rq));
 637
 638                __intel_engine_reset_bh(engine, NULL);
 639                GEM_BUG_ON(rq->fence.error != -EIO);
 640
 641                engine_unlock_reset_tasklet(engine);
 642
 643                /* Check that we do not resubmit the held request */
 644                if (!i915_request_wait(rq, 0, HZ / 5)) {
 645                        pr_err("%s: on hold request completed!\n",
 646                               engine->name);
 647                        i915_request_put(rq);
 648                        err = -EIO;
 649                        goto out;
 650                }
 651                GEM_BUG_ON(!i915_request_on_hold(rq));
 652
 653                /* But is resubmitted on release */
 654                execlists_unhold(engine, rq);
 655                if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 656                        pr_err("%s: held request did not complete!\n",
 657                               engine->name);
 658                        intel_gt_set_wedged(gt);
 659                        err = -ETIME;
 660                }
 661                i915_request_put(rq);
 662
 663out:
 664                st_engine_heartbeat_enable(engine);
 665                intel_context_put(ce);
 666                if (err)
 667                        break;
 668        }
 669
 670        igt_spinner_fini(&spin);
 671        return err;
 672}
 673
 674static const char *error_repr(int err)
 675{
 676        return err ? "bad" : "good";
 677}
 678
 679static int live_error_interrupt(void *arg)
 680{
 681        static const struct error_phase {
 682                enum { GOOD = 0, BAD = -EIO } error[2];
 683        } phases[] = {
 684                { { BAD,  GOOD } },
 685                { { BAD,  BAD  } },
 686                { { BAD,  GOOD } },
 687                { { GOOD, GOOD } }, /* sentinel */
 688        };
 689        struct intel_gt *gt = arg;
 690        struct intel_engine_cs *engine;
 691        enum intel_engine_id id;
 692
 693        /*
 694         * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
 695         * of invalid commands in user batches that will cause a GPU hang.
 696         * This is a faster mechanism than using hangcheck/heartbeats, but
 697         * only detects problems the HW knows about -- it will not warn when
 698         * we kill the HW!
 699         *
 700         * To verify our detection and reset, we throw some invalid commands
 701         * at the HW and wait for the interrupt.
 702         */
 703
 704        if (!intel_has_reset_engine(gt))
 705                return 0;
 706
 707        for_each_engine(engine, gt, id) {
 708                const struct error_phase *p;
 709                int err = 0;
 710
 711                st_engine_heartbeat_disable(engine);
 712
 713                for (p = phases; p->error[0] != GOOD; p++) {
 714                        struct i915_request *client[ARRAY_SIZE(phases->error)];
 715                        u32 *cs;
 716                        int i;
 717
  718                        memset(client, 0, sizeof(client));
 719                        for (i = 0; i < ARRAY_SIZE(client); i++) {
 720                                struct intel_context *ce;
 721                                struct i915_request *rq;
 722
 723                                ce = intel_context_create(engine);
 724                                if (IS_ERR(ce)) {
 725                                        err = PTR_ERR(ce);
 726                                        goto out;
 727                                }
 728
 729                                rq = intel_context_create_request(ce);
 730                                intel_context_put(ce);
 731                                if (IS_ERR(rq)) {
 732                                        err = PTR_ERR(rq);
 733                                        goto out;
 734                                }
 735
 736                                if (rq->engine->emit_init_breadcrumb) {
 737                                        err = rq->engine->emit_init_breadcrumb(rq);
 738                                        if (err) {
 739                                                i915_request_add(rq);
 740                                                goto out;
 741                                        }
 742                                }
 743
 744                                cs = intel_ring_begin(rq, 2);
 745                                if (IS_ERR(cs)) {
 746                                        i915_request_add(rq);
 747                                        err = PTR_ERR(cs);
 748                                        goto out;
 749                                }
 750
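                                     /*
                                      * Either feed the CS an invalid command to
                                      * trip CS_MASTER_ERROR_INTERRUPT, or emit
                                      * harmless nops.
                                      */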
 751                                if (p->error[i]) {
 752                                        *cs++ = 0xdeadbeef;
 753                                        *cs++ = 0xdeadbeef;
 754                                } else {
 755                                        *cs++ = MI_NOOP;
 756                                        *cs++ = MI_NOOP;
 757                                }
 758
 759                                client[i] = i915_request_get(rq);
 760                                i915_request_add(rq);
 761                        }
 762
 763                        err = wait_for_submit(engine, client[0], HZ / 2);
 764                        if (err) {
 765                                pr_err("%s: first request did not start within time!\n",
 766                                       engine->name);
 767                                err = -ETIME;
 768                                goto out;
 769                        }
 770
 771                        for (i = 0; i < ARRAY_SIZE(client); i++) {
 772                                if (i915_request_wait(client[i], 0, HZ / 5) < 0)
 773                                        pr_debug("%s: %s request incomplete!\n",
 774                                                 engine->name,
 775                                                 error_repr(p->error[i]));
 776
 777                                if (!i915_request_started(client[i])) {
 778                                        pr_err("%s: %s request not started!\n",
 779                                               engine->name,
 780                                               error_repr(p->error[i]));
 781                                        err = -ETIME;
 782                                        goto out;
 783                                }
 784
 785                                /* Kick the tasklet to process the error */
 786                                intel_engine_flush_submission(engine);
 787                                if (client[i]->fence.error != p->error[i]) {
 788                                        pr_err("%s: %s request (%s) with wrong error code: %d\n",
 789                                               engine->name,
 790                                               error_repr(p->error[i]),
 791                                               i915_request_completed(client[i]) ? "completed" : "running",
 792                                               client[i]->fence.error);
 793                                        err = -EINVAL;
 794                                        goto out;
 795                                }
 796                        }
 797
 798out:
 799                        for (i = 0; i < ARRAY_SIZE(client); i++)
 800                                if (client[i])
 801                                        i915_request_put(client[i]);
 802                        if (err) {
 803                                pr_err("%s: failed at phase[%zd] { %d, %d }\n",
 804                                       engine->name, p - phases,
 805                                       p->error[0], p->error[1]);
 806                                break;
 807                        }
 808                }
 809
 810                st_engine_heartbeat_enable(engine);
 811                if (err) {
 812                        intel_gt_set_wedged(gt);
 813                        return err;
 814                }
 815        }
 816
 817        return 0;
 818}
 819
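     /*
      * Each link in the semaphore chain busy-waits until slot[idx] becomes
      * non-zero and then (for idx > 0) writes 1 into slot[idx - 1], so poking
      * the final slot unwinds the chain one link at a time back to slot[0].
      */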
 820static int
 821emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 822{
 823        u32 *cs;
 824
 825        cs = intel_ring_begin(rq, 10);
 826        if (IS_ERR(cs))
 827                return PTR_ERR(cs);
 828
 829        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 830
 831        *cs++ = MI_SEMAPHORE_WAIT |
 832                MI_SEMAPHORE_GLOBAL_GTT |
 833                MI_SEMAPHORE_POLL |
 834                MI_SEMAPHORE_SAD_NEQ_SDD;
 835        *cs++ = 0;
 836        *cs++ = i915_ggtt_offset(vma) + 4 * idx;
 837        *cs++ = 0;
 838
 839        if (idx > 0) {
 840                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 841                *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 842                *cs++ = 0;
 843                *cs++ = 1;
 844        } else {
 845                *cs++ = MI_NOOP;
 846                *cs++ = MI_NOOP;
 847                *cs++ = MI_NOOP;
 848                *cs++ = MI_NOOP;
 849        }
 850
 851        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 852
 853        intel_ring_advance(rq, cs);
 854        return 0;
 855}
 856
 857static struct i915_request *
 858semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
 859{
 860        struct intel_context *ce;
 861        struct i915_request *rq;
 862        int err;
 863
 864        ce = intel_context_create(engine);
 865        if (IS_ERR(ce))
 866                return ERR_CAST(ce);
 867
 868        rq = intel_context_create_request(ce);
 869        if (IS_ERR(rq))
 870                goto out_ce;
 871
 872        err = 0;
 873        if (rq->engine->emit_init_breadcrumb)
 874                err = rq->engine->emit_init_breadcrumb(rq);
 875        if (err == 0)
 876                err = emit_semaphore_chain(rq, vma, idx);
 877        if (err == 0)
 878                i915_request_get(rq);
 879        i915_request_add(rq);
 880        if (err)
 881                rq = ERR_PTR(err);
 882
 883out_ce:
 884        intel_context_put(ce);
 885        return rq;
 886}
 887
 888static int
 889release_queue(struct intel_engine_cs *engine,
 890              struct i915_vma *vma,
 891              int idx, int prio)
 892{
 893        struct i915_sched_attr attr = {
 894                .priority = prio,
 895        };
 896        struct i915_request *rq;
 897        u32 *cs;
 898
 899        rq = intel_engine_create_kernel_request(engine);
 900        if (IS_ERR(rq))
 901                return PTR_ERR(rq);
 902
 903        cs = intel_ring_begin(rq, 4);
 904        if (IS_ERR(cs)) {
 905                i915_request_add(rq);
 906                return PTR_ERR(cs);
 907        }
 908
 909        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 910        *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 911        *cs++ = 0;
 912        *cs++ = 1;
 913
 914        intel_ring_advance(rq, cs);
 915
 916        i915_request_get(rq);
 917        i915_request_add(rq);
 918
 919        local_bh_disable();
 920        engine->schedule(rq, &attr);
 921        local_bh_enable(); /* kick tasklet */
 922
 923        i915_request_put(rq);
 924
 925        return 0;
 926}
 927
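     /*
      * Build a chain: the head request on @outer waits on slot[0], and every
      * engine queues @count links, each waiting on its own slot. Releasing
      * the last slot unwinds the chain back to the head, which only makes
      * progress if timeslicing rotates the waiting contexts through the ELSP.
      */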
 928static int
 929slice_semaphore_queue(struct intel_engine_cs *outer,
 930                      struct i915_vma *vma,
 931                      int count)
 932{
 933        struct intel_engine_cs *engine;
 934        struct i915_request *head;
 935        enum intel_engine_id id;
 936        int err, i, n = 0;
 937
 938        head = semaphore_queue(outer, vma, n++);
 939        if (IS_ERR(head))
 940                return PTR_ERR(head);
 941
 942        for_each_engine(engine, outer->gt, id) {
 943                if (!intel_engine_has_preemption(engine))
 944                        continue;
 945
 946                for (i = 0; i < count; i++) {
 947                        struct i915_request *rq;
 948
 949                        rq = semaphore_queue(engine, vma, n++);
 950                        if (IS_ERR(rq)) {
 951                                err = PTR_ERR(rq);
 952                                goto out;
 953                        }
 954
 955                        i915_request_put(rq);
 956                }
 957        }
 958
 959        err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
 960        if (err)
 961                goto out;
 962
 963        if (i915_request_wait(head, 0,
 964                              2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
 965                pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
 966                       outer->name, count, n);
 967                GEM_TRACE_DUMP();
 968                intel_gt_set_wedged(outer->gt);
 969                err = -EIO;
 970        }
 971
 972out:
 973        i915_request_put(head);
 974        return err;
 975}
 976
 977static int live_timeslice_preempt(void *arg)
 978{
 979        struct intel_gt *gt = arg;
 980        struct drm_i915_gem_object *obj;
 981        struct intel_engine_cs *engine;
 982        enum intel_engine_id id;
 983        struct i915_vma *vma;
 984        void *vaddr;
 985        int err = 0;
 986
 987        /*
 988         * If a request takes too long, we would like to give other users
 989         * a fair go on the GPU. In particular, users may create batches
 990         * that wait upon external input, where that input may even be
 991         * supplied by another GPU job. To avoid blocking forever, we
 992         * need to preempt the current task and replace it with another
 993         * ready task.
 994         */
 995        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 996                return 0;
 997
 998        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
 999        if (IS_ERR(obj))
1000                return PTR_ERR(obj);
1001
1002        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1003        if (IS_ERR(vma)) {
1004                err = PTR_ERR(vma);
1005                goto err_obj;
1006        }
1007
1008        vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1009        if (IS_ERR(vaddr)) {
1010                err = PTR_ERR(vaddr);
1011                goto err_obj;
1012        }
1013
1014        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1015        if (err)
1016                goto err_map;
1017
1018        err = i915_vma_sync(vma);
1019        if (err)
1020                goto err_pin;
1021
1022        for_each_engine(engine, gt, id) {
1023                if (!intel_engine_has_preemption(engine))
1024                        continue;
1025
1026                memset(vaddr, 0, PAGE_SIZE);
1027
1028                st_engine_heartbeat_disable(engine);
1029                err = slice_semaphore_queue(engine, vma, 5);
1030                st_engine_heartbeat_enable(engine);
1031                if (err)
1032                        goto err_pin;
1033
1034                if (igt_flush_test(gt->i915)) {
1035                        err = -EIO;
1036                        goto err_pin;
1037                }
1038        }
1039
1040err_pin:
1041        i915_vma_unpin(vma);
1042err_map:
1043        i915_gem_object_unpin_map(obj);
1044err_obj:
1045        i915_gem_object_put(obj);
1046        return err;
1047}
1048
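     /*
      * Each rewinder request waits for slot[0] >= idx, samples RING_TIMESTAMP
      * into slot[idx], and then writes idx + 1 back to slot[0] to release the
      * next request in the sequence.
      */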
1049static struct i915_request *
1050create_rewinder(struct intel_context *ce,
1051                struct i915_request *wait,
1052                void *slot, int idx)
1053{
1054        const u32 offset =
1055                i915_ggtt_offset(ce->engine->status_page.vma) +
1056                offset_in_page(slot);
1057        struct i915_request *rq;
1058        u32 *cs;
1059        int err;
1060
1061        rq = intel_context_create_request(ce);
1062        if (IS_ERR(rq))
1063                return rq;
1064
1065        if (wait) {
1066                err = i915_request_await_dma_fence(rq, &wait->fence);
1067                if (err)
1068                        goto err;
1069        }
1070
1071        cs = intel_ring_begin(rq, 14);
1072        if (IS_ERR(cs)) {
1073                err = PTR_ERR(cs);
1074                goto err;
1075        }
1076
1077        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1078        *cs++ = MI_NOOP;
1079
1080        *cs++ = MI_SEMAPHORE_WAIT |
1081                MI_SEMAPHORE_GLOBAL_GTT |
1082                MI_SEMAPHORE_POLL |
1083                MI_SEMAPHORE_SAD_GTE_SDD;
1084        *cs++ = idx;
1085        *cs++ = offset;
1086        *cs++ = 0;
1087
1088        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1089        *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1090        *cs++ = offset + idx * sizeof(u32);
1091        *cs++ = 0;
1092
1093        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1094        *cs++ = offset;
1095        *cs++ = 0;
1096        *cs++ = idx + 1;
1097
1098        intel_ring_advance(rq, cs);
1099
1100        err = 0;
1101err:
1102        i915_request_get(rq);
1103        i915_request_add(rq);
1104        if (err) {
1105                i915_request_put(rq);
1106                return ERR_PTR(err);
1107        }
1108
1109        return rq;
1110}
1111
1112static int live_timeslice_rewind(void *arg)
1113{
1114        struct intel_gt *gt = arg;
1115        struct intel_engine_cs *engine;
1116        enum intel_engine_id id;
1117
1118        /*
1119         * The usual presumption on timeslice expiration is that we replace
1120         * the active context with another. However, given a chain of
 1121         * dependencies we may end up replacing the context with itself,
 1122         * but with only a subset of its requests resubmitted, forcing us to
 1123         * rewind the RING_TAIL of the original request.
1124         */
1125        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1126                return 0;
1127
1128        for_each_engine(engine, gt, id) {
1129                enum { A1, A2, B1 };
1130                enum { X = 1, Z, Y };
1131                struct i915_request *rq[3] = {};
1132                struct intel_context *ce;
1133                unsigned long timeslice;
1134                int i, err = 0;
1135                u32 *slot;
1136
1137                if (!intel_engine_has_timeslices(engine))
1138                        continue;
1139
1140                /*
1141                 * A:rq1 -- semaphore wait, timestamp X
1142                 * A:rq2 -- write timestamp Y
1143                 *
1144                 * B:rq1 [await A:rq1] -- write timestamp Z
1145                 *
1146                 * Force timeslice, release semaphore.
1147                 *
1148                 * Expect execution/evaluation order XZY
1149                 */
1150
1151                st_engine_heartbeat_disable(engine);
1152                timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1153
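                     /*
                      * slot[0] is the semaphore each rewinder polls; slots
                      * X/Z/Y (1..3) receive the RING_TIMESTAMP samples used
                      * to check the execution order below.
                      */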
1154                slot = memset32(engine->status_page.addr + 1000, 0, 4);
1155
1156                ce = intel_context_create(engine);
1157                if (IS_ERR(ce)) {
1158                        err = PTR_ERR(ce);
1159                        goto err;
1160                }
1161
1162                rq[A1] = create_rewinder(ce, NULL, slot, X);
1163                if (IS_ERR(rq[A1])) {
1164                        intel_context_put(ce);
1165                        goto err;
1166                }
1167
1168                rq[A2] = create_rewinder(ce, NULL, slot, Y);
1169                intel_context_put(ce);
1170                if (IS_ERR(rq[A2]))
1171                        goto err;
1172
1173                err = wait_for_submit(engine, rq[A2], HZ / 2);
1174                if (err) {
1175                        pr_err("%s: failed to submit first context\n",
1176                               engine->name);
1177                        goto err;
1178                }
1179
1180                ce = intel_context_create(engine);
1181                if (IS_ERR(ce)) {
1182                        err = PTR_ERR(ce);
1183                        goto err;
1184                }
1185
1186                rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1187                intel_context_put(ce);
 1188                if (IS_ERR(rq[B1]))
1189                        goto err;
1190
1191                err = wait_for_submit(engine, rq[B1], HZ / 2);
1192                if (err) {
1193                        pr_err("%s: failed to submit second context\n",
1194                               engine->name);
1195                        goto err;
1196                }
1197
1198                /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1199                ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
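                     /*
                      * Keep expiring the timeslice until the scheduler
                      * rewinds A's RING_TAIL past A:rq2 (stuck on the
                      * semaphore), leaving { A:rq1, B:rq1 } in ELSP.
                      */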
1200                while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1201                        /* Wait for the timeslice to kick in */
1202                        del_timer(&engine->execlists.timer);
1203                        tasklet_hi_schedule(&engine->execlists.tasklet);
1204                        intel_engine_flush_submission(engine);
1205                }
1206                /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1207                GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1208                GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1209                GEM_BUG_ON(i915_request_is_active(rq[A2]));
1210
1211                /* Release the hounds! */
1212                slot[0] = 1;
1213                wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1214
1215                for (i = 1; i <= 3; i++) {
1216                        unsigned long timeout = jiffies + HZ / 2;
1217
1218                        while (!READ_ONCE(slot[i]) &&
1219                               time_before(jiffies, timeout))
1220                                ;
1221
1222                        if (!time_before(jiffies, timeout)) {
1223                                pr_err("%s: rq[%d] timed out\n",
1224                                       engine->name, i - 1);
1225                                err = -ETIME;
1226                                goto err;
1227                        }
1228
1229                        pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1230                }
1231
1232                /* XZY: XZ < XY */
1233                if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1234                        pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1235                               engine->name,
1236                               slot[Z] - slot[X],
1237                               slot[Y] - slot[X]);
1238                        err = -EINVAL;
1239                }
1240
1241err:
1242                memset32(&slot[0], -1, 4);
1243                wmb();
1244
1245                engine->props.timeslice_duration_ms = timeslice;
1246                st_engine_heartbeat_enable(engine);
1247                for (i = 0; i < 3; i++)
1248                        i915_request_put(rq[i]);
1249                if (igt_flush_test(gt->i915))
1250                        err = -EIO;
1251                if (err)
1252                        return err;
1253        }
1254
1255        return 0;
1256}
1257
1258static struct i915_request *nop_request(struct intel_engine_cs *engine)
1259{
1260        struct i915_request *rq;
1261
1262        rq = intel_engine_create_kernel_request(engine);
1263        if (IS_ERR(rq))
1264                return rq;
1265
1266        i915_request_get(rq);
1267        i915_request_add(rq);
1268
1269        return rq;
1270}
1271
1272static long slice_timeout(struct intel_engine_cs *engine)
1273{
1274        long timeout;
1275
1276        /* Enough time for a timeslice to kick in, and kick out */
1277        timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1278
1279        /* Enough time for the nop request to complete */
1280        timeout += HZ / 5;
1281
1282        return timeout + 1;
1283}
1284
1285static int live_timeslice_queue(void *arg)
1286{
1287        struct intel_gt *gt = arg;
1288        struct drm_i915_gem_object *obj;
1289        struct intel_engine_cs *engine;
1290        enum intel_engine_id id;
1291        struct i915_vma *vma;
1292        void *vaddr;
1293        int err = 0;
1294
1295        /*
 1296         * Make sure that even if ELSP[0] and ELSP[1] are filled, with
 1297         * timeslicing between them disabled, we *do* enable timeslicing
1298         * if the queue demands it. (Normally, we do not submit if
1299         * ELSP[1] is already occupied, so must rely on timeslicing to
1300         * eject ELSP[0] in favour of the queue.)
1301         */
1302        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1303                return 0;
1304
1305        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1306        if (IS_ERR(obj))
1307                return PTR_ERR(obj);
1308
1309        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1310        if (IS_ERR(vma)) {
1311                err = PTR_ERR(vma);
1312                goto err_obj;
1313        }
1314
1315        vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1316        if (IS_ERR(vaddr)) {
1317                err = PTR_ERR(vaddr);
1318                goto err_obj;
1319        }
1320
1321        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1322        if (err)
1323                goto err_map;
1324
1325        err = i915_vma_sync(vma);
1326        if (err)
1327                goto err_pin;
1328
1329        for_each_engine(engine, gt, id) {
1330                struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1331                struct i915_request *rq, *nop;
1332
1333                if (!intel_engine_has_preemption(engine))
1334                        continue;
1335
1336                st_engine_heartbeat_disable(engine);
1337                memset(vaddr, 0, PAGE_SIZE);
1338
1339                /* ELSP[0]: semaphore wait */
1340                rq = semaphore_queue(engine, vma, 0);
1341                if (IS_ERR(rq)) {
1342                        err = PTR_ERR(rq);
1343                        goto err_heartbeat;
1344                }
1345                engine->schedule(rq, &attr);
1346                err = wait_for_submit(engine, rq, HZ / 2);
1347                if (err) {
1348                        pr_err("%s: Timed out trying to submit semaphores\n",
1349                               engine->name);
1350                        goto err_rq;
1351                }
1352
1353                /* ELSP[1]: nop request */
1354                nop = nop_request(engine);
1355                if (IS_ERR(nop)) {
1356                        err = PTR_ERR(nop);
1357                        goto err_rq;
1358                }
1359                err = wait_for_submit(engine, nop, HZ / 2);
1360                i915_request_put(nop);
1361                if (err) {
1362                        pr_err("%s: Timed out trying to submit nop\n",
1363                               engine->name);
1364                        goto err_rq;
1365                }
1366
1367                GEM_BUG_ON(i915_request_completed(rq));
1368                GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1369
 1370                /* Queue: semaphore signal, at the same priority as the semaphore */
1371                err = release_queue(engine, vma, 1, effective_prio(rq));
1372                if (err)
1373                        goto err_rq;
1374
1375                /* Wait until we ack the release_queue and start timeslicing */
1376                do {
1377                        cond_resched();
1378                        intel_engine_flush_submission(engine);
1379                } while (READ_ONCE(engine->execlists.pending[0]));
1380
 1381                /* Timeslicing should let the queue signal within slice_timeout() */
1382                if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1383                        struct drm_printer p =
1384                                drm_info_printer(gt->i915->drm.dev);
1385
1386                        pr_err("%s: Failed to timeslice into queue\n",
1387                               engine->name);
1388                        intel_engine_dump(engine, &p,
1389                                          "%s\n", engine->name);
1390
1391                        memset(vaddr, 0xff, PAGE_SIZE);
1392                        err = -EIO;
1393                }
1394err_rq:
1395                i915_request_put(rq);
1396err_heartbeat:
1397                st_engine_heartbeat_enable(engine);
1398                if (err)
1399                        break;
1400        }
1401
1402err_pin:
1403        i915_vma_unpin(vma);
1404err_map:
1405        i915_gem_object_unpin_map(obj);
1406err_obj:
1407        i915_gem_object_put(obj);
1408        return err;
1409}
1410
1411static int live_timeslice_nopreempt(void *arg)
1412{
1413        struct intel_gt *gt = arg;
1414        struct intel_engine_cs *engine;
1415        enum intel_engine_id id;
1416        struct igt_spinner spin;
1417        int err = 0;
1418
1419        /*
1420         * We should not timeslice into a request that is marked with
1421         * I915_REQUEST_NOPREEMPT.
1422         */
1423        if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1424                return 0;
1425
1426        if (igt_spinner_init(&spin, gt))
1427                return -ENOMEM;
1428
1429        for_each_engine(engine, gt, id) {
1430                struct intel_context *ce;
1431                struct i915_request *rq;
1432                unsigned long timeslice;
1433
1434                if (!intel_engine_has_preemption(engine))
1435                        continue;
1436
1437                ce = intel_context_create(engine);
1438                if (IS_ERR(ce)) {
1439                        err = PTR_ERR(ce);
1440                        break;
1441                }
1442
1443                st_engine_heartbeat_disable(engine);
1444                timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1445
1446                /* Create an unpreemptible spinner */
1447
1448                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1449                intel_context_put(ce);
1450                if (IS_ERR(rq)) {
1451                        err = PTR_ERR(rq);
1452                        goto out_heartbeat;
1453                }
1454
1455                i915_request_get(rq);
1456                i915_request_add(rq);
1457
1458                if (!igt_wait_for_spinner(&spin, rq)) {
1459                        i915_request_put(rq);
1460                        err = -ETIME;
1461                        goto out_spin;
1462                }
1463
1464                set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1465                i915_request_put(rq);
1466
1467                /* Followed by a maximum priority barrier (heartbeat) */
1468
1469                ce = intel_context_create(engine);
1470                if (IS_ERR(ce)) {
1471                        err = PTR_ERR(ce);
1472                        goto out_spin;
1473                }
1474
1475                rq = intel_context_create_request(ce);
1476                intel_context_put(ce);
1477                if (IS_ERR(rq)) {
1478                        err = PTR_ERR(rq);
1479                        goto out_spin;
1480                }
1481
1482                rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1483                i915_request_get(rq);
1484                i915_request_add(rq);
1485
1486                /*
1487                 * Wait until the barrier is in ELSP, and we know timeslicing
1488                 * will have been activated.
1489                 */
1490                if (wait_for_submit(engine, rq, HZ / 2)) {
1491                        i915_request_put(rq);
1492                        err = -ETIME;
1493                        goto out_spin;
1494                }
1495
1496                /*
1497                 * Since the ELSP[0] request is unpreemptible, it should not
1498                 * allow the maximum priority barrier through. Wait long
1499                 * enough to see if it is timesliced in by mistake.
1500                 */
1501                if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1502                        pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1503                               engine->name);
1504                        err = -EINVAL;
1505                }
1506                i915_request_put(rq);
1507
1508out_spin:
1509                igt_spinner_end(&spin);
1510out_heartbeat:
1511                xchg(&engine->props.timeslice_duration_ms, timeslice);
1512                st_engine_heartbeat_enable(engine);
1513                if (err)
1514                        break;
1515
1516                if (igt_flush_test(gt->i915)) {
1517                        err = -EIO;
1518                        break;
1519                }
1520        }
1521
1522        igt_spinner_fini(&spin);
1523        return err;
1524}
1525
1526static int live_busywait_preempt(void *arg)
1527{
1528        struct intel_gt *gt = arg;
1529        struct i915_gem_context *ctx_hi, *ctx_lo;
1530        struct intel_engine_cs *engine;
1531        struct drm_i915_gem_object *obj;
1532        struct i915_vma *vma;
1533        enum intel_engine_id id;
1534        int err = -ENOMEM;
1535        u32 *map;
1536
1537        /*
1538         * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1539         * preempt the busywaits used to synchronise between rings.
1540         */
1541
1542        ctx_hi = kernel_context(gt->i915);
1543        if (!ctx_hi)
1544                return -ENOMEM;
1545        ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1546
1547        ctx_lo = kernel_context(gt->i915);
1548        if (!ctx_lo)
1549                goto err_ctx_hi;
1550        ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1551
1552        obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1553        if (IS_ERR(obj)) {
1554                err = PTR_ERR(obj);
1555                goto err_ctx_lo;
1556        }
1557
1558        map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1559        if (IS_ERR(map)) {
1560                err = PTR_ERR(map);
1561                goto err_obj;
1562        }
1563
1564        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1565        if (IS_ERR(vma)) {
1566                err = PTR_ERR(vma);
1567                goto err_map;
1568        }
1569
1570        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1571        if (err)
1572                goto err_map;
1573
1574        err = i915_vma_sync(vma);
1575        if (err)
1576                goto err_vma;
1577
1578        for_each_engine(engine, gt, id) {
1579                struct i915_request *lo, *hi;
1580                struct igt_live_test t;
1581                u32 *cs;
1582
1583                if (!intel_engine_has_preemption(engine))
1584                        continue;
1585
1586                if (!intel_engine_can_store_dword(engine))
1587                        continue;
1588
1589                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1590                        err = -EIO;
1591                        goto err_vma;
1592                }
1593
1594                /*
1595                 * We create two requests. The low priority request
1596                 * busywaits on a semaphore (inside the ringbuffer where
1597                 * it should be preemptible) and the high priority request
1598                 * uses an MI_STORE_DWORD_IMM to update the semaphore value,
1599                 * allowing the first request to complete. If preemption
1600                 * fails, we hang instead.
1601                 */
1602
1603                lo = igt_request_alloc(ctx_lo, engine);
1604                if (IS_ERR(lo)) {
1605                        err = PTR_ERR(lo);
1606                        goto err_vma;
1607                }
1608
1609                cs = intel_ring_begin(lo, 8);
1610                if (IS_ERR(cs)) {
1611                        err = PTR_ERR(cs);
1612                        i915_request_add(lo);
1613                        goto err_vma;
1614                }
1615
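                    /* Write 1 to the semaphore, then busywait until it is cleared back to 0 */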
1616                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1617                *cs++ = i915_ggtt_offset(vma);
1618                *cs++ = 0;
1619                *cs++ = 1;
1620
1621                /* XXX Do we need a flush + invalidate here? */
1622
1623                *cs++ = MI_SEMAPHORE_WAIT |
1624                        MI_SEMAPHORE_GLOBAL_GTT |
1625                        MI_SEMAPHORE_POLL |
1626                        MI_SEMAPHORE_SAD_EQ_SDD;
1627                *cs++ = 0;
1628                *cs++ = i915_ggtt_offset(vma);
1629                *cs++ = 0;
1630
1631                intel_ring_advance(lo, cs);
1632
1633                i915_request_get(lo);
1634                i915_request_add(lo);
1635
1636                if (wait_for(READ_ONCE(*map), 10)) {
1637                        i915_request_put(lo);
1638                        err = -ETIMEDOUT;
1639                        goto err_vma;
1640                }
1641
1642                /* Low priority request should be busywaiting now */
1643                if (i915_request_wait(lo, 0, 1) != -ETIME) {
1644                        i915_request_put(lo);
1645                        pr_err("%s: Busywaiting request did not busywait!\n",
1646                               engine->name);
1647                        err = -EIO;
1648                        goto err_vma;
1649                }
1650
1651                hi = igt_request_alloc(ctx_hi, engine);
1652                if (IS_ERR(hi)) {
1653                        err = PTR_ERR(hi);
1654                        i915_request_put(lo);
1655                        goto err_vma;
1656                }
1657
1658                cs = intel_ring_begin(hi, 4);
1659                if (IS_ERR(cs)) {
1660                        err = PTR_ERR(cs);
1661                        i915_request_add(hi);
1662                        i915_request_put(lo);
1663                        goto err_vma;
1664                }
1665
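                    /* Release the busywait by writing 0 back to the semaphore */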
1666                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1667                *cs++ = i915_ggtt_offset(vma);
1668                *cs++ = 0;
1669                *cs++ = 0;
1670
1671                intel_ring_advance(hi, cs);
1672                i915_request_add(hi);
1673
1674                if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1675                        struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1676
1677                        pr_err("%s: Failed to preempt semaphore busywait!\n",
1678                               engine->name);
1679
1680                        intel_engine_dump(engine, &p, "%s\n", engine->name);
1681                        GEM_TRACE_DUMP();
1682
1683                        i915_request_put(lo);
1684                        intel_gt_set_wedged(gt);
1685                        err = -EIO;
1686                        goto err_vma;
1687                }
1688                GEM_BUG_ON(READ_ONCE(*map));
1689                i915_request_put(lo);
1690
1691                if (igt_live_test_end(&t)) {
1692                        err = -EIO;
1693                        goto err_vma;
1694                }
1695        }
1696
1697        err = 0;
1698err_vma:
1699        i915_vma_unpin(vma);
1700err_map:
1701        i915_gem_object_unpin_map(obj);
1702err_obj:
1703        i915_gem_object_put(obj);
1704err_ctx_lo:
1705        kernel_context_close(ctx_lo);
1706err_ctx_hi:
1707        kernel_context_close(ctx_hi);
1708        return err;
1709}
1710
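    /*
     * Resolve @ctx's intel_context for @engine (via the legacy engine
     * map) and build a spinner request on it.
     */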
1711static struct i915_request *
1712spinner_create_request(struct igt_spinner *spin,
1713                       struct i915_gem_context *ctx,
1714                       struct intel_engine_cs *engine,
1715                       u32 arb)
1716{
1717        struct intel_context *ce;
1718        struct i915_request *rq;
1719
1720        ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1721        if (IS_ERR(ce))
1722                return ERR_CAST(ce);
1723
1724        rq = igt_spinner_create_request(spin, ce, arb);
1725        intel_context_put(ce);
1726        return rq;
1727}
1728
1729static int live_preempt(void *arg)
1730{
1731        struct intel_gt *gt = arg;
1732        struct i915_gem_context *ctx_hi, *ctx_lo;
1733        struct igt_spinner spin_hi, spin_lo;
1734        struct intel_engine_cs *engine;
1735        enum intel_engine_id id;
1736        int err = -ENOMEM;
1737
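            /*
             * Submit a low priority spinner, then a high priority spinner
             * behind it; the latter must preempt and start running while
             * the former is still spinning.
             */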
1738        if (igt_spinner_init(&spin_hi, gt))
1739                return -ENOMEM;
1740
1741        if (igt_spinner_init(&spin_lo, gt))
1742                goto err_spin_hi;
1743
1744        ctx_hi = kernel_context(gt->i915);
1745        if (!ctx_hi)
1746                goto err_spin_lo;
1747        ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1748
1749        ctx_lo = kernel_context(gt->i915);
1750        if (!ctx_lo)
1751                goto err_ctx_hi;
1752        ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1753
1754        for_each_engine(engine, gt, id) {
1755                struct igt_live_test t;
1756                struct i915_request *rq;
1757
1758                if (!intel_engine_has_preemption(engine))
1759                        continue;
1760
1761                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1762                        err = -EIO;
1763                        goto err_ctx_lo;
1764                }
1765
1766                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1767                                            MI_ARB_CHECK);
1768                if (IS_ERR(rq)) {
1769                        err = PTR_ERR(rq);
1770                        goto err_ctx_lo;
1771                }
1772
1773                i915_request_add(rq);
1774                if (!igt_wait_for_spinner(&spin_lo, rq)) {
1775                        GEM_TRACE("lo spinner failed to start\n");
1776                        GEM_TRACE_DUMP();
1777                        intel_gt_set_wedged(gt);
1778                        err = -EIO;
1779                        goto err_ctx_lo;
1780                }
1781
1782                rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1783                                            MI_ARB_CHECK);
1784                if (IS_ERR(rq)) {
1785                        igt_spinner_end(&spin_lo);
1786                        err = PTR_ERR(rq);
1787                        goto err_ctx_lo;
1788                }
1789
1790                i915_request_add(rq);
1791                if (!igt_wait_for_spinner(&spin_hi, rq)) {
1792                        GEM_TRACE("hi spinner failed to start\n");
1793                        GEM_TRACE_DUMP();
1794                        intel_gt_set_wedged(gt);
1795                        err = -EIO;
1796                        goto err_ctx_lo;
1797                }
1798
1799                igt_spinner_end(&spin_hi);
1800                igt_spinner_end(&spin_lo);
1801
1802                if (igt_live_test_end(&t)) {
1803                        err = -EIO;
1804                        goto err_ctx_lo;
1805                }
1806        }
1807
1808        err = 0;
1809err_ctx_lo:
1810        kernel_context_close(ctx_lo);
1811err_ctx_hi:
1812        kernel_context_close(ctx_hi);
1813err_spin_lo:
1814        igt_spinner_fini(&spin_lo);
1815err_spin_hi:
1816        igt_spinner_fini(&spin_hi);
1817        return err;
1818}
1819
1820static int live_late_preempt(void *arg)
1821{
1822        struct intel_gt *gt = arg;
1823        struct i915_gem_context *ctx_hi, *ctx_lo;
1824        struct igt_spinner spin_hi, spin_lo;
1825        struct intel_engine_cs *engine;
1826        struct i915_sched_attr attr = {};
1827        enum intel_engine_id id;
1828        int err = -ENOMEM;
1829
1830        if (igt_spinner_init(&spin_hi, gt))
1831                return -ENOMEM;
1832
1833        if (igt_spinner_init(&spin_lo, gt))
1834                goto err_spin_hi;
1835
1836        ctx_hi = kernel_context(gt->i915);
1837        if (!ctx_hi)
1838                goto err_spin_lo;
1839
1840        ctx_lo = kernel_context(gt->i915);
1841        if (!ctx_lo)
1842                goto err_ctx_hi;
1843
1844        /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1845        ctx_lo->sched.priority = 1;
1846
1847        for_each_engine(engine, gt, id) {
1848                struct igt_live_test t;
1849                struct i915_request *rq;
1850
1851                if (!intel_engine_has_preemption(engine))
1852                        continue;
1853
1854                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1855                        err = -EIO;
1856                        goto err_ctx_lo;
1857                }
1858
1859                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1860                                            MI_ARB_CHECK);
1861                if (IS_ERR(rq)) {
1862                        err = PTR_ERR(rq);
1863                        goto err_ctx_lo;
1864                }
1865
1866                i915_request_add(rq);
1867                if (!igt_wait_for_spinner(&spin_lo, rq)) {
1868                        pr_err("First context failed to start\n");
1869                        goto err_wedged;
1870                }
1871
1872                rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1873                                            MI_NOOP);
1874                if (IS_ERR(rq)) {
1875                        igt_spinner_end(&spin_lo);
1876                        err = PTR_ERR(rq);
1877                        goto err_ctx_lo;
1878                }
1879
1880                i915_request_add(rq);
1881                if (igt_wait_for_spinner(&spin_hi, rq)) {
1882                        pr_err("Second context overtook first?\n");
1883                        goto err_wedged;
1884                }
1885
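                    /* Now raise the second request to maximum priority to trigger preemption */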
1886                attr.priority = I915_PRIORITY_MAX;
1887                engine->schedule(rq, &attr);
1888
1889                if (!igt_wait_for_spinner(&spin_hi, rq)) {
1890                        pr_err("High priority context failed to preempt the low priority context\n");
1891                        GEM_TRACE_DUMP();
1892                        goto err_wedged;
1893                }
1894
1895                igt_spinner_end(&spin_hi);
1896                igt_spinner_end(&spin_lo);
1897
1898                if (igt_live_test_end(&t)) {
1899                        err = -EIO;
1900                        goto err_ctx_lo;
1901                }
1902        }
1903
1904        err = 0;
1905err_ctx_lo:
1906        kernel_context_close(ctx_lo);
1907err_ctx_hi:
1908        kernel_context_close(ctx_hi);
1909err_spin_lo:
1910        igt_spinner_fini(&spin_lo);
1911err_spin_hi:
1912        igt_spinner_fini(&spin_hi);
1913        return err;
1914
1915err_wedged:
1916        igt_spinner_end(&spin_hi);
1917        igt_spinner_end(&spin_lo);
1918        intel_gt_set_wedged(gt);
1919        err = -EIO;
1920        goto err_ctx_lo;
1921}
1922
1923struct preempt_client {
1924        struct igt_spinner spin;
1925        struct i915_gem_context *ctx;
1926};
1927
1928static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1929{
1930        c->ctx = kernel_context(gt->i915);
1931        if (!c->ctx)
1932                return -ENOMEM;
1933
1934        if (igt_spinner_init(&c->spin, gt))
1935                goto err_ctx;
1936
1937        return 0;
1938
1939err_ctx:
1940        kernel_context_close(c->ctx);
1941        return -ENOMEM;
1942}
1943
1944static void preempt_client_fini(struct preempt_client *c)
1945{
1946        igt_spinner_fini(&c->spin);
1947        kernel_context_close(c->ctx);
1948}
1949
1950static int live_nopreempt(void *arg)
1951{
1952        struct intel_gt *gt = arg;
1953        struct intel_engine_cs *engine;
1954        struct preempt_client a, b;
1955        enum intel_engine_id id;
1956        int err = -ENOMEM;
1957
1958        /*
1959         * Verify that we can disable preemption for an individual request
1960         * that may be being observed and does not want to be interrupted.
1961         */
1962
1963        if (preempt_client_init(gt, &a))
1964                return -ENOMEM;
1965        if (preempt_client_init(gt, &b))
1966                goto err_client_a;
1967        b.ctx->sched.priority = I915_PRIORITY_MAX;
1968
1969        for_each_engine(engine, gt, id) {
1970                struct i915_request *rq_a, *rq_b;
1971
1972                if (!intel_engine_has_preemption(engine))
1973                        continue;
1974
1975                engine->execlists.preempt_hang.count = 0;
1976
1977                rq_a = spinner_create_request(&a.spin,
1978                                              a.ctx, engine,
1979                                              MI_ARB_CHECK);
1980                if (IS_ERR(rq_a)) {
1981                        err = PTR_ERR(rq_a);
1982                        goto err_client_b;
1983                }
1984
1985                /* Low priority client, but unpreemptable! */
1986                __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1987
1988                i915_request_add(rq_a);
1989                if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1990                        pr_err("First client failed to start\n");
1991                        goto err_wedged;
1992                }
1993
1994                rq_b = spinner_create_request(&b.spin,
1995                                              b.ctx, engine,
1996                                              MI_ARB_CHECK);
1997                if (IS_ERR(rq_b)) {
1998                        err = PTR_ERR(rq_b);
1999                        goto err_client_b;
2000                }
2001
2002                i915_request_add(rq_b);
2003
2004                /* B is much more important than A! (But A is unpreemptable.) */
2005                GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2006
2007                /* Wait long enough for preemption and timeslicing */
2008                if (igt_wait_for_spinner(&b.spin, rq_b)) {
2009                        pr_err("Second client started too early!\n");
2010                        goto err_wedged;
2011                }
2012
2013                igt_spinner_end(&a.spin);
2014
2015                if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2016                        pr_err("Second client failed to start\n");
2017                        goto err_wedged;
2018                }
2019
2020                igt_spinner_end(&b.spin);
2021
2022                if (engine->execlists.preempt_hang.count) {
2023                        pr_err("Preemption recorded x%d; should have been suppressed!\n",
2024                               engine->execlists.preempt_hang.count);
2025                        err = -EINVAL;
2026                        goto err_wedged;
2027                }
2028
2029                if (igt_flush_test(gt->i915))
2030                        goto err_wedged;
2031        }
2032
2033        err = 0;
2034err_client_b:
2035        preempt_client_fini(&b);
2036err_client_a:
2037        preempt_client_fini(&a);
2038        return err;
2039
2040err_wedged:
2041        igt_spinner_end(&b.spin);
2042        igt_spinner_end(&a.spin);
2043        intel_gt_set_wedged(gt);
2044        err = -EIO;
2045        goto err_client_b;
2046}
2047
2048struct live_preempt_cancel {
2049        struct intel_engine_cs *engine;
2050        struct preempt_client a, b;
2051};
2052
2053static int __cancel_active0(struct live_preempt_cancel *arg)
2054{
2055        struct i915_request *rq;
2056        struct igt_live_test t;
2057        int err;
2058
2059        /* Preempt cancel of ELSP0 */
2060        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2061        if (igt_live_test_begin(&t, arg->engine->i915,
2062                                __func__, arg->engine->name))
2063                return -EIO;
2064
2065        rq = spinner_create_request(&arg->a.spin,
2066                                    arg->a.ctx, arg->engine,
2067                                    MI_ARB_CHECK);
2068        if (IS_ERR(rq))
2069                return PTR_ERR(rq);
2070
2071        clear_bit(CONTEXT_BANNED, &rq->context->flags);
2072        i915_request_get(rq);
2073        i915_request_add(rq);
2074        if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2075                err = -EIO;
2076                goto out;
2077        }
2078
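            /* Ban the spinning context and send a heartbeat pulse to kick it off the HW */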
2079        intel_context_set_banned(rq->context);
2080        err = intel_engine_pulse(arg->engine);
2081        if (err)
2082                goto out;
2083
2084        err = wait_for_reset(arg->engine, rq, HZ / 2);
2085        if (err) {
2086                pr_err("Cancelled inflight0 request did not reset\n");
2087                goto out;
2088        }
2089
2090out:
2091        i915_request_put(rq);
2092        if (igt_live_test_end(&t))
2093                err = -EIO;
2094        return err;
2095}
2096
2097static int __cancel_active1(struct live_preempt_cancel *arg)
2098{
2099        struct i915_request *rq[2] = {};
2100        struct igt_live_test t;
2101        int err;
2102
2103        /* Preempt cancel of ELSP1 */
2104        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2105        if (igt_live_test_begin(&t, arg->engine->i915,
2106                                __func__, arg->engine->name))
2107                return -EIO;
2108
2109        rq[0] = spinner_create_request(&arg->a.spin,
2110                                       arg->a.ctx, arg->engine,
2111                                       MI_NOOP); /* no preemption */
2112        if (IS_ERR(rq[0]))
2113                return PTR_ERR(rq[0]);
2114
2115        clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2116        i915_request_get(rq[0]);
2117        i915_request_add(rq[0]);
2118        if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2119                err = -EIO;
2120                goto out;
2121        }
2122
2123        rq[1] = spinner_create_request(&arg->b.spin,
2124                                       arg->b.ctx, arg->engine,
2125                                       MI_ARB_CHECK);
2126        if (IS_ERR(rq[1])) {
2127                err = PTR_ERR(rq[1]);
2128                goto out;
2129        }
2130
2131        clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2132        i915_request_get(rq[1]);
2133        err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2134        i915_request_add(rq[1]);
2135        if (err)
2136                goto out;
2137
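            /* Ban only the second context; the first must still complete normally */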
2138        intel_context_set_banned(rq[1]->context);
2139        err = intel_engine_pulse(arg->engine);
2140        if (err)
2141                goto out;
2142
2143        igt_spinner_end(&arg->a.spin);
2144        err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2145        if (err)
2146                goto out;
2147
2148        if (rq[0]->fence.error != 0) {
2149                pr_err("Normal inflight0 request did not complete\n");
2150                err = -EINVAL;
2151                goto out;
2152        }
2153
2154        if (rq[1]->fence.error != -EIO) {
2155                pr_err("Cancelled inflight1 request did not report -EIO\n");
2156                err = -EINVAL;
2157                goto out;
2158        }
2159
2160out:
2161        i915_request_put(rq[1]);
2162        i915_request_put(rq[0]);
2163        if (igt_live_test_end(&t))
2164                err = -EIO;
2165        return err;
2166}
2167
2168static int __cancel_queued(struct live_preempt_cancel *arg)
2169{
2170        struct i915_request *rq[3] = {};
2171        struct igt_live_test t;
2172        int err;
2173
2174        /* Full ELSP and one in the wings */
2175        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2176        if (igt_live_test_begin(&t, arg->engine->i915,
2177                                __func__, arg->engine->name))
2178                return -EIO;
2179
2180        rq[0] = spinner_create_request(&arg->a.spin,
2181                                       arg->a.ctx, arg->engine,
2182                                       MI_ARB_CHECK);
2183        if (IS_ERR(rq[0]))
2184                return PTR_ERR(rq[0]);
2185
2186        clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2187        i915_request_get(rq[0]);
2188        i915_request_add(rq[0]);
2189        if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2190                err = -EIO;
2191                goto out;
2192        }
2193
2194        rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2195        if (IS_ERR(rq[1])) {
2196                err = PTR_ERR(rq[1]);
2197                goto out;
2198        }
2199
2200        clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2201        i915_request_get(rq[1]);
2202        err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2203        i915_request_add(rq[1]);
2204        if (err)
2205                goto out;
2206
2207        rq[2] = spinner_create_request(&arg->b.spin,
2208                                       arg->a.ctx, arg->engine,
2209                                       MI_ARB_CHECK);
2210        if (IS_ERR(rq[2])) {
2211                err = PTR_ERR(rq[2]);
2212                goto out;
2213        }
2214
2215        i915_request_get(rq[2]);
2216        err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2217        i915_request_add(rq[2]);
2218        if (err)
2219                goto out;
2220
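            /* rq[2] shares a.ctx with rq[0]: banning it cancels both, sparing rq[1] */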
2221        intel_context_set_banned(rq[2]->context);
2222        err = intel_engine_pulse(arg->engine);
2223        if (err)
2224                goto out;
2225
2226        err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2227        if (err)
2228                goto out;
2229
2230        if (rq[0]->fence.error != -EIO) {
2231                pr_err("Cancelled inflight0 request did not report -EIO\n");
2232                err = -EINVAL;
2233                goto out;
2234        }
2235
2236        if (rq[1]->fence.error != 0) {
2237                pr_err("Normal inflight1 request did not complete\n");
2238                err = -EINVAL;
2239                goto out;
2240        }
2241
2242        if (rq[2]->fence.error != -EIO) {
2243                pr_err("Cancelled queued request did not report -EIO\n");
2244                err = -EINVAL;
2245                goto out;
2246        }
2247
2248out:
2249        i915_request_put(rq[2]);
2250        i915_request_put(rq[1]);
2251        i915_request_put(rq[0]);
2252        if (igt_live_test_end(&t))
2253                err = -EIO;
2254        return err;
2255}
2256
2257static int __cancel_hostile(struct live_preempt_cancel *arg)
2258{
2259        struct i915_request *rq;
2260        int err;
2261
2262        /* Preempt cancel non-preemptible spinner in ELSP0 */
2263        if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2264                return 0;
2265
2266        if (!intel_has_reset_engine(arg->engine->gt))
2267                return 0;
2268
2269        GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2270        rq = spinner_create_request(&arg->a.spin,
2271                                    arg->a.ctx, arg->engine,
2272                                    MI_NOOP); /* preemption disabled */
2273        if (IS_ERR(rq))
2274                return PTR_ERR(rq);
2275
2276        clear_bit(CONTEXT_BANNED, &rq->context->flags);
2277        i915_request_get(rq);
2278        i915_request_add(rq);
2279        if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2280                err = -EIO;
2281                goto out;
2282        }
2283
2284        intel_context_set_banned(rq->context);
2285        err = intel_engine_pulse(arg->engine); /* force reset */
2286        if (err)
2287                goto out;
2288
2289        err = wait_for_reset(arg->engine, rq, HZ / 2);
2290        if (err) {
2291                pr_err("Cancelled inflight0 request did not reset\n");
2292                goto out;
2293        }
2294
2295out:
2296        i915_request_put(rq);
2297        if (igt_flush_test(arg->engine->i915))
2298                err = -EIO;
2299        return err;
2300}
2301
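    /*
     * Selftest fault injection: arrange for the following preemption-timeout
     * reset attempts to time out (probability 999, unlimited repeats), so
     * recovery has to fall back to the heartbeat.
     */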
2302static void force_reset_timeout(struct intel_engine_cs *engine)
2303{
2304        engine->reset_timeout.probability = 999;
2305        atomic_set(&engine->reset_timeout.times, -1);
2306}
2307
2308static void cancel_reset_timeout(struct intel_engine_cs *engine)
2309{
2310        memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2311}
2312
2313static int __cancel_fail(struct live_preempt_cancel *arg)
2314{
2315        struct intel_engine_cs *engine = arg->engine;
2316        struct i915_request *rq;
2317        int err;
2318
2319        if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2320                return 0;
2321
2322        if (!intel_has_reset_engine(engine->gt))
2323                return 0;
2324
2325        GEM_TRACE("%s(%s)\n", __func__, engine->name);
2326        rq = spinner_create_request(&arg->a.spin,
2327                                    arg->a.ctx, engine,
2328                                    MI_NOOP); /* preemption disabled */
2329        if (IS_ERR(rq))
2330                return PTR_ERR(rq);
2331
2332        clear_bit(CONTEXT_BANNED, &rq->context->flags);
2333        i915_request_get(rq);
2334        i915_request_add(rq);
2335        if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2336                err = -EIO;
2337                goto out;
2338        }
2339
2340        intel_context_set_banned(rq->context);
2341
2342        err = intel_engine_pulse(engine);
2343        if (err)
2344                goto out;
2345
2346        force_reset_timeout(engine);
2347
2348        /* force preempt reset [failure] */
2349        while (!engine->execlists.pending[0])
2350                intel_engine_flush_submission(engine);
2351        del_timer_sync(&engine->execlists.preempt);
2352        intel_engine_flush_submission(engine);
2353
2354        cancel_reset_timeout(engine);
2355
2356        /* after failure, require heartbeats to reset device */
2357        intel_engine_set_heartbeat(engine, 1);
2358        err = wait_for_reset(engine, rq, HZ / 2);
2359        intel_engine_set_heartbeat(engine,
2360                                   engine->defaults.heartbeat_interval_ms);
2361        if (err) {
2362                pr_err("Cancelled inflight0 request did not reset\n");
2363                goto out;
2364        }
2365
2366out:
2367        i915_request_put(rq);
2368        if (igt_flush_test(engine->i915))
2369                err = -EIO;
2370        return err;
2371}
2372
2373static int live_preempt_cancel(void *arg)
2374{
2375        struct intel_gt *gt = arg;
2376        struct live_preempt_cancel data;
2377        enum intel_engine_id id;
2378        int err = -ENOMEM;
2379
2380        /*
2381         * To cancel an inflight context, we need to first remove it from the
2382         * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2383         */
2384
2385        if (preempt_client_init(gt, &data.a))
2386                return -ENOMEM;
2387        if (preempt_client_init(gt, &data.b))
2388                goto err_client_a;
2389
2390        for_each_engine(data.engine, gt, id) {
2391                if (!intel_engine_has_preemption(data.engine))
2392                        continue;
2393
2394                err = __cancel_active0(&data);
2395                if (err)
2396                        goto err_wedged;
2397
2398                err = __cancel_active1(&data);
2399                if (err)
2400                        goto err_wedged;
2401
2402                err = __cancel_queued(&data);
2403                if (err)
2404                        goto err_wedged;
2405
2406                err = __cancel_hostile(&data);
2407                if (err)
2408                        goto err_wedged;
2409
2410                err = __cancel_fail(&data);
2411                if (err)
2412                        goto err_wedged;
2413        }
2414
2415        err = 0;
2416err_client_b:
2417        preempt_client_fini(&data.b);
2418err_client_a:
2419        preempt_client_fini(&data.a);
2420        return err;
2421
2422err_wedged:
2423        GEM_TRACE_DUMP();
2424        igt_spinner_end(&data.b.spin);
2425        igt_spinner_end(&data.a.spin);
2426        intel_gt_set_wedged(gt);
2427        goto err_client_b;
2428}
2429
2430static int live_suppress_self_preempt(void *arg)
2431{
2432        struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2433        struct intel_gt *gt = arg;
2434        struct intel_engine_cs *engine;
2435        struct preempt_client a, b;
2436        enum intel_engine_id id;
2437        int err = -ENOMEM;
2438
2439        /*
2440         * Verify that if a preemption request does not cause a change in
2441         * the current execution order, the preempt-to-idle injection is
2442         * skipped and that we do not accidentally apply it after the CS
2443         * completion event.
2444         */
2445
2446        if (intel_uc_uses_guc_submission(&gt->uc))
2447                return 0; /* presume black box */
2448
2449        if (intel_vgpu_active(gt->i915))
2450                return 0; /* GVT forces single port & request submission */
2451
2452        if (preempt_client_init(gt, &a))
2453                return -ENOMEM;
2454        if (preempt_client_init(gt, &b))
2455                goto err_client_a;
2456
2457        for_each_engine(engine, gt, id) {
2458                struct i915_request *rq_a, *rq_b;
2459                int depth;
2460
2461                if (!intel_engine_has_preemption(engine))
2462                        continue;
2463
2464                if (igt_flush_test(gt->i915))
2465                        goto err_wedged;
2466
2467                st_engine_heartbeat_disable(engine);
2468                engine->execlists.preempt_hang.count = 0;
2469
2470                rq_a = spinner_create_request(&a.spin,
2471                                              a.ctx, engine,
2472                                              MI_NOOP);
2473                if (IS_ERR(rq_a)) {
2474                        err = PTR_ERR(rq_a);
2475                        st_engine_heartbeat_enable(engine);
2476                        goto err_client_b;
2477                }
2478
2479                i915_request_add(rq_a);
2480                if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2481                        pr_err("First client failed to start\n");
2482                        st_engine_heartbeat_enable(engine);
2483                        goto err_wedged;
2484                }
2485
2486                /* Keep postponing the timer to avoid premature slicing */
2487                mod_timer(&engine->execlists.timer, jiffies + HZ);
2488                for (depth = 0; depth < 8; depth++) {
2489                        rq_b = spinner_create_request(&b.spin,
2490                                                      b.ctx, engine,
2491                                                      MI_NOOP);
2492                        if (IS_ERR(rq_b)) {
2493                                err = PTR_ERR(rq_b);
2494                                st_engine_heartbeat_enable(engine);
2495                                goto err_client_b;
2496                        }
2497                        i915_request_add(rq_b);
2498
2499                        GEM_BUG_ON(i915_request_completed(rq_a));
2500                        engine->schedule(rq_a, &attr);
2501                        igt_spinner_end(&a.spin);
2502
2503                        if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2504                                pr_err("Second client failed to start\n");
2505                                st_engine_heartbeat_enable(engine);
2506                                goto err_wedged;
2507                        }
2508
2509                        swap(a, b);
2510                        rq_a = rq_b;
2511                }
2512                igt_spinner_end(&a.spin);
2513
2514                if (engine->execlists.preempt_hang.count) {
2515                        pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2516                               engine->name,
2517                               engine->execlists.preempt_hang.count,
2518                               depth);
2519                        st_engine_heartbeat_enable(engine);
2520                        err = -EINVAL;
2521                        goto err_client_b;
2522                }
2523
2524                st_engine_heartbeat_enable(engine);
2525                if (igt_flush_test(gt->i915))
2526                        goto err_wedged;
2527        }
2528
2529        err = 0;
2530err_client_b:
2531        preempt_client_fini(&b);
2532err_client_a:
2533        preempt_client_fini(&a);
2534        return err;
2535
2536err_wedged:
2537        igt_spinner_end(&b.spin);
2538        igt_spinner_end(&a.spin);
2539        intel_gt_set_wedged(gt);
2540        err = -EIO;
2541        goto err_client_b;
2542}
2543
2544static int live_chain_preempt(void *arg)
2545{
2546        struct intel_gt *gt = arg;
2547        struct intel_engine_cs *engine;
2548        struct preempt_client hi, lo;
2549        enum intel_engine_id id;
2550        int err = -ENOMEM;
2551
2552        /*
2553         * Build a chain AB...BA between two contexts (A, B) and request
2554         * preemption of the last request. It should then complete before
2555         * the previously submitted spinner in B.
2556         */
2557
2558        if (preempt_client_init(gt, &hi))
2559                return -ENOMEM;
2560
2561        if (preempt_client_init(gt, &lo))
2562                goto err_client_hi;
2563
2564        for_each_engine(engine, gt, id) {
2565                struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2566                struct igt_live_test t;
2567                struct i915_request *rq;
2568                int ring_size, count, i;
2569
2570                if (!intel_engine_has_preemption(engine))
2571                        continue;
2572
2573                rq = spinner_create_request(&lo.spin,
2574                                            lo.ctx, engine,
2575                                            MI_ARB_CHECK);
2576                if (IS_ERR(rq))
2577                        goto err_wedged;
2578
2579                i915_request_get(rq);
2580                i915_request_add(rq);
2581
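                    /* Estimate how many requests fit in the ring from the footprint of this one */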
2582                ring_size = rq->wa_tail - rq->head;
2583                if (ring_size < 0)
2584                        ring_size += rq->ring->size;
2585                ring_size = rq->ring->size / ring_size;
2586                pr_debug("%s(%s): Using maximum of %d requests\n",
2587                         __func__, engine->name, ring_size);
2588
2589                igt_spinner_end(&lo.spin);
2590                if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2591                        pr_err("Timed out waiting to flush %s\n", engine->name);
2592                        i915_request_put(rq);
2593                        goto err_wedged;
2594                }
2595                i915_request_put(rq);
2596
2597                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2598                        err = -EIO;
2599                        goto err_wedged;
2600                }
2601
2602                for_each_prime_number_from(count, 1, ring_size) {
2603                        rq = spinner_create_request(&hi.spin,
2604                                                    hi.ctx, engine,
2605                                                    MI_ARB_CHECK);
2606                        if (IS_ERR(rq))
2607                                goto err_wedged;
2608                        i915_request_add(rq);
2609                        if (!igt_wait_for_spinner(&hi.spin, rq))
2610                                goto err_wedged;
2611
2612                        rq = spinner_create_request(&lo.spin,
2613                                                    lo.ctx, engine,
2614                                                    MI_ARB_CHECK);
2615                        if (IS_ERR(rq))
2616                                goto err_wedged;
2617                        i915_request_add(rq);
2618
2619                        for (i = 0; i < count; i++) {
2620                                rq = igt_request_alloc(lo.ctx, engine);
2621                                if (IS_ERR(rq))
2622                                        goto err_wedged;
2623                                i915_request_add(rq);
2624                        }
2625
2626                        rq = igt_request_alloc(hi.ctx, engine);
2627                        if (IS_ERR(rq))
2628                                goto err_wedged;
2629
2630                        i915_request_get(rq);
2631                        i915_request_add(rq);
2632                        engine->schedule(rq, &attr);
2633
2634                        igt_spinner_end(&hi.spin);
2635                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2636                                struct drm_printer p =
2637                                        drm_info_printer(gt->i915->drm.dev);
2638
2639                                pr_err("Failed to preempt over chain of %d\n",
2640                                       count);
2641                                intel_engine_dump(engine, &p,
2642                                                  "%s\n", engine->name);
2643                                i915_request_put(rq);
2644                                goto err_wedged;
2645                        }
2646                        igt_spinner_end(&lo.spin);
2647                        i915_request_put(rq);
2648
2649                        rq = igt_request_alloc(lo.ctx, engine);
2650                        if (IS_ERR(rq))
2651                                goto err_wedged;
2652
2653                        i915_request_get(rq);
2654                        i915_request_add(rq);
2655
2656                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2657                                struct drm_printer p =
2658                                        drm_info_printer(gt->i915->drm.dev);
2659
2660                                pr_err("Failed to flush low priority chain of %d requests\n",
2661                                       count);
2662                                intel_engine_dump(engine, &p,
2663                                                  "%s\n", engine->name);
2664
2665                                i915_request_put(rq);
2666                                goto err_wedged;
2667                        }
2668                        i915_request_put(rq);
2669                }
2670
2671                if (igt_live_test_end(&t)) {
2672                        err = -EIO;
2673                        goto err_wedged;
2674                }
2675        }
2676
2677        err = 0;
2678err_client_lo:
2679        preempt_client_fini(&lo);
2680err_client_hi:
2681        preempt_client_fini(&hi);
2682        return err;
2683
2684err_wedged:
2685        igt_spinner_end(&hi.spin);
2686        igt_spinner_end(&lo.spin);
2687        intel_gt_set_wedged(gt);
2688        err = -EIO;
2689        goto err_client_lo;
2690}
2691
2692static int create_gang(struct intel_engine_cs *engine,
2693                       struct i915_request **prev)
2694{
2695        struct drm_i915_gem_object *obj;
2696        struct intel_context *ce;
2697        struct i915_request *rq;
2698        struct i915_vma *vma;
2699        u32 *cs;
2700        int err;
2701
2702        ce = intel_context_create(engine);
2703        if (IS_ERR(ce))
2704                return PTR_ERR(ce);
2705
2706        obj = i915_gem_object_create_internal(engine->i915, 4096);
2707        if (IS_ERR(obj)) {
2708                err = PTR_ERR(obj);
2709                goto err_ce;
2710        }
2711
2712        vma = i915_vma_instance(obj, ce->vm, NULL);
2713        if (IS_ERR(vma)) {
2714                err = PTR_ERR(vma);
2715                goto err_obj;
2716        }
2717
2718        err = i915_vma_pin(vma, 0, 0, PIN_USER);
2719        if (err)
2720                goto err_obj;
2721
2722        cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2723        if (IS_ERR(cs)) {
2724                err = PTR_ERR(cs);
2725                goto err_obj;
2726        }
2727
2728        /* Semaphore target: spin until zero */
2729        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2730
2731        *cs++ = MI_SEMAPHORE_WAIT |
2732                MI_SEMAPHORE_POLL |
2733                MI_SEMAPHORE_SAD_EQ_SDD;
2734        *cs++ = 0;
2735        *cs++ = lower_32_bits(vma->node.start);
2736        *cs++ = upper_32_bits(vma->node.start);
2737
2738        if (*prev) {
2739                u64 offset = (*prev)->batch->node.start;
2740
2741                /* Terminate the spinner in the next lower priority batch. */
2742                *cs++ = MI_STORE_DWORD_IMM_GEN4;
2743                *cs++ = lower_32_bits(offset);
2744                *cs++ = upper_32_bits(offset);
2745                *cs++ = 0;
2746        }
2747
2748        *cs++ = MI_BATCH_BUFFER_END;
2749        i915_gem_object_flush_map(obj);
2750        i915_gem_object_unpin_map(obj);
2751
2752        rq = intel_context_create_request(ce);
2753        if (IS_ERR(rq)) {
2754                err = PTR_ERR(rq);
2755                goto err_obj;
2756        }
2757
2758        rq->batch = i915_vma_get(vma);
2759        i915_request_get(rq);
2760
2761        i915_vma_lock(vma);
2762        err = i915_request_await_object(rq, vma->obj, false);
2763        if (!err)
2764                err = i915_vma_move_to_active(vma, rq, 0);
2765        if (!err)
2766                err = rq->engine->emit_bb_start(rq,
2767                                                vma->node.start,
2768                                                PAGE_SIZE, 0);
2769        i915_vma_unlock(vma);
2770        i915_request_add(rq);
2771        if (err)
2772                goto err_rq;
2773
2774        i915_gem_object_put(obj);
2775        intel_context_put(ce);
2776
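            /* Chain the gang via the selftest-only mock.link, newest request first */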
2777        rq->mock.link.next = &(*prev)->mock.link;
2778        *prev = rq;
2779        return 0;
2780
2781err_rq:
2782        i915_vma_put(rq->batch);
2783        i915_request_put(rq);
2784err_obj:
2785        i915_gem_object_put(obj);
2786err_ce:
2787        intel_context_put(ce);
2788        return err;
2789}
2790
2791static int __live_preempt_ring(struct intel_engine_cs *engine,
2792                               struct igt_spinner *spin,
2793                               int queue_sz, int ring_sz)
2794{
2795        struct intel_context *ce[2] = {};
2796        struct i915_request *rq;
2797        struct igt_live_test t;
2798        int err = 0;
2799        int n;
2800
2801        if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2802                return -EIO;
2803
2804        for (n = 0; n < ARRAY_SIZE(ce); n++) {
2805                struct intel_context *tmp;
2806
2807                tmp = intel_context_create(engine);
2808                if (IS_ERR(tmp)) {
2809                        err = PTR_ERR(tmp);
2810                        goto err_ce;
2811                }
2812
2813                tmp->ring = __intel_context_ring_size(ring_sz);
2814
2815                err = intel_context_pin(tmp);
2816                if (err) {
2817                        intel_context_put(tmp);
2818                        goto err_ce;
2819                }
2820
2821                memset32(tmp->ring->vaddr,
2822                         0xdeadbeef, /* trigger a hang if executed */
2823                         tmp->ring->vma->size / sizeof(u32));
2824
2825                ce[n] = tmp;
2826        }
2827
2828        rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2829        if (IS_ERR(rq)) {
2830                err = PTR_ERR(rq);
2831                goto err_ce;
2832        }
2833
2834        i915_request_get(rq);
2835        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2836        i915_request_add(rq);
2837
2838        if (!igt_wait_for_spinner(spin, rq)) {
2839                intel_gt_set_wedged(engine->gt);
2840                i915_request_put(rq);
2841                err = -ETIME;
2842                goto err_ce;
2843        }
2844
2845        /* Fill the ring until we cause a wrap */
2846        n = 0;
2847        while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2848                struct i915_request *tmp;
2849
2850                tmp = intel_context_create_request(ce[0]);
2851                if (IS_ERR(tmp)) {
2852                        err = PTR_ERR(tmp);
2853                        i915_request_put(rq);
2854                        goto err_ce;
2855                }
2856
2857                i915_request_add(tmp);
2858                intel_engine_flush_submission(engine);
2859                n++;
2860        }
2861        intel_engine_flush_submission(engine);
2862        pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2863                 engine->name, queue_sz, n,
2864                 ce[0]->ring->size,
2865                 ce[0]->ring->tail,
2866                 ce[0]->ring->emit,
2867                 rq->tail);
2868        i915_request_put(rq);
2869
2870        /* Create a second request to preempt the first ring */
2871        rq = intel_context_create_request(ce[1]);
2872        if (IS_ERR(rq)) {
2873                err = PTR_ERR(rq);
2874                goto err_ce;
2875        }
2876
2877        rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2878        i915_request_get(rq);
2879        i915_request_add(rq);
2880
2881        err = wait_for_submit(engine, rq, HZ / 2);
2882        i915_request_put(rq);
2883        if (err) {
2884                pr_err("%s: preemption request was not submitted\n",
2885                       engine->name);
2886                err = -ETIME;
2887        }
2888
2889        pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2890                 engine->name,
2891                 ce[0]->ring->tail, ce[0]->ring->emit,
2892                 ce[1]->ring->tail, ce[1]->ring->emit);
2893
2894err_ce:
2895        intel_engine_flush_submission(engine);
2896        igt_spinner_end(spin);
2897        for (n = 0; n < ARRAY_SIZE(ce); n++) {
2898                if (IS_ERR_OR_NULL(ce[n]))
2899                        break;
2900
2901                intel_context_unpin(ce[n]);
2902                intel_context_put(ce[n]);
2903        }
2904        if (igt_live_test_end(&t))
2905                err = -EIO;
2906        return err;
2907}
2908
2909static int live_preempt_ring(void *arg)
2910{
2911        struct intel_gt *gt = arg;
2912        struct intel_engine_cs *engine;
2913        struct igt_spinner spin;
2914        enum intel_engine_id id;
2915        int err = 0;
2916
2917        /*
2918         * Check that we roll back large chunks of a ring in order to do a
2919         * preemption event. Similar to live_unlite_ring, but looking at
2920         * ring size rather than the impact of intel_ring_direction().
2921         */
2922
2923        if (igt_spinner_init(&spin, gt))
2924                return -ENOMEM;
2925
2926        for_each_engine(engine, gt, id) {
2927                int n;
2928
2929                if (!intel_engine_has_preemption(engine))
2930                        continue;
2931
2932                if (!intel_engine_can_store_dword(engine))
2933                        continue;
2934
2935                st_engine_heartbeat_disable(engine);
2936
2937                for (n = 0; n <= 3; n++) {
2938                        err = __live_preempt_ring(engine, &spin,
2939                                                  n * SZ_4K / 4, SZ_4K);
2940                        if (err)
2941                                break;
2942                }
2943
2944                st_engine_heartbeat_enable(engine);
2945                if (err)
2946                        break;
2947        }
2948
2949        igt_spinner_fini(&spin);
2950        return err;
2951}
2952
2953static int live_preempt_gang(void *arg)
2954{
2955        struct intel_gt *gt = arg;
2956        struct intel_engine_cs *engine;
2957        enum intel_engine_id id;
2958
2959        /*
2960         * Build as long a chain of preempters as we can, with each
2961         * request higher priority than the last. Once we are ready, we release
2962         * the last batch which then percolates down the chain, each releasing
2963         * the next oldest in turn. The intent is to simply push as hard as we
2964         * can with the number of preemptions, trying to exceed narrow HW
2965         * limits. At a minimum, we insist that we can sort all the user
2966         * high priority levels into execution order.
2967         */
2968
2969        for_each_engine(engine, gt, id) {
2970                struct i915_request *rq = NULL;
2971                struct igt_live_test t;
2972                IGT_TIMEOUT(end_time);
2973                int prio = 0;
2974                int err = 0;
2975                u32 *cs;
2976
2977                if (!intel_engine_has_preemption(engine))
2978                        continue;
2979
2980                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2981                        return -EIO;
2982
2983                do {
2984                        struct i915_sched_attr attr = { .priority = prio++ };
2985
2986                        err = create_gang(engine, &rq);
2987                        if (err)
2988                                break;
2989
2990                        /* Submit each spinner at increasing priority */
2991                        engine->schedule(rq, &attr);
2992                } while (prio <= I915_PRIORITY_MAX &&
2993                         !__igt_timeout(end_time, NULL));
2994                pr_debug("%s: Preempt chain of %d requests\n",
2995                         engine->name, prio);
2996
2997                /*
2998                 * The last spinner is the highest priority and should
2999                 * execute first. When that spinner completes,
3000                 * it will terminate the next lowest spinner until there
3001                 * are no more spinners and the gang is complete.
3002                 */
3003                cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3004                if (!IS_ERR(cs)) {
3005                        *cs = 0;
3006                        i915_gem_object_unpin_map(rq->batch->obj);
3007                } else {
3008                        err = PTR_ERR(cs);
3009                        intel_gt_set_wedged(gt);
3010                }
3011
3012                while (rq) { /* wait for each rq from highest to lowest prio */
3013                        struct i915_request *n = list_next_entry(rq, mock.link);
3014
3015                        if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3016                                struct drm_printer p =
3017                                        drm_info_printer(engine->i915->drm.dev);
3018
3019                                pr_err("Failed to flush chain of %d requests, at %d\n",
3020                                       prio, rq_prio(rq));
3021                                intel_engine_dump(engine, &p,
3022                                                  "%s\n", engine->name);
3023
3024                                err = -ETIME;
3025                        }
3026
3027                        i915_vma_put(rq->batch);
3028                        i915_request_put(rq);
3029                        rq = n;
3030                }
3031
3032                if (igt_live_test_end(&t))
3033                        err = -EIO;
3034                if (err)
3035                        return err;
3036        }
3037
3038        return 0;
3039}
3040
3041static struct i915_vma *
3042create_gpr_user(struct intel_engine_cs *engine,
3043                struct i915_vma *result,
3044                unsigned int offset)
3045{
3046        struct drm_i915_gem_object *obj;
3047        struct i915_vma *vma;
3048        u32 *cs;
3049        int err;
3050        int i;
3051
3052        obj = i915_gem_object_create_internal(engine->i915, 4096);
3053        if (IS_ERR(obj))
3054                return ERR_CAST(obj);
3055
3056        vma = i915_vma_instance(obj, result->vm, NULL);
3057        if (IS_ERR(vma)) {
3058                i915_gem_object_put(obj);
3059                return vma;
3060        }
3061
3062        err = i915_vma_pin(vma, 0, 0, PIN_USER);
3063        if (err) {
3064                i915_vma_put(vma);
3065                return ERR_PTR(err);
3066        }
3067
3068        cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3069        if (IS_ERR(cs)) {
3070                i915_vma_put(vma);
3071                return ERR_CAST(cs);
3072        }
3073
3074        /* All GPRs are cleared for new contexts. We use GPR(0) as a constant */
3075        *cs++ = MI_LOAD_REGISTER_IMM(1);
3076        *cs++ = CS_GPR(engine, 0);
3077        *cs++ = 1;
3078
3079        for (i = 1; i < NUM_GPR; i++) {
3080                u64 addr;
3081
3082                /*
3083                 * Perform: GPR[i]++
3084                 *
3085                 * As we read and write into the context saved GPR[i], if
3086                 * we restart this batch buffer from an earlier point, we
3087                 * will repeat the increment and store a value > 1.
3088                 */
3089                *cs++ = MI_MATH(4);
3090                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3091                *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3092                *cs++ = MI_MATH_ADD;
3093                *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3094
3095                addr = result->node.start + offset + i * sizeof(*cs);
3096                *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3097                *cs++ = CS_GPR(engine, 2 * i);
3098                *cs++ = lower_32_bits(addr);
3099                *cs++ = upper_32_bits(addr);
3100
3101                *cs++ = MI_SEMAPHORE_WAIT |
3102                        MI_SEMAPHORE_POLL |
3103                        MI_SEMAPHORE_SAD_GTE_SDD;
3104                *cs++ = i;
3105                *cs++ = lower_32_bits(result->node.start);
3106                *cs++ = upper_32_bits(result->node.start);
3107        }
3108
3109        *cs++ = MI_BATCH_BUFFER_END;
3110        i915_gem_object_flush_map(obj);
3111        i915_gem_object_unpin_map(obj);
3112
3113        return vma;
3114}
3115
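    /*
     * Allocate an internal object and pin it into the GGTT; it serves as
     * the result/semaphore page shared between the CPU, the kernel
     * context and the GPR clients.
     */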
3116static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3117{
3118        struct drm_i915_gem_object *obj;
3119        struct i915_vma *vma;
3120        int err;
3121
3122        obj = i915_gem_object_create_internal(gt->i915, sz);
3123        if (IS_ERR(obj))
3124                return ERR_CAST(obj);
3125
3126        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3127        if (IS_ERR(vma)) {
3128                i915_gem_object_put(obj);
3129                return vma;
3130        }
3131
3132        err = i915_ggtt_pin(vma, NULL, 0, 0);
3133        if (err) {
3134                i915_vma_put(vma);
3135                return ERR_PTR(err);
3136        }
3137
3138        return vma;
3139}
3140
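    /*
     * Create a fresh context, bind the shared result page into its ppGTT
     * and submit the GPR batch built by create_gpr_user(), directing its
     * stores to @offset within that page. On success a reference to the
     * submitted request is returned for the caller to wait upon.
     */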
3141static struct i915_request *
3142create_gpr_client(struct intel_engine_cs *engine,
3143                  struct i915_vma *global,
3144                  unsigned int offset)
3145{
3146        struct i915_vma *batch, *vma;
3147        struct intel_context *ce;
3148        struct i915_request *rq;
3149        int err;
3150
3151        ce = intel_context_create(engine);
3152        if (IS_ERR(ce))
3153                return ERR_CAST(ce);
3154
3155        vma = i915_vma_instance(global->obj, ce->vm, NULL);
3156        if (IS_ERR(vma)) {
3157                err = PTR_ERR(vma);
3158                goto out_ce;
3159        }
3160
3161        err = i915_vma_pin(vma, 0, 0, PIN_USER);
3162        if (err)
3163                goto out_ce;
3164
3165        batch = create_gpr_user(engine, vma, offset);
3166        if (IS_ERR(batch)) {
3167                err = PTR_ERR(batch);
3168                goto out_vma;
3169        }
3170
3171        rq = intel_context_create_request(ce);
3172        if (IS_ERR(rq)) {
3173                err = PTR_ERR(rq);
3174                goto out_batch;
3175        }
3176
3177        i915_vma_lock(vma);
3178        err = i915_request_await_object(rq, vma->obj, false);
3179        if (!err)
3180                err = i915_vma_move_to_active(vma, rq, 0);
3181        i915_vma_unlock(vma);
3182
3183        i915_vma_lock(batch);
3184        if (!err)
3185                err = i915_request_await_object(rq, batch->obj, false);
3186        if (!err)
3187                err = i915_vma_move_to_active(batch, rq, 0);
3188        if (!err)
3189                err = rq->engine->emit_bb_start(rq,
3190                                                batch->node.start,
3191                                                PAGE_SIZE, 0);
3192        i915_vma_unlock(batch);
3193        i915_vma_unpin(batch);
3194
3195        if (!err)
3196                i915_request_get(rq);
3197        i915_request_add(rq);
3198
3199out_batch:
3200        i915_vma_put(batch);
3201out_vma:
3202        i915_vma_unpin(vma);
3203out_ce:
3204        intel_context_put(ce);
3205        return err ? ERR_PTR(err) : rq;
3206}
3207
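    /*
     * Submit a request on the kernel context, boosted to maximum
     * priority, that writes @id into the first dword of the shared page.
     * Each write both forces a preemption of the running GPR clients
     * and, by raising result[0], releases their next semaphore step.
     */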
3208static int preempt_user(struct intel_engine_cs *engine,
3209                        struct i915_vma *global,
3210                        int id)
3211{
3212        struct i915_sched_attr attr = {
3213                .priority = I915_PRIORITY_MAX
3214        };
3215        struct i915_request *rq;
3216        int err = 0;
3217        u32 *cs;
3218
3219        rq = intel_engine_create_kernel_request(engine);
3220        if (IS_ERR(rq))
3221                return PTR_ERR(rq);
3222
3223        cs = intel_ring_begin(rq, 4);
3224        if (IS_ERR(cs)) {
3225                i915_request_add(rq);
3226                return PTR_ERR(cs);
3227        }
3228
3229        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3230        *cs++ = i915_ggtt_offset(global);
3231        *cs++ = 0;
3232        *cs++ = id;
3233
3234        intel_ring_advance(rq, cs);
3235
3236        i915_request_get(rq);
3237        i915_request_add(rq);
3238
3239        engine->schedule(rq, &attr);
3240
3241        if (i915_request_wait(rq, 0, HZ / 2) < 0)
3242                err = -ETIME;
3243        i915_request_put(rq);
3244
3245        return err;
3246}
3247
3248static int live_preempt_user(void *arg)
3249{
3250        struct intel_gt *gt = arg;
3251        struct intel_engine_cs *engine;
3252        struct i915_vma *global;
3253        enum intel_engine_id id;
3254        u32 *result;
3255        int err = 0;
3256
3257        /*
3258         * In our other tests, we look at preemption in carefully
3259         * controlled conditions in the ringbuffer. Since most of the
3260         * time is spent in user batches, most of our preemptions naturally
3261         * occur there. We want to verify that when we preempt inside a batch
3262         * we continue on from the current instruction and do not roll back
3263         * to the start, or another earlier arbitration point.
3264         *
3265         * To verify this, we create a batch which is a mixture of
3266         * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3267         * a few preempting contexts thrown into the mix, we look for any
3268         * repeated instructions (which show up as incorrect values).
3269         */
3270
3271        global = create_global(gt, 4096);
3272        if (IS_ERR(global))
3273                return PTR_ERR(global);
3274
3275        result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3276        if (IS_ERR(result)) {
3277                i915_vma_unpin_and_release(&global, 0);
3278                return PTR_ERR(result);
3279        }
3280
3281        for_each_engine(engine, gt, id) {
3282                struct i915_request *client[3] = {};
3283                struct igt_live_test t;
3284                int i;
3285
3286                if (!intel_engine_has_preemption(engine))
3287                        continue;
3288
3289                if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3290                        continue; /* we need per-context GPR */
3291
3292                if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3293                        err = -EIO;
3294                        break;
3295                }
3296
3297                memset(result, 0, 4096);
3298
3299                for (i = 0; i < ARRAY_SIZE(client); i++) {
3300                        struct i915_request *rq;
3301
3302                        rq = create_gpr_client(engine, global,
3303                                               NUM_GPR * i * sizeof(u32));
3304                        if (IS_ERR(rq)) {
3305                                err = PTR_ERR(rq);
3306                                goto end_test;
3307                        }
3308
3309                        client[i] = rq;
3310                }
3311
3312                /* Continuously preempt the set of 3 running contexts */
3313                for (i = 1; i <= NUM_GPR; i++) {
3314                        err = preempt_user(engine, global, i);
3315                        if (err)
3316                                goto end_test;
3317                }
3318
3319                if (READ_ONCE(result[0]) != NUM_GPR) {
3320                        pr_err("%s: Failed to release semaphore\n",
3321                               engine->name);
3322                        err = -EIO;
3323                        goto end_test;
3324                }
3325
3326                for (i = 0; i < ARRAY_SIZE(client); i++) {
3327                        int gpr;
3328
3329                        if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3330                                err = -ETIME;
3331                                goto end_test;
3332                        }
3333
3334                        for (gpr = 1; gpr < NUM_GPR; gpr++) {
3335                                if (result[NUM_GPR * i + gpr] != 1) {
3336                                        pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3337                                               engine->name,
3338                                               i, gpr, result[NUM_GPR * i + gpr]);
3339                                        err = -EINVAL;
3340                                        goto end_test;
3341                                }
3342                        }
3343                }
3344
3345end_test:
3346                for (i = 0; i < ARRAY_SIZE(client); i++) {
3347                        if (!client[i])
3348                                break;
3349
3350                        i915_request_put(client[i]);
3351                }
3352
3353                /* Flush the semaphores on error */
3354                smp_store_mb(result[0], -1);
3355                if (igt_live_test_end(&t))
3356                        err = -EIO;
3357                if (err)
3358                        break;
3359        }
3360
3361        i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3362        return err;
3363}
3364
3365static int live_preempt_timeout(void *arg)
3366{
3367        struct intel_gt *gt = arg;
3368        struct i915_gem_context *ctx_hi, *ctx_lo;
3369        struct igt_spinner spin_lo;
3370        struct intel_engine_cs *engine;
3371        enum intel_engine_id id;
3372        int err = -ENOMEM;
3373
3374        /*
3375         * Check that we force preemption to occur by cancelling the previous
3376         * context if it refuses to yield the GPU.
3377         */
3378        if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3379                return 0;
3380
3381        if (!intel_has_reset_engine(gt))
3382                return 0;
3383
3384        if (igt_spinner_init(&spin_lo, gt))
3385                return -ENOMEM;
3386
3387        ctx_hi = kernel_context(gt->i915);
3388        if (!ctx_hi)
3389                goto err_spin_lo;
3390        ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3391
3392        ctx_lo = kernel_context(gt->i915);
3393        if (!ctx_lo)
3394                goto err_ctx_hi;
3395        ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3396
3397        for_each_engine(engine, gt, id) {
3398                unsigned long saved_timeout;
3399                struct i915_request *rq;
3400
3401                if (!intel_engine_has_preemption(engine))
3402                        continue;
3403
3404                rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3405                                            MI_NOOP); /* preemption disabled */
3406                if (IS_ERR(rq)) {
3407                        err = PTR_ERR(rq);
3408                        goto err_ctx_lo;
3409                }
3410
3411                i915_request_add(rq);
3412                if (!igt_wait_for_spinner(&spin_lo, rq)) {
3413                        intel_gt_set_wedged(gt);
3414                        err = -EIO;
3415                        goto err_ctx_lo;
3416                }
3417
3418                rq = igt_request_alloc(ctx_hi, engine);
3419                if (IS_ERR(rq)) {
3420                        igt_spinner_end(&spin_lo);
3421                        err = PTR_ERR(rq);
3422                        goto err_ctx_lo;
3423                }
3424
3425                /* Flush the previous CS ack before changing timeouts */
3426                while (READ_ONCE(engine->execlists.pending[0]))
3427                        cpu_relax();
3428
3429                saved_timeout = engine->props.preempt_timeout_ms;
3430                engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3431
3432                i915_request_get(rq);
3433                i915_request_add(rq);
3434
3435                intel_engine_flush_submission(engine);
3436                engine->props.preempt_timeout_ms = saved_timeout;
3437
3438                if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3439                        intel_gt_set_wedged(gt);
3440                        i915_request_put(rq);
3441                        err = -ETIME;
3442                        goto err_ctx_lo;
3443                }
3444
3445                igt_spinner_end(&spin_lo);
3446                i915_request_put(rq);
3447        }
3448
3449        err = 0;
3450err_ctx_lo:
3451        kernel_context_close(ctx_lo);
3452err_ctx_hi:
3453        kernel_context_close(ctx_hi);
3454err_spin_lo:
3455        igt_spinner_fini(&spin_lo);
3456        return err;
3457}
3458
3459static int random_range(struct rnd_state *rnd, int min, int max)
3460{
3461        return i915_prandom_u32_max_state(max - min, rnd) + min;
3462}
3463
3464static int random_priority(struct rnd_state *rnd)
3465{
3466        return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3467}
3468
3469struct preempt_smoke {
3470        struct intel_gt *gt;
3471        struct i915_gem_context **contexts;
3472        struct intel_engine_cs *engine;
3473        struct drm_i915_gem_object *batch;
3474        unsigned int ncontext;
3475        struct rnd_state prng;
3476        unsigned long count;
3477};
3478
3479static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3480{
3481        return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3482                                                          &smoke->prng)];
3483}
3484
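    /*
     * Submit a single request from @ctx at the given priority, optionally
     * running the shared MI_ARB_CHECK batch so there are plenty of
     * arbitration points at which it may be preempted.
     */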
3485static int smoke_submit(struct preempt_smoke *smoke,
3486                        struct i915_gem_context *ctx, int prio,
3487                        struct drm_i915_gem_object *batch)
3488{
3489        struct i915_request *rq;
3490        struct i915_vma *vma = NULL;
3491        int err = 0;
3492
3493        if (batch) {
3494                struct i915_address_space *vm;
3495
3496                vm = i915_gem_context_get_vm_rcu(ctx);
3497                vma = i915_vma_instance(batch, vm, NULL);
3498                i915_vm_put(vm);
3499                if (IS_ERR(vma))
3500                        return PTR_ERR(vma);
3501
3502                err = i915_vma_pin(vma, 0, 0, PIN_USER);
3503                if (err)
3504                        return err;
3505        }
3506
3507        ctx->sched.priority = prio;
3508
3509        rq = igt_request_alloc(ctx, smoke->engine);
3510        if (IS_ERR(rq)) {
3511                err = PTR_ERR(rq);
3512                goto unpin;
3513        }
3514
3515        if (vma) {
3516                i915_vma_lock(vma);
3517                err = i915_request_await_object(rq, vma->obj, false);
3518                if (!err)
3519                        err = i915_vma_move_to_active(vma, rq, 0);
3520                if (!err)
3521                        err = rq->engine->emit_bb_start(rq,
3522                                                        vma->node.start,
3523                                                        PAGE_SIZE, 0);
3524                i915_vma_unlock(vma);
3525        }
3526
3527        i915_request_add(rq);
3528
3529unpin:
3530        if (vma)
3531                i915_vma_unpin(vma);
3532
3533        return err;
3534}
3535
3536static int smoke_crescendo_thread(void *arg)
3537{
3538        struct preempt_smoke *smoke = arg;
3539        IGT_TIMEOUT(end_time);
3540        unsigned long count;
3541
3542        count = 0;
3543        do {
3544                struct i915_gem_context *ctx = smoke_context(smoke);
3545                int err;
3546
3547                err = smoke_submit(smoke,
3548                                   ctx, count % I915_PRIORITY_MAX,
3549                                   smoke->batch);
3550                if (err)
3551                        return err;
3552
3553                count++;
3554        } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3555
3556        smoke->count = count;
3557        return 0;
3558}
3559
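    /*
     * Run one submitter per engine, each picking a random context and
     * cycling the priority upwards (count % I915_PRIORITY_MAX) so that
     * later submissions tend to preempt the earlier ones.
     */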
3560static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3561#define BATCH BIT(0)
3562{
3563        struct task_struct *tsk[I915_NUM_ENGINES] = {};
3564        struct preempt_smoke arg[I915_NUM_ENGINES];
3565        struct intel_engine_cs *engine;
3566        enum intel_engine_id id;
3567        unsigned long count;
3568        int err = 0;
3569
3570        for_each_engine(engine, smoke->gt, id) {
3571                arg[id] = *smoke;
3572                arg[id].engine = engine;
3573                if (!(flags & BATCH))
3574                        arg[id].batch = NULL;
3575                arg[id].count = 0;
3576
3577                tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3578                                      "igt/smoke:%d", id);
3579                if (IS_ERR(tsk[id])) {
3580                        err = PTR_ERR(tsk[id]);
3581                        break;
3582                }
3583                get_task_struct(tsk[id]);
3584        }
3585
3586        yield(); /* start all threads before we kthread_stop() */
3587
3588        count = 0;
3589        for_each_engine(engine, smoke->gt, id) {
3590                int status;
3591
3592                if (IS_ERR_OR_NULL(tsk[id]))
3593                        continue;
3594
3595                status = kthread_stop(tsk[id]);
3596                if (status && !err)
3597                        err = status;
3598
3599                count += arg[id].count;
3600
3601                put_task_struct(tsk[id]);
3602        }
3603
3604        pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3605                count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3606        return err;
3607}
3608
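    /*
     * Single threaded variant: walk every engine, submitting from a
     * random context at a random priority, until the timeout expires or
     * we have submitted at least one request per context.
     */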
3609static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3610{
3611        enum intel_engine_id id;
3612        IGT_TIMEOUT(end_time);
3613        unsigned long count;
3614
3615        count = 0;
3616        do {
3617                for_each_engine(smoke->engine, smoke->gt, id) {
3618                        struct i915_gem_context *ctx = smoke_context(smoke);
3619                        int err;
3620
3621                        err = smoke_submit(smoke,
3622                                           ctx, random_priority(&smoke->prng),
3623                                           flags & BATCH ? smoke->batch : NULL);
3624                        if (err)
3625                                return err;
3626
3627                        count++;
3628                }
3629        } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3630
3631        pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3632                count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3633        return 0;
3634}
3635
3636static int live_preempt_smoke(void *arg)
3637{
3638        struct preempt_smoke smoke = {
3639                .gt = arg,
3640                .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3641                .ncontext = 256,
3642        };
3643        const unsigned int phase[] = { 0, BATCH };
3644        struct igt_live_test t;
3645        int err = -ENOMEM;
3646        u32 *cs;
3647        int n;
3648
3649        smoke.contexts = kmalloc_array(smoke.ncontext,
3650                                       sizeof(*smoke.contexts),
3651                                       GFP_KERNEL);
3652        if (!smoke.contexts)
3653                return -ENOMEM;
3654
3655        smoke.batch =
3656                i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3657        if (IS_ERR(smoke.batch)) {
3658                err = PTR_ERR(smoke.batch);
3659                goto err_free;
3660        }
3661
3662        cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3663        if (IS_ERR(cs)) {
3664                err = PTR_ERR(cs);
3665                goto err_batch;
3666        }
3667        for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3668                cs[n] = MI_ARB_CHECK;
3669        cs[n] = MI_BATCH_BUFFER_END;
3670        i915_gem_object_flush_map(smoke.batch);
3671        i915_gem_object_unpin_map(smoke.batch);
3672
3673        if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3674                err = -EIO;
3675                goto err_batch;
3676        }
3677
3678        for (n = 0; n < smoke.ncontext; n++) {
3679                smoke.contexts[n] = kernel_context(smoke.gt->i915);
3680                if (!smoke.contexts[n])
3681                        goto err_ctx;
3682        }
3683
3684        for (n = 0; n < ARRAY_SIZE(phase); n++) {
3685                err = smoke_crescendo(&smoke, phase[n]);
3686                if (err)
3687                        goto err_ctx;
3688
3689                err = smoke_random(&smoke, phase[n]);
3690                if (err)
3691                        goto err_ctx;
3692        }
3693
3694err_ctx:
3695        if (igt_live_test_end(&t))
3696                err = -EIO;
3697
3698        for (n = 0; n < smoke.ncontext; n++) {
3699                if (!smoke.contexts[n])
3700                        break;
3701                kernel_context_close(smoke.contexts[n]);
3702        }
3703
3704err_batch:
3705        i915_gem_object_put(smoke.batch);
3706err_free:
3707        kfree(smoke.contexts);
3708
3709        return err;
3710}
3711
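    /*
     * Measure the submission latency of empty requests through @nctx
     * virtual engines built from the same siblings. With CHAIN we queue
     * a whole run of requests on one context before moving to the next;
     * otherwise the contexts are interleaved request by request.
     */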
3712static int nop_virtual_engine(struct intel_gt *gt,
3713                              struct intel_engine_cs **siblings,
3714                              unsigned int nsibling,
3715                              unsigned int nctx,
3716                              unsigned int flags)
3717#define CHAIN BIT(0)
3718{
3719        IGT_TIMEOUT(end_time);
3720        struct i915_request *request[16] = {};
3721        struct intel_context *ve[16];
3722        unsigned long n, prime, nc;
3723        struct igt_live_test t;
3724        ktime_t times[2] = {};
3725        int err;
3726
3727        GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3728
3729        for (n = 0; n < nctx; n++) {
3730                ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3731                if (IS_ERR(ve[n])) {
3732                        err = PTR_ERR(ve[n]);
3733                        nctx = n;
3734                        goto out;
3735                }
3736
3737                err = intel_context_pin(ve[n]);
3738                if (err) {
3739                        intel_context_put(ve[n]);
3740                        nctx = n;
3741                        goto out;
3742                }
3743        }
3744
3745        err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3746        if (err)
3747                goto out;
3748
3749        for_each_prime_number_from(prime, 1, 8192) {
3750                times[1] = ktime_get_raw();
3751
3752                if (flags & CHAIN) {
3753                        for (nc = 0; nc < nctx; nc++) {
3754                                for (n = 0; n < prime; n++) {
3755                                        struct i915_request *rq;
3756
3757                                        rq = i915_request_create(ve[nc]);
3758                                        if (IS_ERR(rq)) {
3759                                                err = PTR_ERR(rq);
3760                                                goto out;
3761                                        }
3762
3763                                        if (request[nc])
3764                                                i915_request_put(request[nc]);
3765                                        request[nc] = i915_request_get(rq);
3766                                        i915_request_add(rq);
3767                                }
3768                        }
3769                } else {
3770                        for (n = 0; n < prime; n++) {
3771                                for (nc = 0; nc < nctx; nc++) {
3772                                        struct i915_request *rq;
3773
3774                                        rq = i915_request_create(ve[nc]);
3775                                        if (IS_ERR(rq)) {
3776                                                err = PTR_ERR(rq);
3777                                                goto out;
3778                                        }
3779
3780                                        if (request[nc])
3781                                                i915_request_put(request[nc]);
3782                                        request[nc] = i915_request_get(rq);
3783                                        i915_request_add(rq);
3784                                }
3785                        }
3786                }
3787
3788                for (nc = 0; nc < nctx; nc++) {
3789                        if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3790                                pr_err("%s(%s): wait for %llx:%lld timed out\n",
3791                                       __func__, ve[0]->engine->name,
3792                                       request[nc]->fence.context,
3793                                       request[nc]->fence.seqno);
3794
3795                                GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3796                                          __func__, ve[0]->engine->name,
3797                                          request[nc]->fence.context,
3798                                          request[nc]->fence.seqno);
3799                                GEM_TRACE_DUMP();
3800                                intel_gt_set_wedged(gt);
3801                                break;
3802                        }
3803                }
3804
3805                times[1] = ktime_sub(ktime_get_raw(), times[1]);
3806                if (prime == 1)
3807                        times[0] = times[1];
3808
3809                for (nc = 0; nc < nctx; nc++) {
3810                        i915_request_put(request[nc]);
3811                        request[nc] = NULL;
3812                }
3813
3814                if (__igt_timeout(end_time, NULL))
3815                        break;
3816        }
3817
3818        err = igt_live_test_end(&t);
3819        if (err)
3820                goto out;
3821
3822        pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3823                nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3824                prime, div64_u64(ktime_to_ns(times[1]), prime));
3825
3826out:
3827        if (igt_flush_test(gt->i915))
3828                err = -EIO;
3829
3830        for (nc = 0; nc < nctx; nc++) {
3831                i915_request_put(request[nc]);
3832                intel_context_unpin(ve[nc]);
3833                intel_context_put(ve[nc]);
3834        }
3835        return err;
3836}
3837
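    /*
     * Collect all physical engines of @class, optionally filtered (e.g.
     * by timeslice support), for use as the siblings of a virtual engine.
     */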
3838static unsigned int
3839__select_siblings(struct intel_gt *gt,
3840                  unsigned int class,
3841                  struct intel_engine_cs **siblings,
3842                  bool (*filter)(const struct intel_engine_cs *))
3843{
3844        unsigned int n = 0;
3845        unsigned int inst;
3846
3847        for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3848                if (!gt->engine_class[class][inst])
3849                        continue;
3850
3851                if (filter && !filter(gt->engine_class[class][inst]))
3852                        continue;
3853
3854                siblings[n++] = gt->engine_class[class][inst];
3855        }
3856
3857        return n;
3858}
3859
3860static unsigned int
3861select_siblings(struct intel_gt *gt,
3862                unsigned int class,
3863                struct intel_engine_cs **siblings)
3864{
3865        return __select_siblings(gt, class, siblings, NULL);
3866}
3867
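    /*
     * Quick sanity check of the virtual engine: first wrap each physical
     * engine individually, then for every class with at least two
     * siblings exercise an increasing number of contexts over the
     * virtual engine, finishing with a chained submission.
     */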
3868static int live_virtual_engine(void *arg)
3869{
3870        struct intel_gt *gt = arg;
3871        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3872        struct intel_engine_cs *engine;
3873        enum intel_engine_id id;
3874        unsigned int class;
3875        int err;
3876
3877        if (intel_uc_uses_guc_submission(&gt->uc))
3878                return 0;
3879
3880        for_each_engine(engine, gt, id) {
3881                err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3882                if (err) {
3883                        pr_err("Failed to wrap engine %s: err=%d\n",
3884                               engine->name, err);
3885                        return err;
3886                }
3887        }
3888
3889        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3890                int nsibling, n;
3891
3892                nsibling = select_siblings(gt, class, siblings);
3893                if (nsibling < 2)
3894                        continue;
3895
3896                for (n = 1; n <= nsibling + 1; n++) {
3897                        err = nop_virtual_engine(gt, siblings, nsibling,
3898                                                 n, 0);
3899                        if (err)
3900                                return err;
3901                }
3902
3903                err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3904                if (err)
3905                        return err;
3906        }
3907
3908        return 0;
3909}
3910
3911static int mask_virtual_engine(struct intel_gt *gt,
3912                               struct intel_engine_cs **siblings,
3913                               unsigned int nsibling)
3914{
3915        struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3916        struct intel_context *ve;
3917        struct igt_live_test t;
3918        unsigned int n;
3919        int err;
3920
3921        /*
3922         * Check that by setting the execution mask on a request, we can
3923         * restrict it to our desired engine within the virtual engine.
3924         */
3925
3926        ve = intel_execlists_create_virtual(siblings, nsibling);
3927        if (IS_ERR(ve)) {
3928                err = PTR_ERR(ve);
3929                goto out_close;
3930        }
3931
3932        err = intel_context_pin(ve);
3933        if (err)
3934                goto out_put;
3935
3936        err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3937        if (err)
3938                goto out_unpin;
3939
3940        for (n = 0; n < nsibling; n++) {
3941                request[n] = i915_request_create(ve);
3942                if (IS_ERR(request[n])) {
3943                        err = PTR_ERR(request[n]);
3944                        nsibling = n;
3945                        goto out;
3946                }
3947
3948                /* Reverse order as it's more likely to be unnatural */
3949                request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3950
3951                i915_request_get(request[n]);
3952                i915_request_add(request[n]);
3953        }
3954
3955        for (n = 0; n < nsibling; n++) {
3956                if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3957                        pr_err("%s(%s): wait for %llx:%lld timed out\n",
3958                               __func__, ve->engine->name,
3959                               request[n]->fence.context,
3960                               request[n]->fence.seqno);
3961
3962                        GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3963                                  __func__, ve->engine->name,
3964                                  request[n]->fence.context,
3965                                  request[n]->fence.seqno);
3966                        GEM_TRACE_DUMP();
3967                        intel_gt_set_wedged(gt);
3968                        err = -EIO;
3969                        goto out;
3970                }
3971
3972                if (request[n]->engine != siblings[nsibling - n - 1]) {
3973                        pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3974                               request[n]->engine->name,
3975                               siblings[nsibling - n - 1]->name);
3976                        err = -EINVAL;
3977                        goto out;
3978                }
3979        }
3980
3981        err = igt_live_test_end(&t);
3982out:
3983        if (igt_flush_test(gt->i915))
3984                err = -EIO;
3985
3986        for (n = 0; n < nsibling; n++)
3987                i915_request_put(request[n]);
3988
3989out_unpin:
3990        intel_context_unpin(ve);
3991out_put:
3992        intel_context_put(ve);
3993out_close:
3994        return err;
3995}
3996
3997static int live_virtual_mask(void *arg)
3998{
3999        struct intel_gt *gt = arg;
4000        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4001        unsigned int class;
4002        int err;
4003
4004        if (intel_uc_uses_guc_submission(&gt->uc))
4005                return 0;
4006
4007        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4008                unsigned int nsibling;
4009
4010                nsibling = select_siblings(gt, class, siblings);
4011                if (nsibling < 2)
4012                        continue;
4013
4014                err = mask_virtual_engine(gt, siblings, nsibling);
4015                if (err)
4016                        return err;
4017        }
4018
4019        return 0;
4020}
4021
4022static int slicein_virtual_engine(struct intel_gt *gt,
4023                                  struct intel_engine_cs **siblings,
4024                                  unsigned int nsibling)
4025{
4026        const long timeout = slice_timeout(siblings[0]);
4027        struct intel_context *ce;
4028        struct i915_request *rq;
4029        struct igt_spinner spin;
4030        unsigned int n;
4031        int err = 0;
4032
4033        /*
4034         * Virtual requests must take part in timeslicing on the target engines.
4035         */
4036
4037        if (igt_spinner_init(&spin, gt))
4038                return -ENOMEM;
4039
4040        for (n = 0; n < nsibling; n++) {
4041                ce = intel_context_create(siblings[n]);
4042                if (IS_ERR(ce)) {
4043                        err = PTR_ERR(ce);
4044                        goto out;
4045                }
4046
4047                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4048                intel_context_put(ce);
4049                if (IS_ERR(rq)) {
4050                        err = PTR_ERR(rq);
4051                        goto out;
4052                }
4053
4054                i915_request_add(rq);
4055        }
4056
4057        ce = intel_execlists_create_virtual(siblings, nsibling);
4058        if (IS_ERR(ce)) {
4059                err = PTR_ERR(ce);
4060                goto out;
4061        }
4062
4063        rq = intel_context_create_request(ce);
4064        intel_context_put(ce);
4065        if (IS_ERR(rq)) {
4066                err = PTR_ERR(rq);
4067                goto out;
4068        }
4069
4070        i915_request_get(rq);
4071        i915_request_add(rq);
4072        if (i915_request_wait(rq, 0, timeout) < 0) {
4073                GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4074                              __func__, rq->engine->name);
4075                GEM_TRACE_DUMP();
4076                intel_gt_set_wedged(gt);
4077                err = -EIO;
4078        }
4079        i915_request_put(rq);
4080
4081out:
4082        igt_spinner_end(&spin);
4083        if (igt_flush_test(gt->i915))
4084                err = -EIO;
4085        igt_spinner_fini(&spin);
4086        return err;
4087}
4088
4089static int sliceout_virtual_engine(struct intel_gt *gt,
4090                                   struct intel_engine_cs **siblings,
4091                                   unsigned int nsibling)
4092{
4093        const long timeout = slice_timeout(siblings[0]);
4094        struct intel_context *ce;
4095        struct i915_request *rq;
4096        struct igt_spinner spin;
4097        unsigned int n;
4098        int err = 0;
4099
4100        /*
4101         * Virtual requests must allow others a fair timeslice.
4102         */
4103
4104        if (igt_spinner_init(&spin, gt))
4105                return -ENOMEM;
4106
4107        /* XXX We do not handle oversubscription and fairness with normal rq */
4108        for (n = 0; n < nsibling; n++) {
4109                ce = intel_execlists_create_virtual(siblings, nsibling);
4110                if (IS_ERR(ce)) {
4111                        err = PTR_ERR(ce);
4112                        goto out;
4113                }
4114
4115                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4116                intel_context_put(ce);
4117                if (IS_ERR(rq)) {
4118                        err = PTR_ERR(rq);
4119                        goto out;
4120                }
4121
4122                i915_request_add(rq);
4123        }
4124
4125        for (n = 0; !err && n < nsibling; n++) {
4126                ce = intel_context_create(siblings[n]);
4127                if (IS_ERR(ce)) {
4128                        err = PTR_ERR(ce);
4129                        goto out;
4130                }
4131
4132                rq = intel_context_create_request(ce);
4133                intel_context_put(ce);
4134                if (IS_ERR(rq)) {
4135                        err = PTR_ERR(rq);
4136                        goto out;
4137                }
4138
4139                i915_request_get(rq);
4140                i915_request_add(rq);
4141                if (i915_request_wait(rq, 0, timeout) < 0) {
4142                        GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4143                                      __func__, siblings[n]->name);
4144                        GEM_TRACE_DUMP();
4145                        intel_gt_set_wedged(gt);
4146                        err = -EIO;
4147                }
4148                i915_request_put(rq);
4149        }
4150
4151out:
4152        igt_spinner_end(&spin);
4153        if (igt_flush_test(gt->i915))
4154                err = -EIO;
4155        igt_spinner_fini(&spin);
4156        return err;
4157}
4158
4159static int live_virtual_slice(void *arg)
4160{
4161        struct intel_gt *gt = arg;
4162        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4163        unsigned int class;
4164        int err;
4165
4166        if (intel_uc_uses_guc_submission(&gt->uc))
4167                return 0;
4168
4169        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4170                unsigned int nsibling;
4171
4172                nsibling = __select_siblings(gt, class, siblings,
4173                                             intel_engine_has_timeslices);
4174                if (nsibling < 2)
4175                        continue;
4176
4177                err = slicein_virtual_engine(gt, siblings, nsibling);
4178                if (err)
4179                        return err;
4180
4181                err = sliceout_virtual_engine(gt, siblings, nsibling);
4182                if (err)
4183                        return err;
4184        }
4185
4186        return 0;
4187}
4188
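    /*
     * Submit a chain of requests over the virtual engine, pinning each
     * request to a different sibling in turn. Each request stores GPR[n]
     * into the scratch page and preloads GPR[n + 1] for its successor,
     * so the readback only matches (scratch[n] == n) if the CS_GPR
     * registers in the context image follow the virtual context from one
     * physical engine to the next.
     */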
4189static int preserved_virtual_engine(struct intel_gt *gt,
4190                                    struct intel_engine_cs **siblings,
4191                                    unsigned int nsibling)
4192{
4193        struct i915_request *last = NULL;
4194        struct intel_context *ve;
4195        struct i915_vma *scratch;
4196        struct igt_live_test t;
4197        unsigned int n;
4198        int err = 0;
4199        u32 *cs;
4200
4201        scratch =
4202                __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4203                                                    PAGE_SIZE);
4204        if (IS_ERR(scratch))
4205                return PTR_ERR(scratch);
4206
4207        err = i915_vma_sync(scratch);
4208        if (err)
4209                goto out_scratch;
4210
4211        ve = intel_execlists_create_virtual(siblings, nsibling);
4212        if (IS_ERR(ve)) {
4213                err = PTR_ERR(ve);
4214                goto out_scratch;
4215        }
4216
4217        err = intel_context_pin(ve);
4218        if (err)
4219                goto out_put;
4220
4221        err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4222        if (err)
4223                goto out_unpin;
4224
4225        for (n = 0; n < NUM_GPR_DW; n++) {
4226                struct intel_engine_cs *engine = siblings[n % nsibling];
4227                struct i915_request *rq;
4228
4229                rq = i915_request_create(ve);
4230                if (IS_ERR(rq)) {
4231                        err = PTR_ERR(rq);
4232                        goto out_end;
4233                }
4234
4235                i915_request_put(last);
4236                last = i915_request_get(rq);
4237
4238                cs = intel_ring_begin(rq, 8);
4239                if (IS_ERR(cs)) {
4240                        i915_request_add(rq);
4241                        err = PTR_ERR(cs);
4242                        goto out_end;
4243                }
4244
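                    /*
                     * Store the GPR left behind by the previous request (zero
                     * for the very first) and then prime the next GPR for our
                     * successor, which may run on a different sibling.
                     */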
4245                *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4246                *cs++ = CS_GPR(engine, n);
4247                *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4248                *cs++ = 0;
4249
4250                *cs++ = MI_LOAD_REGISTER_IMM(1);
4251                *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4252                *cs++ = n + 1;
4253
4254                *cs++ = MI_NOOP;
4255                intel_ring_advance(rq, cs);
4256
4257                /* Restrict this request to run on a particular engine */
4258                rq->execution_mask = engine->mask;
4259                i915_request_add(rq);
4260        }
4261
4262        if (i915_request_wait(last, 0, HZ / 5) < 0) {
4263                err = -ETIME;
4264                goto out_end;
4265        }
4266
4267        cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4268        if (IS_ERR(cs)) {
4269                err = PTR_ERR(cs);
4270                goto out_end;
4271        }
4272
4273        for (n = 0; n < NUM_GPR_DW; n++) {
4274                if (cs[n] != n) {
4275                        pr_err("Incorrect value[%d] found for GPR[%d]\n",
4276                               cs[n], n);
4277                        err = -EINVAL;
4278                        break;
4279                }
4280        }
4281
4282        i915_gem_object_unpin_map(scratch->obj);
4283
4284out_end:
4285        if (igt_live_test_end(&t))
4286                err = -EIO;
4287        i915_request_put(last);
4288out_unpin:
4289        intel_context_unpin(ve);
4290out_put:
4291        intel_context_put(ve);
4292out_scratch:
4293        i915_vma_unpin_and_release(&scratch, 0);
4294        return err;
4295}
4296
4297static int live_virtual_preserved(void *arg)
4298{
4299        struct intel_gt *gt = arg;
4300        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4301        unsigned int class;
4302
4303        /*
4304         * Check that the context image retains non-privileged (user) registers
4305         * from one engine to the next. For this we check that the CS_GPR
4306         * are preserved.
4307         */
4308
4309        if (intel_uc_uses_guc_submission(&gt->uc))
4310                return 0;
4311
4312        /* As we use CS_GPR we cannot run before they existed on all engines. */
4313        if (GRAPHICS_VER(gt->i915) < 9)
4314                return 0;
4315
4316        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4317                int nsibling, err;
4318
4319                nsibling = select_siblings(gt, class, siblings);
4320                if (nsibling < 2)
4321                        continue;
4322
4323                err = preserved_virtual_engine(gt, siblings, nsibling);
4324                if (err)
4325                        return err;
4326        }
4327
4328        return 0;
4329}
4330
4331static int bond_virtual_engine(struct intel_gt *gt,
4332                               unsigned int class,
4333                               struct intel_engine_cs **siblings,
4334                               unsigned int nsibling,
4335                               unsigned int flags)
4336#define BOND_SCHEDULE BIT(0)
4337{
4338        struct intel_engine_cs *master;
4339        struct i915_request *rq[16];
4340        enum intel_engine_id id;
4341        struct igt_spinner spin;
4342        unsigned long n;
4343        int err;
4344
4345        /*
4346         * A set of bonded requests is intended to be run concurrently
4347         * across a number of engines. We use one request per-engine
4348         * and a magic fence to schedule each of the bonded requests
4349         * at the same time. A consequence of our current scheduler is that
4350         * we only move requests to the HW ready queue when the request
4351         * becomes ready, that is when all of its prerequisite fences have
4352         * been signaled. As one of those fences is the master submit fence,
4353         * there is a delay on all secondary fences as the HW may be
4354         * currently busy. Equally, as all the requests are independent,
4355         * they may have other fences that delay individual request
4356         * submission to HW. Ergo, we do not guarantee that all requests are
4357         * immediately submitted to HW at the same time, just that if the
4358         * rules are abided by, they are ready at the same time as the
4359         * first is submitted. Userspace can embed semaphores in its batch
4360         * to ensure parallel execution of its phases as it requires.
4361         * Though naturally it gets requested that perhaps the scheduler should
4362         * take care of parallel execution, even across preemption events on
4363         * different HW. (The proper answer is of course "lalalala".)
4364         *
4365         * With the submit-fence, we have identified three possible phases
4366         * of synchronisation depending on the master fence: queued (not
4367         * ready), executing, and signaled. The first two are quite simple
4368         * and checked below. However, the signaled master fence handling is
4369         * contentious. Currently we do not distinguish between a signaled
4370         * fence and an expired fence, as once signaled it does not convey
4371         * any information about the previous execution. It may even be freed
4372         * and hence checking later it may not exist at all. Ergo we currently
4373         * do not apply the bonding constraint for an already signaled fence,
4374         * as our expectation is that it should not constrain the secondaries
4375         * and is outside of the scope of the bonded request API (i.e. all
4376         * userspace requests are meant to be running in parallel). As
4377         * it imposes no constraint, and is effectively a no-op, we do not
4378         * check below as normal execution flows are checked extensively above.
4379         *
4380         * XXX Is the degenerate handling of signaled submit fences the
4381         * expected behaviour for userspace?
4382         */
4383
4384        GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4385
4386        if (igt_spinner_init(&spin, gt))
4387                return -ENOMEM;
4388
4389        err = 0;
4390        rq[0] = ERR_PTR(-ENOMEM);
4391        for_each_engine(master, gt, id) {
4392                struct i915_sw_fence fence = {};
4393                struct intel_context *ce;
4394
4395                if (master->class == class)
4396                        continue;
4397
4398                ce = intel_context_create(master);
4399                if (IS_ERR(ce)) {
4400                        err = PTR_ERR(ce);
4401                        goto out;
4402                }
4403
4404                memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4405
4406                rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4407                intel_context_put(ce);
4408                if (IS_ERR(rq[0])) {
4409                        err = PTR_ERR(rq[0]);
4410                        goto out;
4411                }
4412                i915_request_get(rq[0]);
4413
4414                if (flags & BOND_SCHEDULE) {
4415                        onstack_fence_init(&fence);
4416                        err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4417                                                               &fence,
4418                                                               GFP_KERNEL);
4419                }
4420
4421                i915_request_add(rq[0]);
4422                if (err < 0)
4423                        goto out;
4424
4425                if (!(flags & BOND_SCHEDULE) &&
4426                    !igt_wait_for_spinner(&spin, rq[0])) {
4427                        err = -EIO;
4428                        goto out;
4429                }
4430
4431                for (n = 0; n < nsibling; n++) {
4432                        struct intel_context *ve;
4433
4434                        ve = intel_execlists_create_virtual(siblings, nsibling);
4435                        if (IS_ERR(ve)) {
4436                                err = PTR_ERR(ve);
4437                                onstack_fence_fini(&fence);
4438                                goto out;
4439                        }
4440
4441                        err = intel_virtual_engine_attach_bond(ve->engine,
4442                                                               master,
4443                                                               siblings[n]);
4444                        if (err) {
4445                                intel_context_put(ve);
4446                                onstack_fence_fini(&fence);
4447                                goto out;
4448                        }
4449
4450                        err = intel_context_pin(ve);
4451                        intel_context_put(ve);
4452                        if (err) {
4453                                onstack_fence_fini(&fence);
4454                                goto out;
4455                        }
4456
4457                        rq[n + 1] = i915_request_create(ve);
4458                        intel_context_unpin(ve);
4459                        if (IS_ERR(rq[n + 1])) {
4460                                err = PTR_ERR(rq[n + 1]);
4461                                onstack_fence_fini(&fence);
4462                                goto out;
4463                        }
4464                        i915_request_get(rq[n + 1]);
4465
4466                        err = i915_request_await_execution(rq[n + 1],
4467                                                           &rq[0]->fence,
4468                                                           ve->engine->bond_execute);
4469                        i915_request_add(rq[n + 1]);
4470                        if (err < 0) {
4471                                onstack_fence_fini(&fence);
4472                                goto out;
4473                        }
4474                }
4475                onstack_fence_fini(&fence);
4476                intel_engine_flush_submission(master);
4477                igt_spinner_end(&spin);
4478
4479                if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4480                        pr_err("Master request did not execute (on %s)!\n",
4481                               rq[0]->engine->name);
4482                        err = -EIO;
4483                        goto out;
4484                }
4485
4486                for (n = 0; n < nsibling; n++) {
4487                        if (i915_request_wait(rq[n + 1], 0,
4488                                              MAX_SCHEDULE_TIMEOUT) < 0) {
4489                                err = -EIO;
4490                                goto out;
4491                        }
4492
4493                        if (rq[n + 1]->engine != siblings[n]) {
4494                                pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4495                                       siblings[n]->name,
4496                                       rq[n + 1]->engine->name,
4497                                       rq[0]->engine->name);
4498                                err = -EINVAL;
4499                                goto out;
4500                        }
4501                }
4502
4503                for (n = 0; !IS_ERR(rq[n]); n++)
4504                        i915_request_put(rq[n]);
4505                rq[0] = ERR_PTR(-ENOMEM);
4506        }
4507
4508out:
4509        for (n = 0; !IS_ERR(rq[n]); n++)
4510                i915_request_put(rq[n]);
4511        if (igt_flush_test(gt->i915))
4512                err = -EIO;
4513
4514        igt_spinner_fini(&spin);
4515        return err;
4516}
4517
4518static int live_virtual_bond(void *arg)
4519{
4520        static const struct phase {
4521                const char *name;
4522                unsigned int flags;
4523        } phases[] = {
4524                { "", 0 },
4525                { "schedule", BOND_SCHEDULE },
4526                { },
4527        };
4528        struct intel_gt *gt = arg;
4529        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4530        unsigned int class;
4531        int err;
4532
4533        if (intel_uc_uses_guc_submission(&gt->uc))
4534                return 0;
4535
4536        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4537                const struct phase *p;
4538                int nsibling;
4539
4540                nsibling = select_siblings(gt, class, siblings);
4541                if (nsibling < 2)
4542                        continue;
4543
4544                for (p = phases; p->name; p++) {
4545                        err = bond_virtual_engine(gt,
4546                                                  class, siblings, nsibling,
4547                                                  p->flags);
4548                        if (err) {
4549                                pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4550                                       __func__, p->name, class, nsibling, err);
4551                                return err;
4552                        }
4553                }
4554        }
4555
4556        return 0;
4557}
4558
4559static int reset_virtual_engine(struct intel_gt *gt,
4560                                struct intel_engine_cs **siblings,
4561                                unsigned int nsibling)
4562{
4563        struct intel_engine_cs *engine;
4564        struct intel_context *ve;
4565        struct igt_spinner spin;
4566        struct i915_request *rq;
4567        unsigned int n;
4568        int err = 0;
4569
4570        /*
4571         * In order to support offline error capture for fast preempt reset,
4572         * we need to decouple the guilty request and ensure that it and its
4573         * descendants are not executed while the capture is in progress.
4574         */
4575
4576        if (igt_spinner_init(&spin, gt))
4577                return -ENOMEM;
4578
4579        ve = intel_execlists_create_virtual(siblings, nsibling);
4580        if (IS_ERR(ve)) {
4581                err = PTR_ERR(ve);
4582                goto out_spin;
4583        }
4584
4585        for (n = 0; n < nsibling; n++)
4586                st_engine_heartbeat_disable(siblings[n]);
4587
4588        rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4589        if (IS_ERR(rq)) {
4590                err = PTR_ERR(rq);
4591                goto out_heartbeat;
4592        }
4593        i915_request_add(rq);
4594
4595        if (!igt_wait_for_spinner(&spin, rq)) {
4596                intel_gt_set_wedged(gt);
4597                err = -ETIME;
4598                goto out_heartbeat;
4599        }
4600
4601        engine = rq->engine;
4602        GEM_BUG_ON(engine == ve->engine);
4603
4604        /* Take ownership of the reset and tasklet */
4605        err = engine_lock_reset_tasklet(engine);
4606        if (err)
4607                goto out_heartbeat;
4608
4609        engine->execlists.tasklet.callback(&engine->execlists.tasklet);
4610        GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4611
4612        /* Fake a preemption event; failed of course */
4613        spin_lock_irq(&engine->active.lock);
4614        __unwind_incomplete_requests(engine);
4615        spin_unlock_irq(&engine->active.lock);
4616        GEM_BUG_ON(rq->engine != engine);
4617
4618        /* Reset the engine while keeping our active request on hold */
4619        execlists_hold(engine, rq);
4620        GEM_BUG_ON(!i915_request_on_hold(rq));
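        /*
         * Keeping the guilty request on the hold list ensures the reset
         * below does not resubmit it behind our back, mirroring the window
         * in which offline error capture would inspect it.
         */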
4621
4622        __intel_engine_reset_bh(engine, NULL);
4623        GEM_BUG_ON(rq->fence.error != -EIO);
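        /* The engine reset marks the guilty request with -EIO (checked above). */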
4624
4625        /* Release our grasp on the engine, letting CS flow again */
4626        engine_unlock_reset_tasklet(engine);
4627
4628        /* Check that we do not resubmit the held request */
4629        i915_request_get(rq);
4630        if (!i915_request_wait(rq, 0, HZ / 5)) {
4631                pr_err("%s: on-hold request completed!\n",
4632                       engine->name);
4633                intel_gt_set_wedged(gt);
4634                err = -EIO;
4635                goto out_rq;
4636        }
4637        GEM_BUG_ON(!i915_request_on_hold(rq));
4638
4639        /* But it is resubmitted on release */
4640        execlists_unhold(engine, rq);
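        /*
         * Once off the hold list the request is resubmitted; having been
         * marked -EIO by the reset it should be skipped through to
         * completion rather than spinning again, so the wait below must
         * succeed.
         */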
4641        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4642                pr_err("%s: held request did not complete!\n",
4643                       engine->name);
4644                intel_gt_set_wedged(gt);
4645                err = -ETIME;
4646        }
4647
4648out_rq:
4649        i915_request_put(rq);
4650out_heartbeat:
4651        for (n = 0; n < nsibling; n++)
4652                st_engine_heartbeat_enable(siblings[n]);
4653
4654        intel_context_put(ve);
4655out_spin:
4656        igt_spinner_fini(&spin);
4657        return err;
4658}
4659
4660static int live_virtual_reset(void *arg)
4661{
4662        struct intel_gt *gt = arg;
4663        struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4664        unsigned int class;
4665
4666        /*
4667         * Check that we handle a reset event within a virtual engine.
4668         * Only the physical engine is reset, but we have to check the flow
4669         * of the virtual requests around the reset, and make sure they are
4670         * not forgotten.
4671         */
4672
4673        if (intel_uc_uses_guc_submission(&gt->uc))
4674                return 0;
4675
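        /*
         * We exercise a per-engine reset below, so skip if engine resets
         * are not supported on this platform.
         */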
4676        if (!intel_has_reset_engine(gt))
4677                return 0;
4678
4679        for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4680                int nsibling, err;
4681
4682                nsibling = select_siblings(gt, class, siblings);
4683                if (nsibling < 2)
4684                        continue;
4685
4686                err = reset_virtual_engine(gt, siblings, nsibling);
4687                if (err)
4688                        return err;
4689        }
4690
4691        return 0;
4692}
4693
4694int intel_execlists_live_selftests(struct drm_i915_private *i915)
4695{
4696        static const struct i915_subtest tests[] = {
4697                SUBTEST(live_sanitycheck),
4698                SUBTEST(live_unlite_switch),
4699                SUBTEST(live_unlite_preempt),
4700                SUBTEST(live_unlite_ring),
4701                SUBTEST(live_pin_rewind),
4702                SUBTEST(live_hold_reset),
4703                SUBTEST(live_error_interrupt),
4704                SUBTEST(live_timeslice_preempt),
4705                SUBTEST(live_timeslice_rewind),
4706                SUBTEST(live_timeslice_queue),
4707                SUBTEST(live_timeslice_nopreempt),
4708                SUBTEST(live_busywait_preempt),
4709                SUBTEST(live_preempt),
4710                SUBTEST(live_late_preempt),
4711                SUBTEST(live_nopreempt),
4712                SUBTEST(live_preempt_cancel),
4713                SUBTEST(live_suppress_self_preempt),
4714                SUBTEST(live_chain_preempt),
4715                SUBTEST(live_preempt_ring),
4716                SUBTEST(live_preempt_gang),
4717                SUBTEST(live_preempt_timeout),
4718                SUBTEST(live_preempt_user),
4719                SUBTEST(live_preempt_smoke),
4720                SUBTEST(live_virtual_engine),
4721                SUBTEST(live_virtual_mask),
4722                SUBTEST(live_virtual_preserved),
4723                SUBTEST(live_virtual_slice),
4724                SUBTEST(live_virtual_bond),
4725                SUBTEST(live_virtual_reset),
4726        };
4727
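        /*
         * These checks reach into the execlists (ELSP) submission backend,
         * so they only make sense when that backend is in use; skip for ring
         * or GuC submission, and skip entirely on an already wedged GT.
         */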
4728        if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
4729                return 0;
4730
4731        if (intel_gt_is_wedged(&i915->gt))
4732                return 0;
4733
4734        return intel_gt_live_subtests(tests, &i915->gt);
4735}
4736
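/*
 * Usage sketch (not part of the driver): with CONFIG_DRM_I915_SELFTEST
 * enabled, the live selftests above can be requested at module load, e.g.
 *
 *   modprobe i915 live_selftests=-1
 *
 * which asks i915 to run every live selftest group, including
 * intel_execlists_live_selftests(), during probe. The exact module parameter
 * spelling is an assumption here; check i915_params.c and i915_selftest.c for
 * the build in question.
 */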