linux/drivers/gpu/drm/i915/selftests/i915_request.c
   1/*
   2 * Copyright © 2016 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24
  25#include <linux/prime_numbers.h>
  26#include <linux/pm_qos.h>
  27#include <linux/sort.h>
  28
  29#include "gem/i915_gem_pm.h"
  30#include "gem/selftests/mock_context.h"
  31
  32#include "gt/intel_engine_heartbeat.h"
  33#include "gt/intel_engine_pm.h"
  34#include "gt/intel_engine_user.h"
  35#include "gt/intel_gt.h"
  36#include "gt/intel_gt_clock_utils.h"
  37#include "gt/intel_gt_requests.h"
  38#include "gt/selftest_engine_heartbeat.h"
  39
  40#include "i915_random.h"
  41#include "i915_selftest.h"
  42#include "igt_flush_test.h"
  43#include "igt_live_test.h"
  44#include "igt_spinner.h"
  45#include "lib_sw_fence.h"
  46
  47#include "mock_drm.h"
  48#include "mock_gem_device.h"
  49
  50static unsigned int num_uabi_engines(struct drm_i915_private *i915)
  51{
  52        struct intel_engine_cs *engine;
  53        unsigned int count;
  54
  55        count = 0;
  56        for_each_uabi_engine(engine, i915)
  57                count++;
  58
  59        return count;
  60}
  61
  62static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
  63{
  64        return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
  65}
  66
  67static int igt_add_request(void *arg)
  68{
  69        struct drm_i915_private *i915 = arg;
  70        struct i915_request *request;
  71
  72        /* Basic preliminary test to create a request and let it loose! */
  73
  74        request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
  75        if (!request)
  76                return -ENOMEM;
  77
  78        i915_request_add(request);
  79
  80        return 0;
  81}
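
/*
 * Note on the mock path used above: mock_request() builds a request on the
 * mock engine and (assuming the usual mock-engine behaviour) the backend
 * completes it roughly 'delay' jiffies after it has been submitted. A rough
 * sketch of the pattern:
 *
 *	rq = mock_request(ce, HZ / 10);	// allocated, not yet submitted
 *	i915_request_add(rq);		// submitted; retires ~100ms later
 *
 * Nothing here touches real hardware; we are only exercising the request
 * bookkeeping.
 */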
  82
  83static int igt_wait_request(void *arg)
  84{
  85        const long T = HZ / 4;
  86        struct drm_i915_private *i915 = arg;
  87        struct i915_request *request;
  88        int err = -EINVAL;
  89
  90        /* Submit a request, then wait upon it */
  91
  92        request = mock_request(rcs0(i915)->kernel_context, T);
  93        if (!request)
  94                return -ENOMEM;
  95
  96        i915_request_get(request);
  97
  98        if (i915_request_wait(request, 0, 0) != -ETIME) {
  99                pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
 100                goto out_request;
 101        }
 102
 103        if (i915_request_wait(request, 0, T) != -ETIME) {
 104                pr_err("request wait succeeded (expected timeout before submit!)\n");
 105                goto out_request;
 106        }
 107
 108        if (i915_request_completed(request)) {
 109                pr_err("request completed before submit!!\n");
 110                goto out_request;
 111        }
 112
 113        i915_request_add(request);
 114
 115        if (i915_request_wait(request, 0, 0) != -ETIME) {
 116                pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
 117                goto out_request;
 118        }
 119
 120        if (i915_request_completed(request)) {
 121                pr_err("request completed immediately!\n");
 122                goto out_request;
 123        }
 124
 125        if (i915_request_wait(request, 0, T / 2) != -ETIME) {
 126                pr_err("request wait succeeded (expected timeout!)\n");
 127                goto out_request;
 128        }
 129
 130        if (i915_request_wait(request, 0, T) == -ETIME) {
 131                pr_err("request wait timed out!\n");
 132                goto out_request;
 133        }
 134
 135        if (!i915_request_completed(request)) {
 136                pr_err("request not complete after waiting!\n");
 137                goto out_request;
 138        }
 139
 140        if (i915_request_wait(request, 0, T) == -ETIME) {
 141                pr_err("request wait timed out when already complete!\n");
 142                goto out_request;
 143        }
 144
 145        err = 0;
 146out_request:
 147        i915_request_put(request);
 148        mock_device_flush(i915);
 149        return err;
 150}
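
/*
 * The expected timeline above, with T = HZ / 4 being the mock completion
 * delay: any wait before i915_request_add() times out (the request cannot
 * make progress until submitted), a wait shorter than T after submission
 * times out, a wait of at least T succeeds, and waiting on an already
 * completed request returns immediately. i915_request_wait() reports a
 * timeout as -ETIME and otherwise returns the remaining jiffies (>= 0).
 */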
 151
 152static int igt_fence_wait(void *arg)
 153{
 154        const long T = HZ / 4;
 155        struct drm_i915_private *i915 = arg;
 156        struct i915_request *request;
 157        int err = -EINVAL;
 158
 159        /* Submit a request, treat it as a fence and wait upon it */
 160
 161        request = mock_request(rcs0(i915)->kernel_context, T);
 162        if (!request)
 163                return -ENOMEM;
 164
 165        if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
  166                pr_err("fence wait succeeded before submit (expected timeout)!\n");
 167                goto out;
 168        }
 169
 170        i915_request_add(request);
 171
 172        if (dma_fence_is_signaled(&request->fence)) {
 173                pr_err("fence signaled immediately!\n");
 174                goto out;
 175        }
 176
 177        if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
  178                pr_err("fence wait succeeded after submit (expected timeout)!\n");
 179                goto out;
 180        }
 181
 182        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 183                pr_err("fence wait timed out (expected success)!\n");
 184                goto out;
 185        }
 186
 187        if (!dma_fence_is_signaled(&request->fence)) {
 188                pr_err("fence unsignaled after waiting!\n");
 189                goto out;
 190        }
 191
 192        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 193                pr_err("fence wait timed out when complete (expected success)!\n");
 194                goto out;
 195        }
 196
 197        err = 0;
 198out:
 199        mock_device_flush(i915);
 200        return err;
 201}
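
/*
 * Note that the checks above expect -ETIME from dma_fence_wait_timeout()
 * on timeout. The generic dma-fence convention is to return 0 when the
 * wait times out, but i915 request fences supply their own ->wait() hook
 * (i915_fence_wait), which appears to propagate the -ETIME from the
 * request wait path; the test is written against that behaviour.
 */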
 202
 203static int igt_request_rewind(void *arg)
 204{
 205        struct drm_i915_private *i915 = arg;
 206        struct i915_request *request, *vip;
 207        struct i915_gem_context *ctx[2];
 208        struct intel_context *ce;
 209        int err = -EINVAL;
 210
 211        ctx[0] = mock_context(i915, "A");
 212
 213        ce = i915_gem_context_get_engine(ctx[0], RCS0);
 214        GEM_BUG_ON(IS_ERR(ce));
 215        request = mock_request(ce, 2 * HZ);
 216        intel_context_put(ce);
 217        if (!request) {
 218                err = -ENOMEM;
 219                goto err_context_0;
 220        }
 221
 222        i915_request_get(request);
 223        i915_request_add(request);
 224
 225        ctx[1] = mock_context(i915, "B");
 226
 227        ce = i915_gem_context_get_engine(ctx[1], RCS0);
 228        GEM_BUG_ON(IS_ERR(ce));
 229        vip = mock_request(ce, 0);
 230        intel_context_put(ce);
 231        if (!vip) {
 232                err = -ENOMEM;
 233                goto err_context_1;
 234        }
 235
 236        /* Simulate preemption by manual reordering */
 237        if (!mock_cancel_request(request)) {
 238                pr_err("failed to cancel request (already executed)!\n");
 239                i915_request_add(vip);
 240                goto err_context_1;
 241        }
 242        i915_request_get(vip);
 243        i915_request_add(vip);
 244        rcu_read_lock();
 245        request->engine->submit_request(request);
 246        rcu_read_unlock();
 247
 248
 249        if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 250                pr_err("timed out waiting for high priority request\n");
 251                goto err;
 252        }
 253
 254        if (i915_request_completed(request)) {
 255                pr_err("low priority request already completed\n");
 256                goto err;
 257        }
 258
 259        err = 0;
 260err:
 261        i915_request_put(vip);
 262err_context_1:
 263        mock_context_close(ctx[1]);
 264        i915_request_put(request);
 265err_context_0:
 266        mock_context_close(ctx[0]);
 267        mock_device_flush(i915);
 268        return err;
 269}
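
/*
 * How the "rewind" above works: the first request is queued with a long
 * mock delay, then pulled back off the engine with mock_cancel_request()
 * before it has executed. The vip request (zero delay) is submitted in its
 * place and must complete within HZ, after which the original request is
 * pushed straight back to the backend via engine->submit_request() (under
 * rcu_read_lock()). Seeing vip complete while the rewound request is still
 * pending is the simulated preemption we are checking for.
 */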
 270
 271struct smoketest {
 272        struct intel_engine_cs *engine;
 273        struct i915_gem_context **contexts;
 274        atomic_long_t num_waits, num_fences;
 275        int ncontexts, max_batch;
 276        struct i915_request *(*request_alloc)(struct intel_context *ce);
 277};
 278
 279static struct i915_request *
 280__mock_request_alloc(struct intel_context *ce)
 281{
 282        return mock_request(ce, 0);
 283}
 284
 285static struct i915_request *
 286__live_request_alloc(struct intel_context *ce)
 287{
 288        return intel_context_create_request(ce);
 289}
 290
 291static int __igt_breadcrumbs_smoketest(void *arg)
 292{
 293        struct smoketest *t = arg;
 294        const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
 295        const unsigned int total = 4 * t->ncontexts + 1;
 296        unsigned int num_waits = 0, num_fences = 0;
 297        struct i915_request **requests;
 298        I915_RND_STATE(prng);
 299        unsigned int *order;
 300        int err = 0;
 301
 302        /*
 303         * A very simple test to catch the most egregious of list handling bugs.
 304         *
 305         * At its heart, we simply create oodles of requests running across
 306         * multiple kthreads and enable signaling on them, for the sole purpose
 307         * of stressing our breadcrumb handling. The only inspection we do is
 308         * that the fences were marked as signaled.
 309         */
 310
 311        requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
 312        if (!requests)
 313                return -ENOMEM;
 314
 315        order = i915_random_order(total, &prng);
 316        if (!order) {
 317                err = -ENOMEM;
 318                goto out_requests;
 319        }
 320
 321        while (!kthread_should_stop()) {
 322                struct i915_sw_fence *submit, *wait;
 323                unsigned int n, count;
 324
 325                submit = heap_fence_create(GFP_KERNEL);
 326                if (!submit) {
 327                        err = -ENOMEM;
 328                        break;
 329                }
 330
 331                wait = heap_fence_create(GFP_KERNEL);
 332                if (!wait) {
 333                        i915_sw_fence_commit(submit);
 334                        heap_fence_put(submit);
 335                        err = -ENOMEM;
 336                        break;
 337                }
 338
 339                i915_random_reorder(order, total, &prng);
 340                count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
 341
 342                for (n = 0; n < count; n++) {
 343                        struct i915_gem_context *ctx =
 344                                t->contexts[order[n] % t->ncontexts];
 345                        struct i915_request *rq;
 346                        struct intel_context *ce;
 347
 348                        ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
 349                        GEM_BUG_ON(IS_ERR(ce));
 350                        rq = t->request_alloc(ce);
 351                        intel_context_put(ce);
 352                        if (IS_ERR(rq)) {
 353                                err = PTR_ERR(rq);
 354                                count = n;
 355                                break;
 356                        }
 357
 358                        err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
 359                                                               submit,
 360                                                               GFP_KERNEL);
 361
 362                        requests[n] = i915_request_get(rq);
 363                        i915_request_add(rq);
 364
 365                        if (err >= 0)
 366                                err = i915_sw_fence_await_dma_fence(wait,
 367                                                                    &rq->fence,
 368                                                                    0,
 369                                                                    GFP_KERNEL);
 370
 371                        if (err < 0) {
 372                                i915_request_put(rq);
 373                                count = n;
 374                                break;
 375                        }
 376                }
 377
 378                i915_sw_fence_commit(submit);
 379                i915_sw_fence_commit(wait);
 380
 381                if (!wait_event_timeout(wait->wait,
 382                                        i915_sw_fence_done(wait),
 383                                        5 * HZ)) {
 384                        struct i915_request *rq = requests[count - 1];
 385
 386                        pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
 387                               atomic_read(&wait->pending), count,
 388                               rq->fence.context, rq->fence.seqno,
 389                               t->engine->name);
 390                        GEM_TRACE_DUMP();
 391
 392                        intel_gt_set_wedged(t->engine->gt);
 393                        GEM_BUG_ON(!i915_request_completed(rq));
 394                        i915_sw_fence_wait(wait);
 395                        err = -EIO;
 396                }
 397
 398                for (n = 0; n < count; n++) {
 399                        struct i915_request *rq = requests[n];
 400
 401                        if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 402                                      &rq->fence.flags)) {
 403                                pr_err("%llu:%llu was not signaled!\n",
 404                                       rq->fence.context, rq->fence.seqno);
 405                                err = -EINVAL;
 406                        }
 407
 408                        i915_request_put(rq);
 409                }
 410
 411                heap_fence_put(wait);
 412                heap_fence_put(submit);
 413
 414                if (err < 0)
 415                        break;
 416
 417                num_fences += count;
 418                num_waits++;
 419
 420                cond_resched();
 421        }
 422
 423        atomic_long_add(num_fences, &t->num_fences);
 424        atomic_long_add(num_waits, &t->num_waits);
 425
 426        kfree(order);
 427out_requests:
 428        kfree(requests);
 429        return err;
 430}
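
/*
 * The fence plumbing in the loop above, in sketch form (per request):
 *
 *	i915_sw_fence_await_sw_fence_gfp(&rq->submit, submit, GFP_KERNEL);
 *		- hold back execution until the whole batch is queued
 *	i915_sw_fence_await_dma_fence(wait, &rq->fence, 0, GFP_KERNEL);
 *		- make the composite 'wait' fence depend on this request
 *
 * Committing 'submit' releases every request in the batch at once, and
 * committing 'wait' lets the thread sleep until all of their breadcrumbs
 * have signaled (or the 5s timeout declares the GT wedged).
 */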
 431
 432static int mock_breadcrumbs_smoketest(void *arg)
 433{
 434        struct drm_i915_private *i915 = arg;
 435        struct smoketest t = {
 436                .engine = rcs0(i915),
 437                .ncontexts = 1024,
 438                .max_batch = 1024,
 439                .request_alloc = __mock_request_alloc
 440        };
 441        unsigned int ncpus = num_online_cpus();
 442        struct task_struct **threads;
 443        unsigned int n;
 444        int ret = 0;
 445
 446        /*
 447         * Smoketest our breadcrumb/signal handling for requests across multiple
 448         * threads. A very simple test to only catch the most egregious of bugs.
 449         * See __igt_breadcrumbs_smoketest();
 450         */
 451
 452        threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
 453        if (!threads)
 454                return -ENOMEM;
 455
 456        t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
 457        if (!t.contexts) {
 458                ret = -ENOMEM;
 459                goto out_threads;
 460        }
 461
 462        for (n = 0; n < t.ncontexts; n++) {
 463                t.contexts[n] = mock_context(t.engine->i915, "mock");
 464                if (!t.contexts[n]) {
 465                        ret = -ENOMEM;
 466                        goto out_contexts;
 467                }
 468        }
 469
 470        for (n = 0; n < ncpus; n++) {
 471                threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
 472                                         &t, "igt/%d", n);
 473                if (IS_ERR(threads[n])) {
 474                        ret = PTR_ERR(threads[n]);
 475                        ncpus = n;
 476                        break;
 477                }
 478
 479                get_task_struct(threads[n]);
 480        }
 481
 482        yield(); /* start all threads before we begin */
 483        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 484
 485        for (n = 0; n < ncpus; n++) {
 486                int err;
 487
 488                err = kthread_stop(threads[n]);
 489                if (err < 0 && !ret)
 490                        ret = err;
 491
 492                put_task_struct(threads[n]);
 493        }
  494        pr_info("Completed %lu waits for %lu fences across %d cpus\n",
 495                atomic_long_read(&t.num_waits),
 496                atomic_long_read(&t.num_fences),
 497                ncpus);
 498
 499out_contexts:
 500        for (n = 0; n < t.ncontexts; n++) {
 501                if (!t.contexts[n])
 502                        break;
 503                mock_context_close(t.contexts[n]);
 504        }
 505        kfree(t.contexts);
 506out_threads:
 507        kfree(threads);
 508        return ret;
 509}
 510
 511int i915_request_mock_selftests(void)
 512{
 513        static const struct i915_subtest tests[] = {
 514                SUBTEST(igt_add_request),
 515                SUBTEST(igt_wait_request),
 516                SUBTEST(igt_fence_wait),
 517                SUBTEST(igt_request_rewind),
 518                SUBTEST(mock_breadcrumbs_smoketest),
 519        };
 520        struct drm_i915_private *i915;
 521        intel_wakeref_t wakeref;
 522        int err = 0;
 523
 524        i915 = mock_gem_device();
 525        if (!i915)
 526                return -ENOMEM;
 527
 528        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 529                err = i915_subtests(tests, i915);
 530
 531        mock_destroy_device(i915);
 532
 533        return err;
 534}
 535
 536static int live_nop_request(void *arg)
 537{
 538        struct drm_i915_private *i915 = arg;
 539        struct intel_engine_cs *engine;
 540        struct igt_live_test t;
 541        int err = -ENODEV;
 542
 543        /*
 544         * Submit various sized batches of empty requests, to each engine
 545         * (individually), and wait for the batch to complete. We can check
 546         * the overhead of submitting requests to the hardware.
 547         */
 548
 549        for_each_uabi_engine(engine, i915) {
 550                unsigned long n, prime;
 551                IGT_TIMEOUT(end_time);
 552                ktime_t times[2] = {};
 553
 554                err = igt_live_test_begin(&t, i915, __func__, engine->name);
 555                if (err)
 556                        return err;
 557
 558                intel_engine_pm_get(engine);
 559                for_each_prime_number_from(prime, 1, 8192) {
 560                        struct i915_request *request = NULL;
 561
 562                        times[1] = ktime_get_raw();
 563
 564                        for (n = 0; n < prime; n++) {
 565                                i915_request_put(request);
 566                                request = i915_request_create(engine->kernel_context);
 567                                if (IS_ERR(request))
 568                                        return PTR_ERR(request);
 569
 570                                /*
 571                                 * This space is left intentionally blank.
 572                                 *
 573                                 * We do not actually want to perform any
 574                                 * action with this request, we just want
 575                                 * to measure the latency in allocation
 576                                 * and submission of our breadcrumbs -
 577                                 * ensuring that the bare request is sufficient
 578                                 * for the system to work (i.e. proper HEAD
 579                                 * tracking of the rings, interrupt handling,
 580                                 * etc). It also gives us the lowest bounds
 581                                 * for latency.
 582                                 */
 583
 584                                i915_request_get(request);
 585                                i915_request_add(request);
 586                        }
 587                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 588                        i915_request_put(request);
 589
 590                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
 591                        if (prime == 1)
 592                                times[0] = times[1];
 593
 594                        if (__igt_timeout(end_time, NULL))
 595                                break;
 596                }
 597                intel_engine_pm_put(engine);
 598
 599                err = igt_live_test_end(&t);
 600                if (err)
 601                        return err;
 602
 603                pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
 604                        engine->name,
 605                        ktime_to_ns(times[0]),
 606                        prime, div64_u64(ktime_to_ns(times[1]), prime));
 607        }
 608
 609        return err;
 610}
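
/*
 * A note on the measurement above: for_each_prime_number_from() sweeps the
 * batch size through 1 and then successive primes up to 8192, timing how
 * long it takes to queue 'prime' empty requests and wait for the last one.
 * times[0] is the cost of a single request + wait, and times[1] / prime for
 * the largest batch that fit in the timeout gives the amortised per-request
 * overhead, which is what the pr_info() at the end reports.
 */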
 611
 612static int __cancel_inactive(struct intel_engine_cs *engine)
 613{
 614        struct intel_context *ce;
 615        struct igt_spinner spin;
 616        struct i915_request *rq;
 617        int err = 0;
 618
 619        if (igt_spinner_init(&spin, engine->gt))
 620                return -ENOMEM;
 621
 622        ce = intel_context_create(engine);
 623        if (IS_ERR(ce)) {
 624                err = PTR_ERR(ce);
 625                goto out_spin;
 626        }
 627
 628        rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 629        if (IS_ERR(rq)) {
 630                err = PTR_ERR(rq);
 631                goto out_ce;
 632        }
 633
 634        pr_debug("%s: Cancelling inactive request\n", engine->name);
 635        i915_request_cancel(rq, -EINTR);
 636        i915_request_get(rq);
 637        i915_request_add(rq);
 638
 639        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 640                struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
 641
 642                pr_err("%s: Failed to cancel inactive request\n", engine->name);
 643                intel_engine_dump(engine, &p, "%s\n", engine->name);
 644                err = -ETIME;
 645                goto out_rq;
 646        }
 647
 648        if (rq->fence.error != -EINTR) {
 649                pr_err("%s: fence not cancelled (%u)\n",
 650                       engine->name, rq->fence.error);
 651                err = -EINVAL;
 652        }
 653
 654out_rq:
 655        i915_request_put(rq);
 656out_ce:
 657        intel_context_put(ce);
 658out_spin:
 659        igt_spinner_fini(&spin);
 660        if (err)
 661                pr_err("%s: %s error %d\n", __func__, engine->name, err);
 662        return err;
 663}
 664
 665static int __cancel_active(struct intel_engine_cs *engine)
 666{
 667        struct intel_context *ce;
 668        struct igt_spinner spin;
 669        struct i915_request *rq;
 670        int err = 0;
 671
 672        if (igt_spinner_init(&spin, engine->gt))
 673                return -ENOMEM;
 674
 675        ce = intel_context_create(engine);
 676        if (IS_ERR(ce)) {
 677                err = PTR_ERR(ce);
 678                goto out_spin;
 679        }
 680
 681        rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 682        if (IS_ERR(rq)) {
 683                err = PTR_ERR(rq);
 684                goto out_ce;
 685        }
 686
 687        pr_debug("%s: Cancelling active request\n", engine->name);
 688        i915_request_get(rq);
 689        i915_request_add(rq);
 690        if (!igt_wait_for_spinner(&spin, rq)) {
 691                struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
 692
 693                pr_err("Failed to start spinner on %s\n", engine->name);
 694                intel_engine_dump(engine, &p, "%s\n", engine->name);
 695                err = -ETIME;
 696                goto out_rq;
 697        }
 698        i915_request_cancel(rq, -EINTR);
 699
 700        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 701                struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
 702
 703                pr_err("%s: Failed to cancel active request\n", engine->name);
 704                intel_engine_dump(engine, &p, "%s\n", engine->name);
 705                err = -ETIME;
 706                goto out_rq;
 707        }
 708
 709        if (rq->fence.error != -EINTR) {
 710                pr_err("%s: fence not cancelled (%u)\n",
 711                       engine->name, rq->fence.error);
 712                err = -EINVAL;
 713        }
 714
 715out_rq:
 716        i915_request_put(rq);
 717out_ce:
 718        intel_context_put(ce);
 719out_spin:
 720        igt_spinner_fini(&spin);
 721        if (err)
 722                pr_err("%s: %s error %d\n", __func__, engine->name, err);
 723        return err;
 724}
 725
 726static int __cancel_completed(struct intel_engine_cs *engine)
 727{
 728        struct intel_context *ce;
 729        struct igt_spinner spin;
 730        struct i915_request *rq;
 731        int err = 0;
 732
 733        if (igt_spinner_init(&spin, engine->gt))
 734                return -ENOMEM;
 735
 736        ce = intel_context_create(engine);
 737        if (IS_ERR(ce)) {
 738                err = PTR_ERR(ce);
 739                goto out_spin;
 740        }
 741
 742        rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 743        if (IS_ERR(rq)) {
 744                err = PTR_ERR(rq);
 745                goto out_ce;
 746        }
 747        igt_spinner_end(&spin);
 748        i915_request_get(rq);
 749        i915_request_add(rq);
 750
 751        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 752                err = -ETIME;
 753                goto out_rq;
 754        }
 755
 756        pr_debug("%s: Cancelling completed request\n", engine->name);
 757        i915_request_cancel(rq, -EINTR);
 758        if (rq->fence.error) {
  759                pr_err("%s: fence error set on completed request (%u)\n",
  760                       engine->name, rq->fence.error);
 761                err = -EINVAL;
 762        }
 763
 764out_rq:
 765        i915_request_put(rq);
 766out_ce:
 767        intel_context_put(ce);
 768out_spin:
 769        igt_spinner_fini(&spin);
 770        if (err)
 771                pr_err("%s: %s error %d\n", __func__, engine->name, err);
 772        return err;
 773}
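
/*
 * The three helpers above cover the request-cancellation cases exercised by
 * live_cancel_request() below:
 *
 *	__cancel_inactive  - cancel before submission; the request must still
 *			     retire promptly and carry fence.error == -EINTR.
 *	__cancel_active    - cancel while the spinner is running on the GPU;
 *			     cancellation must stop it and set -EINTR.
 *	__cancel_completed - cancel after the request has already finished;
 *			     this must be a no-op, leaving fence.error at 0.
 */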
 774
 775static int live_cancel_request(void *arg)
 776{
 777        struct drm_i915_private *i915 = arg;
 778        struct intel_engine_cs *engine;
 779
 780        /*
 781         * Check cancellation of requests. We expect to be able to immediately
 782         * cancel active requests, even if they are currently on the GPU.
 783         */
 784
 785        for_each_uabi_engine(engine, i915) {
 786                struct igt_live_test t;
 787                int err, err2;
 788
 789                if (!intel_engine_has_preemption(engine))
 790                        continue;
 791
 792                err = igt_live_test_begin(&t, i915, __func__, engine->name);
 793                if (err)
 794                        return err;
 795
 796                err = __cancel_inactive(engine);
 797                if (err == 0)
 798                        err = __cancel_active(engine);
 799                if (err == 0)
 800                        err = __cancel_completed(engine);
 801
 802                err2 = igt_live_test_end(&t);
 803                if (err)
 804                        return err;
 805                if (err2)
 806                        return err2;
 807        }
 808
 809        return 0;
 810}
 811
 812static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 813{
 814        struct drm_i915_gem_object *obj;
 815        struct i915_vma *vma;
 816        u32 *cmd;
 817        int err;
 818
 819        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 820        if (IS_ERR(obj))
 821                return ERR_CAST(obj);
 822
 823        cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
 824        if (IS_ERR(cmd)) {
 825                err = PTR_ERR(cmd);
 826                goto err;
 827        }
 828
 829        *cmd = MI_BATCH_BUFFER_END;
 830
 831        __i915_gem_object_flush_map(obj, 0, 64);
 832        i915_gem_object_unpin_map(obj);
 833
 834        intel_gt_chipset_flush(&i915->gt);
 835
 836        vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
 837        if (IS_ERR(vma)) {
 838                err = PTR_ERR(vma);
 839                goto err;
 840        }
 841
 842        err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
 843        if (err)
 844                goto err;
 845
  846        /* Force the wait now to avoid including it in the benchmark */
 847        err = i915_vma_sync(vma);
 848        if (err)
 849                goto err_pin;
 850
 851        return vma;
 852
 853err_pin:
 854        i915_vma_unpin(vma);
 855err:
 856        i915_gem_object_put(obj);
 857        return ERR_PTR(err);
 858}
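
/*
 * empty_batch() above builds the smallest possible batch buffer: a single
 * MI_BATCH_BUFFER_END in a page pinned into the GGTT. empty_request() then
 * dispatches it with emit_bb_start(), so the timed loops in
 * live_empty_request() measure little more than submission plus batch
 * dispatch overhead. The i915_vma_sync() keeps the one-off cost of binding
 * the vma out of the benchmark itself.
 */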
 859
 860static struct i915_request *
 861empty_request(struct intel_engine_cs *engine,
 862              struct i915_vma *batch)
 863{
 864        struct i915_request *request;
 865        int err;
 866
 867        request = i915_request_create(engine->kernel_context);
 868        if (IS_ERR(request))
 869                return request;
 870
 871        err = engine->emit_bb_start(request,
 872                                    batch->node.start,
 873                                    batch->node.size,
 874                                    I915_DISPATCH_SECURE);
 875        if (err)
 876                goto out_request;
 877
 878        i915_request_get(request);
 879out_request:
 880        i915_request_add(request);
 881        return err ? ERR_PTR(err) : request;
 882}
 883
 884static int live_empty_request(void *arg)
 885{
 886        struct drm_i915_private *i915 = arg;
 887        struct intel_engine_cs *engine;
 888        struct igt_live_test t;
 889        struct i915_vma *batch;
 890        int err = 0;
 891
 892        /*
 893         * Submit various sized batches of empty requests, to each engine
 894         * (individually), and wait for the batch to complete. We can check
 895         * the overhead of submitting requests to the hardware.
 896         */
 897
 898        batch = empty_batch(i915);
 899        if (IS_ERR(batch))
 900                return PTR_ERR(batch);
 901
 902        for_each_uabi_engine(engine, i915) {
 903                IGT_TIMEOUT(end_time);
 904                struct i915_request *request;
 905                unsigned long n, prime;
 906                ktime_t times[2] = {};
 907
 908                err = igt_live_test_begin(&t, i915, __func__, engine->name);
 909                if (err)
 910                        goto out_batch;
 911
 912                intel_engine_pm_get(engine);
 913
 914                /* Warmup / preload */
 915                request = empty_request(engine, batch);
 916                if (IS_ERR(request)) {
 917                        err = PTR_ERR(request);
 918                        intel_engine_pm_put(engine);
 919                        goto out_batch;
 920                }
 921                i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 922
 923                for_each_prime_number_from(prime, 1, 8192) {
 924                        times[1] = ktime_get_raw();
 925
 926                        for (n = 0; n < prime; n++) {
 927                                i915_request_put(request);
 928                                request = empty_request(engine, batch);
 929                                if (IS_ERR(request)) {
 930                                        err = PTR_ERR(request);
 931                                        intel_engine_pm_put(engine);
 932                                        goto out_batch;
 933                                }
 934                        }
 935                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 936
 937                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
 938                        if (prime == 1)
 939                                times[0] = times[1];
 940
 941                        if (__igt_timeout(end_time, NULL))
 942                                break;
 943                }
 944                i915_request_put(request);
 945                intel_engine_pm_put(engine);
 946
 947                err = igt_live_test_end(&t);
 948                if (err)
 949                        goto out_batch;
 950
 951                pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
 952                        engine->name,
 953                        ktime_to_ns(times[0]),
 954                        prime, div64_u64(ktime_to_ns(times[1]), prime));
 955        }
 956
 957out_batch:
 958        i915_vma_unpin(batch);
 959        i915_vma_put(batch);
 960        return err;
 961}
 962
 963static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 964{
 965        struct drm_i915_gem_object *obj;
 966        const int ver = GRAPHICS_VER(i915);
 967        struct i915_vma *vma;
 968        u32 *cmd;
 969        int err;
 970
 971        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 972        if (IS_ERR(obj))
 973                return ERR_CAST(obj);
 974
 975        vma = i915_vma_instance(obj, i915->gt.vm, NULL);
 976        if (IS_ERR(vma)) {
 977                err = PTR_ERR(vma);
 978                goto err;
 979        }
 980
 981        err = i915_vma_pin(vma, 0, 0, PIN_USER);
 982        if (err)
 983                goto err;
 984
 985        cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
 986        if (IS_ERR(cmd)) {
 987                err = PTR_ERR(cmd);
 988                goto err;
 989        }
 990
 991        if (ver >= 8) {
 992                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
 993                *cmd++ = lower_32_bits(vma->node.start);
 994                *cmd++ = upper_32_bits(vma->node.start);
 995        } else if (ver >= 6) {
 996                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
 997                *cmd++ = lower_32_bits(vma->node.start);
 998        } else {
 999                *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1000                *cmd++ = lower_32_bits(vma->node.start);
1001        }
1002        *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
1003
1004        __i915_gem_object_flush_map(obj, 0, 64);
1005        i915_gem_object_unpin_map(obj);
1006
1007        intel_gt_chipset_flush(&i915->gt);
1008
1009        return vma;
1010
1011err:
1012        i915_gem_object_put(obj);
1013        return ERR_PTR(err);
1014}
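
/*
 * The batch built above jumps back to its own first dword: the
 * MI_BATCH_BUFFER_START (emitted in the gen-appropriate form, 64-bit
 * addressing on gen8+) targets vma->node.start, so once dispatched the GPU
 * spins in this buffer indefinitely. recursive_batch_resolve() breaks the
 * loop by overwriting that first dword with MI_BATCH_BUFFER_END and
 * flushing, which is how live_all_engines()/live_sequential_engines() let
 * their requests complete on demand. The trailing MI_BATCH_BUFFER_END only
 * matters if emission is cut short, per the "terminate early" note above.
 */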
1015
1016static int recursive_batch_resolve(struct i915_vma *batch)
1017{
1018        u32 *cmd;
1019
1020        cmd = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1021        if (IS_ERR(cmd))
1022                return PTR_ERR(cmd);
1023
1024        *cmd = MI_BATCH_BUFFER_END;
1025
1026        __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd));
1027        i915_gem_object_unpin_map(batch->obj);
1028
1029        intel_gt_chipset_flush(batch->vm->gt);
1030
1031        return 0;
1032}
1033
1034static int live_all_engines(void *arg)
1035{
1036        struct drm_i915_private *i915 = arg;
1037        const unsigned int nengines = num_uabi_engines(i915);
1038        struct intel_engine_cs *engine;
1039        struct i915_request **request;
1040        struct igt_live_test t;
1041        struct i915_vma *batch;
1042        unsigned int idx;
1043        int err;
1044
1045        /*
1046         * Check we can submit requests to all engines simultaneously. We
1047         * send a recursive batch to each engine - checking that we don't
1048         * block doing so, and that they don't complete too soon.
1049         */
1050
1051        request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1052        if (!request)
1053                return -ENOMEM;
1054
1055        err = igt_live_test_begin(&t, i915, __func__, "");
1056        if (err)
1057                goto out_free;
1058
1059        batch = recursive_batch(i915);
1060        if (IS_ERR(batch)) {
1061                err = PTR_ERR(batch);
1062                pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
1063                goto out_free;
1064        }
1065
1066        i915_vma_lock(batch);
1067
1068        idx = 0;
1069        for_each_uabi_engine(engine, i915) {
1070                request[idx] = intel_engine_create_kernel_request(engine);
1071                if (IS_ERR(request[idx])) {
1072                        err = PTR_ERR(request[idx]);
1073                        pr_err("%s: Request allocation failed with err=%d\n",
1074                               __func__, err);
1075                        goto out_request;
1076                }
1077
1078                err = i915_request_await_object(request[idx], batch->obj, 0);
1079                if (err == 0)
1080                        err = i915_vma_move_to_active(batch, request[idx], 0);
1081                GEM_BUG_ON(err);
1082
1083                err = engine->emit_bb_start(request[idx],
1084                                            batch->node.start,
1085                                            batch->node.size,
1086                                            0);
1087                GEM_BUG_ON(err);
1088                request[idx]->batch = batch;
1089
1090                i915_request_get(request[idx]);
1091                i915_request_add(request[idx]);
1092                idx++;
1093        }
1094
1095        i915_vma_unlock(batch);
1096
1097        idx = 0;
1098        for_each_uabi_engine(engine, i915) {
1099                if (i915_request_completed(request[idx])) {
1100                        pr_err("%s(%s): request completed too early!\n",
1101                               __func__, engine->name);
1102                        err = -EINVAL;
1103                        goto out_request;
1104                }
1105                idx++;
1106        }
1107
1108        err = recursive_batch_resolve(batch);
1109        if (err) {
1110                pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
1111                goto out_request;
1112        }
1113
1114        idx = 0;
1115        for_each_uabi_engine(engine, i915) {
1116                long timeout;
1117
1118                timeout = i915_request_wait(request[idx], 0,
1119                                            MAX_SCHEDULE_TIMEOUT);
1120                if (timeout < 0) {
1121                        err = timeout;
1122                        pr_err("%s: error waiting for request on %s, err=%d\n",
1123                               __func__, engine->name, err);
1124                        goto out_request;
1125                }
1126
1127                GEM_BUG_ON(!i915_request_completed(request[idx]));
1128                i915_request_put(request[idx]);
1129                request[idx] = NULL;
1130                idx++;
1131        }
1132
1133        err = igt_live_test_end(&t);
1134
1135out_request:
1136        idx = 0;
1137        for_each_uabi_engine(engine, i915) {
1138                if (request[idx])
1139                        i915_request_put(request[idx]);
1140                idx++;
1141        }
1142        i915_vma_unpin(batch);
1143        i915_vma_put(batch);
1144out_free:
1145        kfree(request);
1146        return err;
1147}
1148
1149static int live_sequential_engines(void *arg)
1150{
1151        struct drm_i915_private *i915 = arg;
1152        const unsigned int nengines = num_uabi_engines(i915);
1153        struct i915_request **request;
1154        struct i915_request *prev = NULL;
1155        struct intel_engine_cs *engine;
1156        struct igt_live_test t;
1157        unsigned int idx;
1158        int err;
1159
1160        /*
1161         * Check we can submit requests to all engines sequentially, such
1162         * that each successive request waits for the earlier ones. This
1163         * tests that we don't execute requests out of order, even though
1164         * they are running on independent engines.
1165         */
1166
1167        request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1168        if (!request)
1169                return -ENOMEM;
1170
1171        err = igt_live_test_begin(&t, i915, __func__, "");
1172        if (err)
1173                goto out_free;
1174
1175        idx = 0;
1176        for_each_uabi_engine(engine, i915) {
1177                struct i915_vma *batch;
1178
1179                batch = recursive_batch(i915);
1180                if (IS_ERR(batch)) {
1181                        err = PTR_ERR(batch);
1182                        pr_err("%s: Unable to create batch for %s, err=%d\n",
1183                               __func__, engine->name, err);
1184                        goto out_free;
1185                }
1186
1187                i915_vma_lock(batch);
1188                request[idx] = intel_engine_create_kernel_request(engine);
1189                if (IS_ERR(request[idx])) {
1190                        err = PTR_ERR(request[idx]);
1191                        pr_err("%s: Request allocation failed for %s with err=%d\n",
1192                               __func__, engine->name, err);
1193                        goto out_unlock;
1194                }
1195
1196                if (prev) {
1197                        err = i915_request_await_dma_fence(request[idx],
1198                                                           &prev->fence);
1199                        if (err) {
1200                                i915_request_add(request[idx]);
1201                                pr_err("%s: Request await failed for %s with err=%d\n",
1202                                       __func__, engine->name, err);
1203                                goto out_unlock;
1204                        }
1205                }
1206
1207                err = i915_request_await_object(request[idx],
1208                                                batch->obj, false);
1209                if (err == 0)
1210                        err = i915_vma_move_to_active(batch, request[idx], 0);
1211                GEM_BUG_ON(err);
1212
1213                err = engine->emit_bb_start(request[idx],
1214                                            batch->node.start,
1215                                            batch->node.size,
1216                                            0);
1217                GEM_BUG_ON(err);
1218                request[idx]->batch = batch;
1219
1220                i915_request_get(request[idx]);
1221                i915_request_add(request[idx]);
1222
1223                prev = request[idx];
1224                idx++;
1225
1226out_unlock:
1227                i915_vma_unlock(batch);
1228                if (err)
1229                        goto out_request;
1230        }
1231
1232        idx = 0;
1233        for_each_uabi_engine(engine, i915) {
1234                long timeout;
1235
1236                if (i915_request_completed(request[idx])) {
1237                        pr_err("%s(%s): request completed too early!\n",
1238                               __func__, engine->name);
1239                        err = -EINVAL;
1240                        goto out_request;
1241                }
1242
1243                err = recursive_batch_resolve(request[idx]->batch);
1244                if (err) {
1245                        pr_err("%s: failed to resolve batch, err=%d\n",
1246                               __func__, err);
1247                        goto out_request;
1248                }
1249
1250                timeout = i915_request_wait(request[idx], 0,
1251                                            MAX_SCHEDULE_TIMEOUT);
1252                if (timeout < 0) {
1253                        err = timeout;
1254                        pr_err("%s: error waiting for request on %s, err=%d\n",
1255                               __func__, engine->name, err);
1256                        goto out_request;
1257                }
1258
1259                GEM_BUG_ON(!i915_request_completed(request[idx]));
1260                idx++;
1261        }
1262
1263        err = igt_live_test_end(&t);
1264
1265out_request:
1266        idx = 0;
1267        for_each_uabi_engine(engine, i915) {
1268                u32 *cmd;
1269
1270                if (!request[idx])
1271                        break;
1272
1273                cmd = i915_gem_object_pin_map_unlocked(request[idx]->batch->obj,
1274                                                       I915_MAP_WC);
1275                if (!IS_ERR(cmd)) {
1276                        *cmd = MI_BATCH_BUFFER_END;
1277
1278                        __i915_gem_object_flush_map(request[idx]->batch->obj,
1279                                                    0, sizeof(*cmd));
1280                        i915_gem_object_unpin_map(request[idx]->batch->obj);
1281
1282                        intel_gt_chipset_flush(engine->gt);
1283                }
1284
1285                i915_vma_put(request[idx]->batch);
1286                i915_request_put(request[idx]);
1287                idx++;
1288        }
1289out_free:
1290        kfree(request);
1291        return err;
1292}
1293
1294static int __live_parallel_engine1(void *arg)
1295{
1296        struct intel_engine_cs *engine = arg;
1297        IGT_TIMEOUT(end_time);
1298        unsigned long count;
1299        int err = 0;
1300
1301        count = 0;
1302        intel_engine_pm_get(engine);
1303        do {
1304                struct i915_request *rq;
1305
1306                rq = i915_request_create(engine->kernel_context);
1307                if (IS_ERR(rq)) {
1308                        err = PTR_ERR(rq);
1309                        break;
1310                }
1311
1312                i915_request_get(rq);
1313                i915_request_add(rq);
1314
1315                err = 0;
1316                if (i915_request_wait(rq, 0, HZ) < 0)
1317                        err = -ETIME;
1318                i915_request_put(rq);
1319                if (err)
1320                        break;
1321
1322                count++;
1323        } while (!__igt_timeout(end_time, NULL));
1324        intel_engine_pm_put(engine);
1325
 1326        pr_info("%s: %lu requests + sync\n", engine->name, count);
1327        return err;
1328}
1329
1330static int __live_parallel_engineN(void *arg)
1331{
1332        struct intel_engine_cs *engine = arg;
1333        IGT_TIMEOUT(end_time);
1334        unsigned long count;
1335        int err = 0;
1336
1337        count = 0;
1338        intel_engine_pm_get(engine);
1339        do {
1340                struct i915_request *rq;
1341
1342                rq = i915_request_create(engine->kernel_context);
1343                if (IS_ERR(rq)) {
1344                        err = PTR_ERR(rq);
1345                        break;
1346                }
1347
1348                i915_request_add(rq);
1349                count++;
1350        } while (!__igt_timeout(end_time, NULL));
1351        intel_engine_pm_put(engine);
1352
1353        pr_info("%s: %lu requests\n", engine->name, count);
1354        return err;
1355}
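
/*
 * The two workers above probe different costs when run on every engine at
 * once: __live_parallel_engine1() submits one request at a time and
 * synchronously waits for it (round-trip latency under concurrent load),
 * while __live_parallel_engineN() queues requests as fast as it can without
 * waiting (pure submission throughput). The spinner variant below instead
 * checks that each worker really was given a distinct engine.
 */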
1356
1357static bool wake_all(struct drm_i915_private *i915)
1358{
1359        if (atomic_dec_and_test(&i915->selftest.counter)) {
1360                wake_up_var(&i915->selftest.counter);
1361                return true;
1362        }
1363
1364        return false;
1365}
1366
1367static int wait_for_all(struct drm_i915_private *i915)
1368{
1369        if (wake_all(i915))
1370                return 0;
1371
1372        if (wait_var_event_timeout(&i915->selftest.counter,
1373                                   !atomic_read(&i915->selftest.counter),
1374                                   i915_selftest.timeout_jiffies))
1375                return 0;
1376
1377        return -ETIME;
1378}
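
/*
 * wake_all()/wait_for_all() form a simple barrier on i915->selftest.counter,
 * which live_parallel_engines() initialises to the number of engines: each
 * spinner thread decrements it on arrival, the last one down wakes the rest,
 * and everyone else sleeps in wait_var_event_timeout() until the counter
 * reaches zero or the selftest timeout expires (-ETIME).
 */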
1379
1380static int __live_parallel_spin(void *arg)
1381{
1382        struct intel_engine_cs *engine = arg;
1383        struct igt_spinner spin;
1384        struct i915_request *rq;
1385        int err = 0;
1386
1387        /*
1388         * Create a spinner running for eternity on each engine. If a second
1389         * spinner is incorrectly placed on the same engine, it will not be
1390         * able to start in time.
1391         */
1392
1393        if (igt_spinner_init(&spin, engine->gt)) {
1394                wake_all(engine->i915);
1395                return -ENOMEM;
1396        }
1397
1398        intel_engine_pm_get(engine);
1399        rq = igt_spinner_create_request(&spin,
1400                                        engine->kernel_context,
1401                                        MI_NOOP); /* no preemption */
1402        intel_engine_pm_put(engine);
1403        if (IS_ERR(rq)) {
1404                err = PTR_ERR(rq);
1405                if (err == -ENODEV)
1406                        err = 0;
1407                wake_all(engine->i915);
1408                goto out_spin;
1409        }
1410
1411        i915_request_get(rq);
1412        i915_request_add(rq);
1413        if (igt_wait_for_spinner(&spin, rq)) {
1414                /* Occupy this engine for the whole test */
1415                err = wait_for_all(engine->i915);
1416        } else {
1417                pr_err("Failed to start spinner on %s\n", engine->name);
1418                err = -EINVAL;
1419        }
1420        igt_spinner_end(&spin);
1421
1422        if (err == 0 && i915_request_wait(rq, 0, HZ) < 0)
1423                err = -EIO;
1424        i915_request_put(rq);
1425
1426out_spin:
1427        igt_spinner_fini(&spin);
1428        return err;
1429}
1430
1431static int live_parallel_engines(void *arg)
1432{
1433        struct drm_i915_private *i915 = arg;
1434        static int (* const func[])(void *arg) = {
1435                __live_parallel_engine1,
1436                __live_parallel_engineN,
1437                __live_parallel_spin,
1438                NULL,
1439        };
1440        const unsigned int nengines = num_uabi_engines(i915);
1441        struct intel_engine_cs *engine;
1442        int (* const *fn)(void *arg);
1443        struct task_struct **tsk;
1444        int err = 0;
1445
1446        /*
1447         * Check we can submit requests to all engines concurrently. This
1448         * tests that we load up the system maximally.
1449         */
1450
1451        tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
1452        if (!tsk)
1453                return -ENOMEM;
1454
1455        for (fn = func; !err && *fn; fn++) {
1456                char name[KSYM_NAME_LEN];
1457                struct igt_live_test t;
1458                unsigned int idx;
1459
1460                snprintf(name, sizeof(name), "%ps", *fn);
1461                err = igt_live_test_begin(&t, i915, __func__, name);
1462                if (err)
1463                        break;
1464
1465                atomic_set(&i915->selftest.counter, nengines);
1466
1467                idx = 0;
1468                for_each_uabi_engine(engine, i915) {
1469                        tsk[idx] = kthread_run(*fn, engine,
1470                                               "igt/parallel:%s",
1471                                               engine->name);
1472                        if (IS_ERR(tsk[idx])) {
1473                                err = PTR_ERR(tsk[idx]);
1474                                break;
1475                        }
1476                        get_task_struct(tsk[idx++]);
1477                }
1478
1479                yield(); /* start all threads before we kthread_stop() */
1480
1481                idx = 0;
1482                for_each_uabi_engine(engine, i915) {
1483                        int status;
1484
1485                        if (IS_ERR(tsk[idx]))
1486                                break;
1487
1488                        status = kthread_stop(tsk[idx]);
1489                        if (status && !err)
1490                                err = status;
1491
1492                        put_task_struct(tsk[idx++]);
1493                }
1494
1495                if (igt_live_test_end(&t))
1496                        err = -EIO;
1497        }
1498
1499        kfree(tsk);
1500        return err;
1501}
1502
1503static int
1504max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1505{
1506        struct i915_request *rq;
1507        int ret;
1508
1509        /*
1510         * Before execlists, all contexts share the same ringbuffer. With
1511         * execlists, each context/engine has a separate ringbuffer and
1512         * for the purposes of this test, inexhaustible.
1513         *
1514         * For the global ringbuffer though, we have to be very careful
1515         * that we do not wrap while preventing the execution of requests
 1516         * with an unsignaled fence.
1517         */
1518        if (HAS_EXECLISTS(ctx->i915))
1519                return INT_MAX;
1520
1521        rq = igt_request_alloc(ctx, engine);
1522        if (IS_ERR(rq)) {
1523                ret = PTR_ERR(rq);
1524        } else {
1525                int sz;
1526
1527                ret = rq->ring->size - rq->reserved_space;
1528                i915_request_add(rq);
1529
1530                sz = rq->ring->emit - rq->head;
1531                if (sz < 0)
1532                        sz += rq->ring->size;
1533                ret /= sz;
1534                ret /= 2; /* leave half spare, in case of emergency! */
1535        }
1536
1537        return ret;
1538}
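
/*
 * A worked example of the estimate above, with purely illustrative numbers:
 * on a legacy (non-execlists) ring of, say, 16KiB with 256 bytes of
 * reserved space, and a request that advances the ring by 192 bytes,
 *
 *	ret = (16384 - 256) / 192 = 84, then ret /= 2 -> 42
 *
 * i.e. the smoketest would limit itself to ~42 unsubmitted batches per
 * context so that stalling them behind an unsignaled fence cannot wrap the
 * shared ringbuffer. On execlists platforms the limit is simply INT_MAX.
 */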
1539
1540static int live_breadcrumbs_smoketest(void *arg)
1541{
1542        struct drm_i915_private *i915 = arg;
1543        const unsigned int nengines = num_uabi_engines(i915);
1544        const unsigned int ncpus = num_online_cpus();
1545        unsigned long num_waits, num_fences;
1546        struct intel_engine_cs *engine;
1547        struct task_struct **threads;
1548        struct igt_live_test live;
1549        intel_wakeref_t wakeref;
1550        struct smoketest *smoke;
1551        unsigned int n, idx;
1552        struct file *file;
1553        int ret = 0;
1554
1555        /*
1556         * Smoketest our breadcrumb/signal handling for requests across multiple
1557         * threads. A very simple test to only catch the most egregious of bugs.
1558         * See __igt_breadcrumbs_smoketest();
1559         *
1560         * On real hardware this time.
1561         */
1562
1563        wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1564
1565        file = mock_file(i915);
1566        if (IS_ERR(file)) {
1567                ret = PTR_ERR(file);
1568                goto out_rpm;
1569        }
1570
1571        smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1572        if (!smoke) {
1573                ret = -ENOMEM;
1574                goto out_file;
1575        }
1576
1577        threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1578        if (!threads) {
1579                ret = -ENOMEM;
1580                goto out_smoke;
1581        }
1582
1583        smoke[0].request_alloc = __live_request_alloc;
1584        smoke[0].ncontexts = 64;
1585        smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1586                                    sizeof(*smoke[0].contexts),
1587                                    GFP_KERNEL);
1588        if (!smoke[0].contexts) {
1589                ret = -ENOMEM;
1590                goto out_threads;
1591        }
1592
1593        for (n = 0; n < smoke[0].ncontexts; n++) {
1594                smoke[0].contexts[n] = live_context(i915, file);
1595                if (IS_ERR(smoke[0].contexts[n])) {
1596                        ret = PTR_ERR(smoke[0].contexts[n]);
1597                        goto out_contexts;
1598                }
1599        }
1600
1601        ret = igt_live_test_begin(&live, i915, __func__, "");
1602        if (ret)
1603                goto out_contexts;
1604
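            /*
             * One smoketest per engine, with ncpus worker threads per engine
             * all sharing the same pool of 64 contexts; let them run until
             * the selftest timeout expires and then tally the results.
             */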
1605        idx = 0;
1606        for_each_uabi_engine(engine, i915) {
1607                smoke[idx] = smoke[0];
1608                smoke[idx].engine = engine;
1609                smoke[idx].max_batch =
1610                        max_batches(smoke[0].contexts[0], engine);
1611                if (smoke[idx].max_batch < 0) {
1612                        ret = smoke[idx].max_batch;
1613                        goto out_flush;
1614                }
1615                /* One ring interleaved between requests from all cpus */
1616                smoke[idx].max_batch /= num_online_cpus() + 1;
1617                pr_debug("Limiting batches to %d requests on %s\n",
1618                         smoke[idx].max_batch, engine->name);
1619
1620                for (n = 0; n < ncpus; n++) {
1621                        struct task_struct *tsk;
1622
1623                        tsk = kthread_run(__igt_breadcrumbs_smoketest,
1624                                          &smoke[idx], "igt/%d.%d", idx, n);
1625                        if (IS_ERR(tsk)) {
1626                                ret = PTR_ERR(tsk);
1627                                goto out_flush;
1628                        }
1629
1630                        get_task_struct(tsk);
1631                        threads[idx * ncpus + n] = tsk;
1632                }
1633
1634                idx++;
1635        }
1636
1637        yield(); /* start all threads before we begin */
1638        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1639
1640out_flush:
1641        idx = 0;
1642        num_waits = 0;
1643        num_fences = 0;
1644        for_each_uabi_engine(engine, i915) {
1645                for (n = 0; n < ncpus; n++) {
1646                        struct task_struct *tsk = threads[idx * ncpus + n];
1647                        int err;
1648
1649                        if (!tsk)
1650                                continue;
1651
1652                        err = kthread_stop(tsk);
1653                        if (err < 0 && !ret)
1654                                ret = err;
1655
1656                        put_task_struct(tsk);
1657                }
1658
1659                num_waits += atomic_long_read(&smoke[idx].num_waits);
1660                num_fences += atomic_long_read(&smoke[idx].num_fences);
1661                idx++;
1662        }
1663        pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1664                num_waits, num_fences, idx, ncpus);
1665
1666        ret = igt_live_test_end(&live) ?: ret;
1667out_contexts:
1668        kfree(smoke[0].contexts);
1669out_threads:
1670        kfree(threads);
1671out_smoke:
1672        kfree(smoke);
1673out_file:
1674        fput(file);
1675out_rpm:
1676        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1677
1678        return ret;
1679}
1680
1681int i915_request_live_selftests(struct drm_i915_private *i915)
1682{
1683        static const struct i915_subtest tests[] = {
1684                SUBTEST(live_nop_request),
1685                SUBTEST(live_all_engines),
1686                SUBTEST(live_sequential_engines),
1687                SUBTEST(live_parallel_engines),
1688                SUBTEST(live_empty_request),
1689                SUBTEST(live_cancel_request),
1690                SUBTEST(live_breadcrumbs_smoketest),
1691        };
1692
1693        if (intel_gt_is_wedged(&i915->gt))
1694                return 0;
1695
1696        return i915_subtests(tests, i915);
1697}
1698
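    /*
     * Queue a request on the engine's kernel context behind the context's
     * most recent request and wait for it to complete, then flush until the
     * engine idles. This ensures the context has been switched out (and its
     * accumulated runtime updated) before the perf tests below sample
     * intel_context_get_total_runtime_ns().
     */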
1699static int switch_to_kernel_sync(struct intel_context *ce, int err)
1700{
1701        struct i915_request *rq;
1702        struct dma_fence *fence;
1703
1704        rq = intel_engine_create_kernel_request(ce->engine);
1705        if (IS_ERR(rq))
1706                return PTR_ERR(rq);
1707
1708        fence = i915_active_fence_get(&ce->timeline->last_request);
1709        if (fence) {
1710                i915_request_await_dma_fence(rq, fence);
1711                dma_fence_put(fence);
1712        }
1713
1714        rq = i915_request_get(rq);
1715        i915_request_add(rq);
1716        if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
1717                err = -ETIME;
1718        i915_request_put(rq);
1719
1720        while (!err && !intel_engine_is_idle(ce->engine))
1721                intel_engine_flush_submission(ce->engine);
1722
1723        return err;
1724}
1725
1726struct perf_stats {
1727        struct intel_engine_cs *engine;
1728        unsigned long count;
1729        ktime_t time;
1730        ktime_t busy;
1731        u64 runtime;
1732};
1733
1734struct perf_series {
1735        struct drm_i915_private *i915;
1736        unsigned int nengines;
1737        struct intel_context *ce[];
1738};
1739
1740static int cmp_u32(const void *A, const void *B)
1741{
1742        const u32 *a = A, *b = B;
1743
1744        return *a - *b;
1745}
1746
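    /*
     * Reduce five samples to one filtered value: sort them and sum the
     * middle three, counting the median twice. The result is thus 4x a
     * weighted average, e.g. {3, 100, 102, 104, 1000} -> 100 + 2*102 + 104
     * = 408, reported by the callers as 408 >> TF_BIAS = 102. cycles_to_ns()
     * applies the same 1 << TF_BIAS divisor when converting to time.
     */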
1747static u32 trifilter(u32 *a)
1748{
1749        u64 sum;
1750
1751#define TF_COUNT 5
1752        sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL);
1753
1754        sum = mul_u32_u32(a[2], 2);
1755        sum += a[1];
1756        sum += a[3];
1757
1758        GEM_BUG_ON(sum > U32_MAX);
1759        return sum;
1760#define TF_BIAS 2
1761}
1762
1763static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles)
1764{
1765        u64 ns = intel_gt_clock_interval_to_ns(engine->gt, cycles);
1766
1767        return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS);
1768}
1769
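    /*
     * Command emission helpers shared by the latency probes below:
     * MI_STORE_REGISTER_MEM copies RING_TIMESTAMP into a GGTT address,
     * MI_STORE_DWORD_IMM writes an immediate marker value, and
     * MI_SEMAPHORE_WAIT in polling mode stalls the command streamer until
     * the value at that address satisfies the requested comparison.
     */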
1770static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset)
1771{
1772        *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1773        *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base)));
1774        *cs++ = offset;
1775        *cs++ = 0;
1776
1777        return cs;
1778}
1779
1780static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value)
1781{
1782        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1783        *cs++ = offset;
1784        *cs++ = 0;
1785        *cs++ = value;
1786
1787        return cs;
1788}
1789
1790static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset)
1791{
1792        *cs++ = MI_SEMAPHORE_WAIT |
1793                MI_SEMAPHORE_GLOBAL_GTT |
1794                MI_SEMAPHORE_POLL |
1795                mode;
1796        *cs++ = value;
1797        *cs++ = offset;
1798        *cs++ = 0;
1799
1800        return cs;
1801}
1802
1803static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value)
1804{
1805        return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset);
1806}
1807
1808static void semaphore_set(u32 *sema, u32 value)
1809{
1810        WRITE_ONCE(*sema, value);
1811        wmb(); /* flush the update to the cache, and beyond */
1812}
1813
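    /*
     * hwsp_scratch() returns a zeroed scratch area (21 dwords) within the
     * engine's status page for the probes to use as semaphores and
     * timestamp slots; hwsp_offset() converts a CPU pointer into that page
     * into the GGTT offset referenced by the emitted GPU commands.
     */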
1814static u32 *hwsp_scratch(const struct intel_context *ce)
1815{
1816        return memset32(ce->engine->status_page.addr + 1000, 0, 21);
1817}
1818
1819static u32 hwsp_offset(const struct intel_context *ce, u32 *dw)
1820{
1821        return (i915_ggtt_offset(ce->engine->status_page.vma) +
1822                offset_in_page(dw));
1823}
1824
1825static int measure_semaphore_response(struct intel_context *ce)
1826{
1827        u32 *sema = hwsp_scratch(ce);
1828        const u32 offset = hwsp_offset(ce, sema);
1829        u32 elapsed[TF_COUNT], cycles;
1830        struct i915_request *rq;
1831        u32 *cs;
1832        int err;
1833        int i;
1834
1835        /*
1836         * Measure how many cycles it takes for the HW to detect the change
1837         * in a semaphore value.
1838         *
1839         *    A: read CS_TIMESTAMP from CPU
1840         *    poke semaphore
1841         *    B: read CS_TIMESTAMP on GPU
1842         *
1843         * Semaphore latency: B - A
1844         */
1845
1846        semaphore_set(sema, -1);
1847
1848        rq = i915_request_create(ce);
1849        if (IS_ERR(rq))
1850                return PTR_ERR(rq);
1851
1852        cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed));
1853        if (IS_ERR(cs)) {
1854                i915_request_add(rq);
1855                err = PTR_ERR(cs);
1856                goto err;
1857        }
1858
1859        cs = emit_store_dw(cs, offset, 0);
1860        for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
1861                cs = emit_semaphore_poll_until(cs, offset, i);
1862                cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
1863                cs = emit_store_dw(cs, offset, 0);
1864        }
1865
1866        intel_ring_advance(rq, cs);
1867        i915_request_add(rq);
1868
1869        if (wait_for(READ_ONCE(*sema) == 0, 50)) {
1870                err = -EIO;
1871                goto err;
1872        }
1873
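            /*
             * For each sample: read the CS timestamp from the CPU, then poke
             * the semaphore to release the GPU poll. The batch stores its own
             * CS timestamp into sema[i] and resets sema[0] to 0 as an ack, so
             * once *sema reads back as 0, sema[i] - cycles is how long the HW
             * took to notice the memory update.
             */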
1874        for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
1875                preempt_disable();
1876                cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
1877                semaphore_set(sema, i);
1878                preempt_enable();
1879
1880                if (wait_for(READ_ONCE(*sema) == 0, 50)) {
1881                        err = -EIO;
1882                        goto err;
1883                }
1884
1885                elapsed[i - 1] = sema[i] - cycles;
1886        }
1887
1888        cycles = trifilter(elapsed);
1889        pr_info("%s: semaphore response %d cycles, %lluns\n",
1890                ce->engine->name, cycles >> TF_BIAS,
1891                cycles_to_ns(ce->engine, cycles));
1892
1893        return intel_gt_wait_for_idle(ce->engine->gt, HZ);
1894
1895err:
1896        intel_gt_set_wedged(ce->engine->gt);
1897        return err;
1898}
1899
1900static int measure_idle_dispatch(struct intel_context *ce)
1901{
1902        u32 *sema = hwsp_scratch(ce);
1903        const u32 offset = hwsp_offset(ce, sema);
1904        u32 elapsed[TF_COUNT], cycles;
1905        u32 *cs;
1906        int err;
1907        int i;
1908
1909        /*
1910         * Measure how long it takes for us to submit a request while the
1911         * engine is idle, but is resting in our context.
1912         *
1913         *    A: read CS_TIMESTAMP from CPU
1914         *    submit request
1915         *    B: read CS_TIMESTAMP on GPU
1916         *
1917         * Submission latency: B - A
1918         */
1919
1920        for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
1921                struct i915_request *rq;
1922
1923                err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
1924                if (err)
1925                        return err;
1926
1927                rq = i915_request_create(ce);
1928                if (IS_ERR(rq)) {
1929                        err = PTR_ERR(rq);
1930                        goto err;
1931                }
1932
1933                cs = intel_ring_begin(rq, 4);
1934                if (IS_ERR(cs)) {
1935                        i915_request_add(rq);
1936                        err = PTR_ERR(cs);
1937                        goto err;
1938                }
1939
1940                cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
1941
1942                intel_ring_advance(rq, cs);
1943
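                    /*
                     * Sample RING_TIMESTAMP and queue the request back to
                     * back; preemption and softirqs are disabled so nothing
                     * else runs on this CPU in between and widens the
                     * measured gap.
                     */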
1944                preempt_disable();
1945                local_bh_disable();
1946                elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
1947                i915_request_add(rq);
1948                local_bh_enable();
1949                preempt_enable();
1950        }
1951
1952        err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
1953        if (err)
1954                goto err;
1955
1956        for (i = 0; i < ARRAY_SIZE(elapsed); i++)
1957                elapsed[i] = sema[i] - elapsed[i];
1958
1959        cycles = trifilter(elapsed);
1960        pr_info("%s: idle dispatch latency %d cycles, %lluns\n",
1961                ce->engine->name, cycles >> TF_BIAS,
1962                cycles_to_ns(ce->engine, cycles));
1963
1964        return intel_gt_wait_for_idle(ce->engine->gt, HZ);
1965
1966err:
1967        intel_gt_set_wedged(ce->engine->gt);
1968        return err;
1969}
1970
1971static int measure_busy_dispatch(struct intel_context *ce)
1972{
1973        u32 *sema = hwsp_scratch(ce);
1974        const u32 offset = hwsp_offset(ce, sema);
1975        u32 elapsed[TF_COUNT + 1], cycles;
1976        u32 *cs;
1977        int err;
1978        int i;
1979
1980        /*
1981         * Measure how long it takes for us to submit a request while the
1982         * engine is busy, polling on a semaphore in our context. With
1983         * direct submission, this will include the cost of a lite restore.
1984         *
1985         *    A: read CS_TIMESTAMP from CPU
1986         *    submit request
1987         *    B: read CS_TIMESTAMP on GPU
1988         *
1989         * Submission latency: B - A
1990         */
1991
1992        for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
1993                struct i915_request *rq;
1994
1995                rq = i915_request_create(ce);
1996                if (IS_ERR(rq)) {
1997                        err = PTR_ERR(rq);
1998                        goto err;
1999                }
2000
2001                cs = intel_ring_begin(rq, 12);
2002                if (IS_ERR(cs)) {
2003                        i915_request_add(rq);
2004                        err = PTR_ERR(cs);
2005                        goto err;
2006                }
2007
2008                cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
2009                cs = emit_semaphore_poll_until(cs, offset, i);
2010                cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2011
2012                intel_ring_advance(rq, cs);
2013
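                    /*
                     * Ensure the previous batch is already executing (it
                     * writes -1 into its slot as its first command) so the
                     * engine is busy spinning on the semaphore, then sample
                     * the CPU-side timestamp, queue the next request and
                     * release the spin so the engine rolls onwards.
                     */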
2014                if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) {
2015                        err = -EIO;
2016                        goto err;
2017                }
2018
2019                preempt_disable();
2020                local_bh_disable();
2021                elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2022                i915_request_add(rq);
2023                local_bh_enable();
2024                semaphore_set(sema, i - 1);
2025                preempt_enable();
2026        }
2027
2028        wait_for(READ_ONCE(sema[i - 1]), 500);
2029        semaphore_set(sema, i - 1);
2030
2031        for (i = 1; i <= TF_COUNT; i++) {
2032                GEM_BUG_ON(sema[i] == -1);
2033                elapsed[i - 1] = sema[i] - elapsed[i];
2034        }
2035
2036        cycles = trifilter(elapsed);
2037        pr_info("%s: busy dispatch latency %d cycles, %lluns\n",
2038                ce->engine->name, cycles >> TF_BIAS,
2039                cycles_to_ns(ce->engine, cycles));
2040
2041        return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2042
2043err:
2044        intel_gt_set_wedged(ce->engine->gt);
2045        return err;
2046}
2047
2048static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value)
2049{
2050        const u32 offset =
2051                i915_ggtt_offset(engine->status_page.vma) +
2052                offset_in_page(sema);
2053        struct i915_request *rq;
2054        u32 *cs;
2055
2056        rq = i915_request_create(engine->kernel_context);
2057        if (IS_ERR(rq))
2058                return PTR_ERR(rq);
2059
2060        cs = intel_ring_begin(rq, 4);
2061        if (IS_ERR(cs)) {
2062                i915_request_add(rq);
2063                return PTR_ERR(cs);
2064        }
2065
2066        cs = emit_semaphore_poll(cs, mode, value, offset);
2067
2068        intel_ring_advance(rq, cs);
2069        i915_request_add(rq);
2070
2071        return 0;
2072}
2073
2074static int measure_inter_request(struct intel_context *ce)
2075{
2076        u32 *sema = hwsp_scratch(ce);
2077        const u32 offset = hwsp_offset(ce, sema);
2078        u32 elapsed[TF_COUNT + 1], cycles;
2079        struct i915_sw_fence *submit;
2080        int i, err;
2081
2082        /*
2083         * Measure how long it takes to advance from one request into the
2084         * next. Between each request we flush the GPU caches to memory,
2085         * update the breadcrumbs, and then invalidate those caches.
2086         * We queue up all the requests to be submitted in one batch so
2087         * it should be one set of contiguous measurements.
2088         *
2089         *    A: read CS_TIMESTAMP on GPU
2090         *    advance request
2091         *    B: read CS_TIMESTAMP on GPU
2092         *
2093         * Request latency: B - A
2094         */
2095
2096        err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2097        if (err)
2098                return err;
2099
2100        submit = heap_fence_create(GFP_KERNEL);
2101        if (!submit) {
2102                semaphore_set(sema, 1);
2103                return -ENOMEM;
2104        }
2105
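            /*
             * Every request below gates its submission on the same sw fence;
             * committing that fence afterwards releases the whole batch in
             * one go, while the plugged semaphore holds back execution until
             * everything has been queued.
             */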
2106        intel_engine_flush_submission(ce->engine);
2107        for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2108                struct i915_request *rq;
2109                u32 *cs;
2110
2111                rq = i915_request_create(ce);
2112                if (IS_ERR(rq)) {
2113                        err = PTR_ERR(rq);
2114                        goto err_submit;
2115                }
2116
2117                err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
2118                                                       submit,
2119                                                       GFP_KERNEL);
2120                if (err < 0) {
2121                        i915_request_add(rq);
2122                        goto err_submit;
2123                }
2124
2125                cs = intel_ring_begin(rq, 4);
2126                if (IS_ERR(cs)) {
2127                        i915_request_add(rq);
2128                        err = PTR_ERR(cs);
2129                        goto err_submit;
2130                }
2131
2132                cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2133
2134                intel_ring_advance(rq, cs);
2135                i915_request_add(rq);
2136        }
2137        i915_sw_fence_commit(submit);
2138        intel_engine_flush_submission(ce->engine);
2139        heap_fence_put(submit);
2140
2141        semaphore_set(sema, 1);
2142        err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2143        if (err)
2144                goto err;
2145
2146        for (i = 1; i <= TF_COUNT; i++)
2147                elapsed[i - 1] = sema[i + 1] - sema[i];
2148
2149        cycles = trifilter(elapsed);
2150        pr_info("%s: inter-request latency %d cycles, %lluns\n",
2151                ce->engine->name, cycles >> TF_BIAS,
2152                cycles_to_ns(ce->engine, cycles));
2153
2154        return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2155
2156err_submit:
2157        i915_sw_fence_commit(submit);
2158        heap_fence_put(submit);
2159        semaphore_set(sema, 1);
2160err:
2161        intel_gt_set_wedged(ce->engine->gt);
2162        return err;
2163}
2164
2165static int measure_context_switch(struct intel_context *ce)
2166{
2167        u32 *sema = hwsp_scratch(ce);
2168        const u32 offset = hwsp_offset(ce, sema);
2169        struct i915_request *fence = NULL;
2170        u32 elapsed[TF_COUNT + 1], cycles;
2171        int i, j, err;
2172        u32 *cs;
2173
2174        /*
2175         * Measure how long it takes to advance from one request in one
2176         * context to a request in another context. This allows us to
2177         * measure how long the context save/restore take, along with all
2178         * the inter-context setup we require.
2179         *
2180         *    A: read CS_TIMESTAMP on GPU
2181         *    switch context
2182         *    B: read CS_TIMESTAMP on GPU
2183         *
2184         * Context switch latency: B - A
2185         */
2186
2187        err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2188        if (err)
2189                return err;
2190
2191        for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2192                struct intel_context *arr[] = {
2193                        ce, ce->engine->kernel_context
2194                };
2195                u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32);
2196
2197                for (j = 0; j < ARRAY_SIZE(arr); j++) {
2198                        struct i915_request *rq;
2199
2200                        rq = i915_request_create(arr[j]);
2201                        if (IS_ERR(rq)) {
2202                                err = PTR_ERR(rq);
2203                                goto err_fence;
2204                        }
2205
2206                        if (fence) {
2207                                err = i915_request_await_dma_fence(rq,
2208                                                                   &fence->fence);
2209                                if (err) {
2210                                        i915_request_add(rq);
2211                                        goto err_fence;
2212                                }
2213                        }
2214
2215                        cs = intel_ring_begin(rq, 4);
2216                        if (IS_ERR(cs)) {
2217                                i915_request_add(rq);
2218                                err = PTR_ERR(cs);
2219                                goto err_fence;
2220                        }
2221
2222                        cs = emit_timestamp_store(cs, ce, addr);
2223                        addr += sizeof(u32);
2224
2225                        intel_ring_advance(rq, cs);
2226
2227                        i915_request_put(fence);
2228                        fence = i915_request_get(rq);
2229
2230                        i915_request_add(rq);
2231                }
2232        }
2233        i915_request_put(fence);
2234        intel_engine_flush_submission(ce->engine);
2235
2236        semaphore_set(sema, 1);
2237        err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2238        if (err)
2239                goto err;
2240
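            /*
             * Iteration i writes its timestamps into dwords 2i (ce) and
             * 2i + 1 (kernel context), so sema[2i + 2] - sema[2i + 1] is the
             * gap from a kernel-context request to the next request back in
             * ce, i.e. one full context switch.
             */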
2241        for (i = 1; i <= TF_COUNT; i++)
2242                elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1];
2243
2244        cycles = trifilter(elapsed);
2245        pr_info("%s: context switch latency %d cycles, %lluns\n",
2246                ce->engine->name, cycles >> TF_BIAS,
2247                cycles_to_ns(ce->engine, cycles));
2248
2249        return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2250
2251err_fence:
2252        i915_request_put(fence);
2253        semaphore_set(sema, 1);
2254err:
2255        intel_gt_set_wedged(ce->engine->gt);
2256        return err;
2257}
2258
2259static int measure_preemption(struct intel_context *ce)
2260{
2261        u32 *sema = hwsp_scratch(ce);
2262        const u32 offset = hwsp_offset(ce, sema);
2263        u32 elapsed[TF_COUNT], cycles;
2264        u32 *cs;
2265        int err;
2266        int i;
2267
2268        /*
2269         * We measure two latencies while triggering preemption. The first
2270         * latency is how long it takes for us to submit a preempting request.
2271         * The second latency is how long it takes for us to return from the
2272         * preemption back to the original context.
2273         *
2274         *    A: read CS_TIMESTAMP from CPU
2275         *    submit preemption
2276         *    B: read CS_TIMESTAMP on GPU (in preempting context)
2277         *    context switch
2278         *    C: read CS_TIMESTAMP on GPU (in original context)
2279         *
2280         * Preemption dispatch latency: B - A
2281         * Preemption switch latency: C - B
2282         */
2283
2284        if (!intel_engine_has_preemption(ce->engine))
2285                return 0;
2286
2287        for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2288                u32 addr = offset + 2 * i * sizeof(u32);
2289                struct i915_request *rq;
2290
2291                rq = i915_request_create(ce);
2292                if (IS_ERR(rq)) {
2293                        err = PTR_ERR(rq);
2294                        goto err;
2295                }
2296
2297                cs = intel_ring_begin(rq, 12);
2298                if (IS_ERR(cs)) {
2299                        i915_request_add(rq);
2300                        err = PTR_ERR(cs);
2301                        goto err;
2302                }
2303
2304                cs = emit_store_dw(cs, addr, -1);
2305                cs = emit_semaphore_poll_until(cs, offset, i);
2306                cs = emit_timestamp_store(cs, ce, addr + sizeof(u32));
2307
2308                intel_ring_advance(rq, cs);
2309                i915_request_add(rq);
2310
2311                if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) {
2312                        err = -EIO;
2313                        goto err;
2314                }
2315
2316                rq = i915_request_create(ce->engine->kernel_context);
2317                if (IS_ERR(rq)) {
2318                        err = PTR_ERR(rq);
2319                        goto err;
2320                }
2321
2322                cs = intel_ring_begin(rq, 8);
2323                if (IS_ERR(cs)) {
2324                        i915_request_add(rq);
2325                        err = PTR_ERR(cs);
2326                        goto err;
2327                }
2328
2329                cs = emit_timestamp_store(cs, ce, addr);
2330                cs = emit_store_dw(cs, offset, i);
2331
2332                intel_ring_advance(rq, cs);
2333                rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2334
2335                elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2336                i915_request_add(rq);
2337        }
2338
2339        if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) {
2340                err = -EIO;
2341                goto err;
2342        }
2343
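            /*
             * sema[2i] holds the timestamp written by the preempting
             * (kernel context) request and sema[2i + 1] the timestamp
             * written once the original context resumes, while elapsed[]
             * holds the CPU timestamps sampled just before each preemption
             * was submitted.
             */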
2344        for (i = 1; i <= TF_COUNT; i++)
2345                elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1];
2346
2347        cycles = trifilter(elapsed);
2348        pr_info("%s: preemption dispatch latency %d cycles, %lluns\n",
2349                ce->engine->name, cycles >> TF_BIAS,
2350                cycles_to_ns(ce->engine, cycles));
2351
2352        for (i = 1; i <= TF_COUNT; i++)
2353                elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0];
2354
2355        cycles = trifilter(elapsed);
2356        pr_info("%s: preemption switch latency %d cycles, %lluns\n",
2357                ce->engine->name, cycles >> TF_BIAS,
2358                cycles_to_ns(ce->engine, cycles));
2359
2360        return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2361
2362err:
2363        intel_gt_set_wedged(ce->engine->gt);
2364        return err;
2365}
2366
2367struct signal_cb {
2368        struct dma_fence_cb base;
2369        bool seen;
2370};
2371
2372static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
2373{
2374        struct signal_cb *s = container_of(cb, typeof(*s), base);
2375
2376        smp_store_mb(s->seen, true); /* be safe, be strong */
2377}
2378
2379static int measure_completion(struct intel_context *ce)
2380{
2381        u32 *sema = hwsp_scratch(ce);
2382        const u32 offset = hwsp_offset(ce, sema);
2383        u32 elapsed[TF_COUNT], cycles;
2384        u32 *cs;
2385        int err;
2386        int i;
2387
2388        /*
2389         * Measure how long it takes for the signal (interrupt) to be
2390         * sent from the GPU and then processed by the CPU.
2391         *
2392         *    A: read CS_TIMESTAMP on GPU
2393         *    signal
2394         *    B: read CS_TIMESTAMP from CPU
2395         *
2396         * Completion latency: B - A
2397         */
2398
2399        for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2400                struct signal_cb cb = { .seen = false };
2401                struct i915_request *rq;
2402
2403                rq = i915_request_create(ce);
2404                if (IS_ERR(rq)) {
2405                        err = PTR_ERR(rq);
2406                        goto err;
2407                }
2408
2409                cs = intel_ring_begin(rq, 12);
2410                if (IS_ERR(cs)) {
2411                        i915_request_add(rq);
2412                        err = PTR_ERR(cs);
2413                        goto err;
2414                }
2415
2416                cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
2417                cs = emit_semaphore_poll_until(cs, offset, i);
2418                cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2419
2420                intel_ring_advance(rq, cs);
2421
2422                dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
2423                i915_request_add(rq);
2424
2425                intel_engine_flush_submission(ce->engine);
2426                if (wait_for(READ_ONCE(sema[i]) == -1, 50)) {
2427                        err = -EIO;
2428                        goto err;
2429                }
2430
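                    /*
                     * Release the batch and spin until its fence callback
                     * fires; the CS timestamp sampled at that point, minus
                     * the timestamp the GPU wrote as its final command, is
                     * the signal delivery + processing latency.
                     */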
2431                preempt_disable();
2432                semaphore_set(sema, i);
2433                while (!READ_ONCE(cb.seen))
2434                        cpu_relax();
2435
2436                elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2437                preempt_enable();
2438        }
2439
2440        err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2441        if (err)
2442                goto err;
2443
2444        for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
2445                GEM_BUG_ON(sema[i + 1] == -1);
2446                elapsed[i] = elapsed[i] - sema[i + 1];
2447        }
2448
2449        cycles = trifilter(elapsed);
2450        pr_info("%s: completion latency %d cycles, %lluns\n",
2451                ce->engine->name, cycles >> TF_BIAS,
2452                cycles_to_ns(ce->engine, cycles));
2453
2454        return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2455
2456err:
2457        intel_gt_set_wedged(ce->engine->gt);
2458        return err;
2459}
2460
2461static void rps_pin(struct intel_gt *gt)
2462{
2463        /* Pin the frequency to max */
2464        atomic_inc(&gt->rps.num_waiters);
2465        intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
2466
2467        mutex_lock(&gt->rps.lock);
2468        intel_rps_set(&gt->rps, gt->rps.max_freq);
2469        mutex_unlock(&gt->rps.lock);
2470}
2471
2472static void rps_unpin(struct intel_gt *gt)
2473{
2474        intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
2475        atomic_dec(&gt->rps.num_waiters);
2476}
2477
2478static int perf_request_latency(void *arg)
2479{
2480        struct drm_i915_private *i915 = arg;
2481        struct intel_engine_cs *engine;
2482        struct pm_qos_request qos;
2483        int err = 0;
2484
2485        if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */
2486                return 0;
2487
2488        cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2489
2490        for_each_uabi_engine(engine, i915) {
2491                struct intel_context *ce;
2492
2493                ce = intel_context_create(engine);
2494                if (IS_ERR(ce)) {
2495                        err = PTR_ERR(ce);
2496                        goto out;
2497                }
2498
2499                err = intel_context_pin(ce);
2500                if (err) {
2501                        intel_context_put(ce);
2502                        goto out;
2503                }
2504
2505                st_engine_heartbeat_disable(engine);
2506                rps_pin(engine->gt);
2507
2508                if (err == 0)
2509                        err = measure_semaphore_response(ce);
2510                if (err == 0)
2511                        err = measure_idle_dispatch(ce);
2512                if (err == 0)
2513                        err = measure_busy_dispatch(ce);
2514                if (err == 0)
2515                        err = measure_inter_request(ce);
2516                if (err == 0)
2517                        err = measure_context_switch(ce);
2518                if (err == 0)
2519                        err = measure_preemption(ce);
2520                if (err == 0)
2521                        err = measure_completion(ce);
2522
2523                rps_unpin(engine->gt);
2524                st_engine_heartbeat_enable(engine);
2525
2526                intel_context_unpin(ce);
2527                intel_context_put(ce);
2528                if (err)
2529                        goto out;
2530        }
2531
2532out:
2533        if (igt_flush_test(i915))
2534                err = -EIO;
2535
2536        cpu_latency_qos_remove_request(&qos);
2537        return err;
2538}
2539
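    /*
     * Three request-throughput profiles cycling round-robin over every
     * pinned context: s_sync0 waits for each request to complete before
     * issuing the next, s_sync1 keeps one request in flight by waiting only
     * on the previous request, and s_many floods the engines without
     * waiting at all.
     */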
2540static int s_sync0(void *arg)
2541{
2542        struct perf_series *ps = arg;
2543        IGT_TIMEOUT(end_time);
2544        unsigned int idx = 0;
2545        int err = 0;
2546
2547        GEM_BUG_ON(!ps->nengines);
2548        do {
2549                struct i915_request *rq;
2550
2551                rq = i915_request_create(ps->ce[idx]);
2552                if (IS_ERR(rq)) {
2553                        err = PTR_ERR(rq);
2554                        break;
2555                }
2556
2557                i915_request_get(rq);
2558                i915_request_add(rq);
2559
2560                if (i915_request_wait(rq, 0, HZ / 5) < 0)
2561                        err = -ETIME;
2562                i915_request_put(rq);
2563                if (err)
2564                        break;
2565
2566                if (++idx == ps->nengines)
2567                        idx = 0;
2568        } while (!__igt_timeout(end_time, NULL));
2569
2570        return err;
2571}
2572
2573static int s_sync1(void *arg)
2574{
2575        struct perf_series *ps = arg;
2576        struct i915_request *prev = NULL;
2577        IGT_TIMEOUT(end_time);
2578        unsigned int idx = 0;
2579        int err = 0;
2580
2581        GEM_BUG_ON(!ps->nengines);
2582        do {
2583                struct i915_request *rq;
2584
2585                rq = i915_request_create(ps->ce[idx]);
2586                if (IS_ERR(rq)) {
2587                        err = PTR_ERR(rq);
2588                        break;
2589                }
2590
2591                i915_request_get(rq);
2592                i915_request_add(rq);
2593
2594                if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
2595                        err = -ETIME;
2596                i915_request_put(prev);
2597                prev = rq;
2598                if (err)
2599                        break;
2600
2601                if (++idx == ps->nengines)
2602                        idx = 0;
2603        } while (!__igt_timeout(end_time, NULL));
2604        i915_request_put(prev);
2605
2606        return err;
2607}
2608
2609static int s_many(void *arg)
2610{
2611        struct perf_series *ps = arg;
2612        IGT_TIMEOUT(end_time);
2613        unsigned int idx = 0;
2614
2615        GEM_BUG_ON(!ps->nengines);
2616        do {
2617                struct i915_request *rq;
2618
2619                rq = i915_request_create(ps->ce[idx]);
2620                if (IS_ERR(rq))
2621                        return PTR_ERR(rq);
2622
2623                i915_request_add(rq);
2624
2625                if (++idx == ps->nengines)
2626                        idx = 0;
2627        } while (!__igt_timeout(end_time, NULL));
2628
2629        return 0;
2630}
2631
2632static int perf_series_engines(void *arg)
2633{
2634        struct drm_i915_private *i915 = arg;
2635        static int (* const func[])(void *arg) = {
2636                s_sync0,
2637                s_sync1,
2638                s_many,
2639                NULL,
2640        };
2641        const unsigned int nengines = num_uabi_engines(i915);
2642        struct intel_engine_cs *engine;
2643        int (* const *fn)(void *arg);
2644        struct pm_qos_request qos;
2645        struct perf_stats *stats;
2646        struct perf_series *ps;
2647        unsigned int idx;
2648        int err = 0;
2649
2650        stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
2651        if (!stats)
2652                return -ENOMEM;
2653
2654        ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
2655        if (!ps) {
2656                kfree(stats);
2657                return -ENOMEM;
2658        }
2659
2660        cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2661
2662        ps->i915 = i915;
2663        ps->nengines = nengines;
2664
2665        idx = 0;
2666        for_each_uabi_engine(engine, i915) {
2667                struct intel_context *ce;
2668
2669                ce = intel_context_create(engine);
2670                if (IS_ERR(ce)) {
2671                        err = PTR_ERR(ce);
2672                        goto out;
2673                }
2674
2675                err = intel_context_pin(ce);
2676                if (err) {
2677                        intel_context_put(ce);
2678                        goto out;
2679                }
2680
2681                ps->ce[idx++] = ce;
2682        }
2683        GEM_BUG_ON(idx != ps->nengines);
2684
2685        for (fn = func; *fn && !err; fn++) {
2686                char name[KSYM_NAME_LEN];
2687                struct igt_live_test t;
2688
2689                snprintf(name, sizeof(name), "%ps", *fn);
2690                err = igt_live_test_begin(&t, i915, __func__, name);
2691                if (err)
2692                        break;
2693
2694                for (idx = 0; idx < nengines; idx++) {
2695                        struct perf_stats *p =
2696                                memset(&stats[idx], 0, sizeof(stats[idx]));
2697                        struct intel_context *ce = ps->ce[idx];
2698
2699                        p->engine = ps->ce[idx]->engine;
2700                        intel_engine_pm_get(p->engine);
2701
2702                        if (intel_engine_supports_stats(p->engine))
2703                                p->busy = intel_engine_get_busy_time(p->engine,
2704                                                                     &p->time) + 1;
2705                        else
2706                                p->time = ktime_get();
2707                        p->runtime = -intel_context_get_total_runtime_ns(ce);
2708                }
2709
2710                err = (*fn)(ps);
2711                if (igt_live_test_end(&t))
2712                        err = -EIO;
2713
2714                for (idx = 0; idx < nengines; idx++) {
2715                        struct perf_stats *p = &stats[idx];
2716                        struct intel_context *ce = ps->ce[idx];
2717                        int integer, decimal;
2718                        u64 busy, dt, now;
2719
2720                        if (p->busy)
2721                                p->busy = ktime_sub(intel_engine_get_busy_time(p->engine,
2722                                                                               &now),
2723                                                    p->busy - 1);
2724                        else
2725                                now = ktime_get();
2726                        p->time = ktime_sub(now, p->time);
2727
2728                        err = switch_to_kernel_sync(ce, err);
2729                        p->runtime += intel_context_get_total_runtime_ns(ce);
2730                        intel_engine_pm_put(p->engine);
2731
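                            /*
                             * Express busyness as a percentage with two
                             * decimal places: the integer part is
                             * 100 * busy / dt and the remainder is scaled
                             * by another 100 for the fractional digits.
                             */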
2732                        busy = 100 * ktime_to_ns(p->busy);
2733                        dt = ktime_to_ns(p->time);
2734                        if (dt) {
2735                                integer = div64_u64(busy, dt);
2736                                busy -= integer * dt;
2737                                decimal = div64_u64(100 * busy, dt);
2738                        } else {
2739                                integer = 0;
2740                                decimal = 0;
2741                        }
2742
2743                        pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
2744                                name, p->engine->name, ce->timeline->seqno,
2745                                integer, decimal,
2746                                div_u64(p->runtime, 1000 * 1000),
2747                                div_u64(ktime_to_ns(p->time), 1000 * 1000));
2748                }
2749        }
2750
2751out:
2752        for (idx = 0; idx < nengines; idx++) {
2753                if (IS_ERR_OR_NULL(ps->ce[idx]))
2754                        break;
2755
2756                intel_context_unpin(ps->ce[idx]);
2757                intel_context_put(ps->ce[idx]);
2758        }
2759        kfree(ps);
2760
2761        cpu_latency_qos_remove_request(&qos);
2762        kfree(stats);
2763        return err;
2764}
2765
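    /*
     * Per-engine counterparts of the profiles above, each driven from its
     * own kthread against a private context: p_sync0 is fully synchronous,
     * p_sync1 keeps one request in flight and p_many submits without
     * waiting, each recording how many requests it managed within the
     * selftest timeout along with engine busyness and context runtime.
     */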
2766static int p_sync0(void *arg)
2767{
2768        struct perf_stats *p = arg;
2769        struct intel_engine_cs *engine = p->engine;
2770        struct intel_context *ce;
2771        IGT_TIMEOUT(end_time);
2772        unsigned long count;
2773        bool busy;
2774        int err = 0;
2775
2776        ce = intel_context_create(engine);
2777        if (IS_ERR(ce))
2778                return PTR_ERR(ce);
2779
2780        err = intel_context_pin(ce);
2781        if (err) {
2782                intel_context_put(ce);
2783                return err;
2784        }
2785
2786        if (intel_engine_supports_stats(engine)) {
2787                p->busy = intel_engine_get_busy_time(engine, &p->time);
2788                busy = true;
2789        } else {
2790                p->time = ktime_get();
2791                busy = false;
2792        }
2793
2794        count = 0;
2795        do {
2796                struct i915_request *rq;
2797
2798                rq = i915_request_create(ce);
2799                if (IS_ERR(rq)) {
2800                        err = PTR_ERR(rq);
2801                        break;
2802                }
2803
2804                i915_request_get(rq);
2805                i915_request_add(rq);
2806
2807                err = 0;
2808                if (i915_request_wait(rq, 0, HZ / 5) < 0)
2809                        err = -ETIME;
2810                i915_request_put(rq);
2811                if (err)
2812                        break;
2813
2814                count++;
2815        } while (!__igt_timeout(end_time, NULL));
2816
2817        if (busy) {
2818                ktime_t now;
2819
2820                p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
2821                                    p->busy);
2822                p->time = ktime_sub(now, p->time);
2823        } else {
2824                p->time = ktime_sub(ktime_get(), p->time);
2825        }
2826
2827        err = switch_to_kernel_sync(ce, err);
2828        p->runtime = intel_context_get_total_runtime_ns(ce);
2829        p->count = count;
2830
2831        intel_context_unpin(ce);
2832        intel_context_put(ce);
2833        return err;
2834}
2835
2836static int p_sync1(void *arg)
2837{
2838        struct perf_stats *p = arg;
2839        struct intel_engine_cs *engine = p->engine;
2840        struct i915_request *prev = NULL;
2841        struct intel_context *ce;
2842        IGT_TIMEOUT(end_time);
2843        unsigned long count;
2844        bool busy;
2845        int err = 0;
2846
2847        ce = intel_context_create(engine);
2848        if (IS_ERR(ce))
2849                return PTR_ERR(ce);
2850
2851        err = intel_context_pin(ce);
2852        if (err) {
2853                intel_context_put(ce);
2854                return err;
2855        }
2856
2857        if (intel_engine_supports_stats(engine)) {
2858                p->busy = intel_engine_get_busy_time(engine, &p->time);
2859                busy = true;
2860        } else {
2861                p->time = ktime_get();
2862                busy = false;
2863        }
2864
2865        count = 0;
2866        do {
2867                struct i915_request *rq;
2868
2869                rq = i915_request_create(ce);
2870                if (IS_ERR(rq)) {
2871                        err = PTR_ERR(rq);
2872                        break;
2873                }
2874
2875                i915_request_get(rq);
2876                i915_request_add(rq);
2877
2878                err = 0;
2879                if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
2880                        err = -ETIME;
2881                i915_request_put(prev);
2882                prev = rq;
2883                if (err)
2884                        break;
2885
2886                count++;
2887        } while (!__igt_timeout(end_time, NULL));
2888        i915_request_put(prev);
2889
2890        if (busy) {
2891                ktime_t now;
2892
2893                p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
2894                                    p->busy);
2895                p->time = ktime_sub(now, p->time);
2896        } else {
2897                p->time = ktime_sub(ktime_get(), p->time);
2898        }
2899
2900        err = switch_to_kernel_sync(ce, err);
2901        p->runtime = intel_context_get_total_runtime_ns(ce);
2902        p->count = count;
2903
2904        intel_context_unpin(ce);
2905        intel_context_put(ce);
2906        return err;
2907}
2908
2909static int p_many(void *arg)
2910{
2911        struct perf_stats *p = arg;
2912        struct intel_engine_cs *engine = p->engine;
2913        struct intel_context *ce;
2914        IGT_TIMEOUT(end_time);
2915        unsigned long count;
2916        int err = 0;
2917        bool busy;
2918
2919        ce = intel_context_create(engine);
2920        if (IS_ERR(ce))
2921                return PTR_ERR(ce);
2922
2923        err = intel_context_pin(ce);
2924        if (err) {
2925                intel_context_put(ce);
2926                return err;
2927        }
2928
2929        if (intel_engine_supports_stats(engine)) {
2930                p->busy = intel_engine_get_busy_time(engine, &p->time);
2931                busy = true;
2932        } else {
2933                p->time = ktime_get();
2934                busy = false;
2935        }
2936
2937        count = 0;
2938        do {
2939                struct i915_request *rq;
2940
2941                rq = i915_request_create(ce);
2942                if (IS_ERR(rq)) {
2943                        err = PTR_ERR(rq);
2944                        break;
2945                }
2946
2947                i915_request_add(rq);
2948                count++;
2949        } while (!__igt_timeout(end_time, NULL));
2950
2951        if (busy) {
2952                ktime_t now;
2953
2954                p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
2955                                    p->busy);
2956                p->time = ktime_sub(now, p->time);
2957        } else {
2958                p->time = ktime_sub(ktime_get(), p->time);
2959        }
2960
2961        err = switch_to_kernel_sync(ce, err);
2962        p->runtime = intel_context_get_total_runtime_ns(ce);
2963        p->count = count;
2964
2965        intel_context_unpin(ce);
2966        intel_context_put(ce);
2967        return err;
2968}
2969
2970static int perf_parallel_engines(void *arg)
2971{
2972        struct drm_i915_private *i915 = arg;
2973        static int (* const func[])(void *arg) = {
2974                p_sync0,
2975                p_sync1,
2976                p_many,
2977                NULL,
2978        };
2979        const unsigned int nengines = num_uabi_engines(i915);
2980        struct intel_engine_cs *engine;
2981        int (* const *fn)(void *arg);
2982        struct pm_qos_request qos;
2983        struct {
2984                struct perf_stats p;
2985                struct task_struct *tsk;
2986        } *engines;
2987        int err = 0;
2988
2989        engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
2990        if (!engines)
2991                return -ENOMEM;
2992
2993        cpu_latency_qos_add_request(&qos, 0);
2994
2995        for (fn = func; *fn; fn++) {
2996                char name[KSYM_NAME_LEN];
2997                struct igt_live_test t;
2998                unsigned int idx;
2999
3000                snprintf(name, sizeof(name), "%ps", *fn);
3001                err = igt_live_test_begin(&t, i915, __func__, name);
3002                if (err)
3003                        break;
3004
3005                atomic_set(&i915->selftest.counter, nengines);
3006
3007                idx = 0;
3008                for_each_uabi_engine(engine, i915) {
3009                        intel_engine_pm_get(engine);
3010
3011                        memset(&engines[idx].p, 0, sizeof(engines[idx].p));
3012                        engines[idx].p.engine = engine;
3013
3014                        engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
3015                                                       "igt:%s", engine->name);
3016                        if (IS_ERR(engines[idx].tsk)) {
3017                                err = PTR_ERR(engines[idx].tsk);
3018                                intel_engine_pm_put(engine);
3019                                break;
3020                        }
3021                        get_task_struct(engines[idx++].tsk);
3022                }
3023
3024                yield(); /* start all threads before we kthread_stop() */
3025
3026                idx = 0;
3027                for_each_uabi_engine(engine, i915) {
3028                        int status;
3029
3030                        if (IS_ERR(engines[idx].tsk))
3031                                break;
3032
3033                        status = kthread_stop(engines[idx].tsk);
3034                        if (status && !err)
3035                                err = status;
3036
3037                        intel_engine_pm_put(engine);
3038                        put_task_struct(engines[idx++].tsk);
3039                }
3040
3041                if (igt_live_test_end(&t))
3042                        err = -EIO;
3043                if (err)
3044                        break;
3045
3046                idx = 0;
3047                for_each_uabi_engine(engine, i915) {
3048                        struct perf_stats *p = &engines[idx].p;
3049                        u64 busy = 100 * ktime_to_ns(p->busy);
3050                        u64 dt = ktime_to_ns(p->time);
3051                        int integer, decimal;
3052
3053                        if (dt) {
3054                                integer = div64_u64(busy, dt);
3055                                busy -= integer * dt;
3056                                decimal = div64_u64(100 * busy, dt);
3057                        } else {
3058                                integer = 0;
3059                                decimal = 0;
3060                        }
3061
3062                        GEM_BUG_ON(engine != p->engine);
3063                        pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
3064                                name, engine->name, p->count, integer, decimal,
3065                                div_u64(p->runtime, 1000 * 1000),
3066                                div_u64(ktime_to_ns(p->time), 1000 * 1000));
3067                        idx++;
3068                }
3069        }
3070
3071        cpu_latency_qos_remove_request(&qos);
3072        kfree(engines);
3073        return err;
3074}
3075
3076int i915_request_perf_selftests(struct drm_i915_private *i915)
3077{
3078        static const struct i915_subtest tests[] = {
3079                SUBTEST(perf_request_latency),
3080                SUBTEST(perf_series_engines),
3081                SUBTEST(perf_parallel_engines),
3082        };
3083
3084        if (intel_gt_is_wedged(&i915->gt))
3085                return 0;
3086
3087        return i915_subtests(tests, i915);
3088}
3089