linux/drivers/gpu/drm/i915/selftests/i915_request.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;
        int err = -ENOMEM;

        /* Basic preliminary test to create a request and let it loose! */

        mutex_lock(&i915->drm.struct_mutex);
        request = mock_request(i915->engine[RCS0],
                               i915->kernel_context,
                               HZ / 10);
        if (!request)
                goto out_unlock;

        i915_request_add(request);

        err = 0;
out_unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static int igt_wait_request(void *arg)
{
        const long T = HZ / 4;
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;
        int err = -EINVAL;

        /* Submit a request, then wait upon it */

        mutex_lock(&i915->drm.struct_mutex);
        request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
        if (!request) {
                err = -ENOMEM;
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, 0) != -ETIME) {
                pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T) != -ETIME) {
                pr_err("request wait succeeded (expected timeout before submit!)\n");
                goto out_unlock;
        }

        if (i915_request_completed(request)) {
                pr_err("request completed before submit!!\n");
                goto out_unlock;
        }

        i915_request_add(request);

        if (i915_request_wait(request, 0, 0) != -ETIME) {
                pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
                goto out_unlock;
        }

        if (i915_request_completed(request)) {
                pr_err("request completed immediately!\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T / 2) != -ETIME) {
                pr_err("request wait succeeded (expected timeout!)\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T) == -ETIME) {
                pr_err("request wait timed out!\n");
                goto out_unlock;
        }

        if (!i915_request_completed(request)) {
                pr_err("request not complete after waiting!\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T) == -ETIME) {
                pr_err("request wait timed out when already complete!\n");
                goto out_unlock;
        }

        err = 0;
out_unlock:
        mock_device_flush(i915);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static int igt_fence_wait(void *arg)
{
        const long T = HZ / 4;
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;
        int err = -EINVAL;

        /* Submit a request, treat it as a fence and wait upon it */

        mutex_lock(&i915->drm.struct_mutex);
        request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
        if (!request) {
                err = -ENOMEM;
                goto out_locked;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
                pr_err("fence wait success before submit (expected timeout)!\n");
                goto out_locked;
        }

        i915_request_add(request);
        mutex_unlock(&i915->drm.struct_mutex);

        if (dma_fence_is_signaled(&request->fence)) {
                pr_err("fence signaled immediately!\n");
                goto out_device;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
                pr_err("fence wait success after submit (expected timeout)!\n");
                goto out_device;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
                pr_err("fence wait timed out (expected success)!\n");
                goto out_device;
        }

        if (!dma_fence_is_signaled(&request->fence)) {
                pr_err("fence unsignaled after waiting!\n");
                goto out_device;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
                pr_err("fence wait timed out when complete (expected success)!\n");
                goto out_device;
        }

        err = 0;
out_device:
        mutex_lock(&i915->drm.struct_mutex);
out_locked:
        mock_device_flush(i915);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

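/*
 * Queue a slow, low priority request, then inject a "vip" request ahead of
 * it by manual reordering (cancel, submit the vip, resubmit the original),
 * checking that the vip completes while the original is still pending.
 * A minimal stand-in for preemption on the mock engine.
 */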
static int igt_request_rewind(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request, *vip;
        struct i915_gem_context *ctx[2];
        int err = -EINVAL;

        mutex_lock(&i915->drm.struct_mutex);
        ctx[0] = mock_context(i915, "A");
        request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
        if (!request) {
                err = -ENOMEM;
                goto err_context_0;
        }

        i915_request_get(request);
        i915_request_add(request);

        ctx[1] = mock_context(i915, "B");
        vip = mock_request(i915->engine[RCS0], ctx[1], 0);
        if (!vip) {
                err = -ENOMEM;
                goto err_context_1;
        }

        /* Simulate preemption by manual reordering */
        if (!mock_cancel_request(request)) {
                pr_err("failed to cancel request (already executed)!\n");
                i915_request_add(vip);
                goto err_context_1;
        }
        i915_request_get(vip);
        i915_request_add(vip);
        rcu_read_lock();
        request->engine->submit_request(request);
        rcu_read_unlock();

        mutex_unlock(&i915->drm.struct_mutex);

        if (i915_request_wait(vip, 0, HZ) == -ETIME) {
                pr_err("timed out waiting for high priority request\n");
                goto err;
        }

        if (i915_request_completed(request)) {
                pr_err("low priority request already completed\n");
                goto err;
        }

        err = 0;
err:
        i915_request_put(vip);
        mutex_lock(&i915->drm.struct_mutex);
err_context_1:
        mock_context_close(ctx[1]);
        i915_request_put(request);
err_context_0:
        mock_context_close(ctx[0]);
        mock_device_flush(i915);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

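/*
 * Parameters shared by the breadcrumbs smoketests below: each worker thread
 * allocates requests from @contexts on @engine, batching up to @max_batch
 * requests per iteration, and tallies its progress in @num_waits and
 * @num_fences.
 */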
struct smoketest {
        struct intel_engine_cs *engine;
        struct i915_gem_context **contexts;
        atomic_long_t num_waits, num_fences;
        int ncontexts, max_batch;
        struct i915_request *(*request_alloc)(struct i915_gem_context *,
                                              struct intel_engine_cs *);
};

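/* Request constructors, so the same smoketest body can drive either the
 * mock engine or real hardware.
 */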
static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
                     struct intel_engine_cs *engine)
{
        return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
                     struct intel_engine_cs *engine)
{
        return igt_request_alloc(ctx, engine);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
        struct smoketest *t = arg;
        struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
        const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
        const unsigned int total = 4 * t->ncontexts + 1;
        unsigned int num_waits = 0, num_fences = 0;
        struct i915_request **requests;
        I915_RND_STATE(prng);
        unsigned int *order;
        int err = 0;

        /*
         * A very simple test to catch the most egregious of list handling bugs.
         *
         * At its heart, we simply create oodles of requests running across
         * multiple kthreads and enable signaling on them, for the sole purpose
         * of stressing our breadcrumb handling. The only inspection we do is
         * that the fences were marked as signaled.
         */

        requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
        if (!requests)
                return -ENOMEM;

        order = i915_random_order(total, &prng);
        if (!order) {
                err = -ENOMEM;
                goto out_requests;
        }

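        /*
         * Each iteration: hold a random batch of requests back with the
         * @submit fence, chain their completions onto @wait, then release
         * the batch and check every fence signals within the timeout.
         */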
        while (!kthread_should_stop()) {
                struct i915_sw_fence *submit, *wait;
                unsigned int n, count;

                submit = heap_fence_create(GFP_KERNEL);
                if (!submit) {
                        err = -ENOMEM;
                        break;
                }

                wait = heap_fence_create(GFP_KERNEL);
                if (!wait) {
                        i915_sw_fence_commit(submit);
                        heap_fence_put(submit);
                        err = -ENOMEM;
                        break;
                }

                i915_random_reorder(order, total, &prng);
                count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

                for (n = 0; n < count; n++) {
                        struct i915_gem_context *ctx =
                                t->contexts[order[n] % t->ncontexts];
                        struct i915_request *rq;

                        mutex_lock(BKL);

                        rq = t->request_alloc(ctx, t->engine);
                        if (IS_ERR(rq)) {
                                mutex_unlock(BKL);
                                err = PTR_ERR(rq);
                                count = n;
                                break;
                        }

                        err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
                                                               submit,
                                                               GFP_KERNEL);

                        requests[n] = i915_request_get(rq);
                        i915_request_add(rq);

                        mutex_unlock(BKL);

                        if (err >= 0)
                                err = i915_sw_fence_await_dma_fence(wait,
                                                                    &rq->fence,
                                                                    0,
                                                                    GFP_KERNEL);

                        if (err < 0) {
                                i915_request_put(rq);
                                count = n;
                                break;
                        }
                }

                i915_sw_fence_commit(submit);
                i915_sw_fence_commit(wait);

                if (!wait_event_timeout(wait->wait,
                                        i915_sw_fence_done(wait),
                                        HZ / 2)) {
                        struct i915_request *rq = requests[count - 1];

                        pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
                               count,
                               rq->fence.context, rq->fence.seqno,
                               t->engine->name);
                        i915_gem_set_wedged(t->engine->i915);
                        GEM_BUG_ON(!i915_request_completed(rq));
                        i915_sw_fence_wait(wait);
                        err = -EIO;
                }

                for (n = 0; n < count; n++) {
                        struct i915_request *rq = requests[n];

                        if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
                                      &rq->fence.flags)) {
                                pr_err("%llu:%llu was not signaled!\n",
                                       rq->fence.context, rq->fence.seqno);
                                err = -EINVAL;
                        }

                        i915_request_put(rq);
                }

                heap_fence_put(wait);
                heap_fence_put(submit);

                if (err < 0)
                        break;

                num_fences += count;
                num_waits++;

                cond_resched();
        }

        atomic_long_add(num_fences, &t->num_fences);
        atomic_long_add(num_waits, &t->num_waits);

        kfree(order);
out_requests:
        kfree(requests);
        return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct smoketest t = {
                .engine = i915->engine[RCS0],
                .ncontexts = 1024,
                .max_batch = 1024,
                .request_alloc = __mock_request_alloc
        };
        unsigned int ncpus = num_online_cpus();
        struct task_struct **threads;
        unsigned int n;
        int ret = 0;

        /*
         * Smoketest our breadcrumb/signal handling for requests across multiple
         * threads. A very simple test to only catch the most egregious of bugs.
         * See __igt_breadcrumbs_smoketest();
         */

        threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
        if (!threads)
                return -ENOMEM;

        t.contexts =
                kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
        if (!t.contexts) {
                ret = -ENOMEM;
                goto out_threads;
        }

        mutex_lock(&t.engine->i915->drm.struct_mutex);
        for (n = 0; n < t.ncontexts; n++) {
                t.contexts[n] = mock_context(t.engine->i915, "mock");
                if (!t.contexts[n]) {
                        ret = -ENOMEM;
                        goto out_contexts;
                }
        }
        mutex_unlock(&t.engine->i915->drm.struct_mutex);

        for (n = 0; n < ncpus; n++) {
                threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
                                         &t, "igt/%d", n);
                if (IS_ERR(threads[n])) {
                        ret = PTR_ERR(threads[n]);
                        ncpus = n;
                        break;
                }

                get_task_struct(threads[n]);
        }

        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

        for (n = 0; n < ncpus; n++) {
                int err;

                err = kthread_stop(threads[n]);
                if (err < 0 && !ret)
                        ret = err;

                put_task_struct(threads[n]);
        }
        pr_info("Completed %lu waits for %lu fences across %d cpus\n",
                atomic_long_read(&t.num_waits),
                atomic_long_read(&t.num_fences),
                ncpus);

        mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
        for (n = 0; n < t.ncontexts; n++) {
                if (!t.contexts[n])
                        break;
                mock_context_close(t.contexts[n]);
        }
        mutex_unlock(&t.engine->i915->drm.struct_mutex);
        kfree(t.contexts);
out_threads:
        kfree(threads);

        return ret;
}

int i915_request_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_add_request),
                SUBTEST(igt_wait_request),
                SUBTEST(igt_fence_wait),
                SUBTEST(igt_request_rewind),
                SUBTEST(mock_breadcrumbs_smoketest),
        };
        struct drm_i915_private *i915;
        intel_wakeref_t wakeref;
        int err = 0;

        i915 = mock_gem_device();
        if (!i915)
                return -ENOMEM;

        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
                err = i915_subtests(tests, i915);

        drm_dev_put(&i915->drm);

        return err;
}

static int live_nop_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        unsigned int id;
        int err = -ENODEV;

        /* Submit various sized batches of empty requests, to each engine
         * (individually), and wait for the batch to complete. We can check
         * the overhead of submitting requests to the hardware.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        for_each_engine(engine, i915, id) {
                struct i915_request *request = NULL;
                unsigned long n, prime;
                IGT_TIMEOUT(end_time);
                ktime_t times[2] = {};

                err = igt_live_test_begin(&t, i915, __func__, engine->name);
                if (err)
                        goto out_unlock;

                for_each_prime_number_from(prime, 1, 8192) {
                        times[1] = ktime_get_raw();

                        for (n = 0; n < prime; n++) {
                                request = i915_request_create(engine->kernel_context);
                                if (IS_ERR(request)) {
                                        err = PTR_ERR(request);
                                        goto out_unlock;
                                }

                                /* This space is left intentionally blank.
                                 *
                                 * We do not actually want to perform any
                                 * action with this request, we just want
                                 * to measure the latency in allocation
                                 * and submission of our breadcrumbs -
                                 * ensuring that the bare request is sufficient
                                 * for the system to work (i.e. proper HEAD
                                 * tracking of the rings, interrupt handling,
                                 * etc). It also gives us the lowest bounds
                                 * for latency.
                                 */

                                i915_request_add(request);
                        }
                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
                        if (prime == 1)
                                times[0] = times[1];

                        if (__igt_timeout(end_time, NULL))
                                break;
                }

                err = igt_live_test_end(&t);
                if (err)
                        goto out_unlock;

                pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
                        engine->name,
                        ktime_to_ns(times[0]),
                        prime, div64_u64(ktime_to_ns(times[1]), prime));
        }

out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

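/* Build a batch containing a single MI_BATCH_BUFFER_END, pinned into the
 * global GTT so any engine can execute it.
 */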
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 *cmd;
        int err;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto err;
        }

        *cmd = MI_BATCH_BUFFER_END;

        __i915_gem_object_flush_map(obj, 0, 64);
        i915_gem_object_unpin_map(obj);

        i915_gem_chipset_flush(i915);

        vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
        if (err)
                goto err;

        return vma;

err:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

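/* Wrap the empty batch in a request on @engine's kernel context. */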
static struct i915_request *
empty_request(struct intel_engine_cs *engine,
              struct i915_vma *batch)
{
        struct i915_request *request;
        int err;

        request = i915_request_create(engine->kernel_context);
        if (IS_ERR(request))
                return request;

        err = engine->emit_bb_start(request,
                                    batch->node.start,
                                    batch->node.size,
                                    I915_DISPATCH_SECURE);
        if (err)
                goto out_request;

out_request:
        i915_request_add(request);
        return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        struct i915_vma *batch;
        unsigned int id;
        int err = 0;

        /* Submit various sized batches of empty requests, to each engine
         * (individually), and wait for the batch to complete. We can check
         * the overhead of submitting requests to the hardware.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        batch = empty_batch(i915);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_unlock;
        }

        for_each_engine(engine, i915, id) {
                IGT_TIMEOUT(end_time);
                struct i915_request *request;
                unsigned long n, prime;
                ktime_t times[2] = {};

                err = igt_live_test_begin(&t, i915, __func__, engine->name);
                if (err)
                        goto out_batch;

                /* Warmup / preload */
                request = empty_request(engine, batch);
                if (IS_ERR(request)) {
                        err = PTR_ERR(request);
                        goto out_batch;
                }
                i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                for_each_prime_number_from(prime, 1, 8192) {
                        times[1] = ktime_get_raw();

                        for (n = 0; n < prime; n++) {
                                request = empty_request(engine, batch);
                                if (IS_ERR(request)) {
                                        err = PTR_ERR(request);
                                        goto out_batch;
                                }
                        }
                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
                        if (prime == 1)
                                times[0] = times[1];

                        if (__igt_timeout(end_time, NULL))
                                break;
                }

                err = igt_live_test_end(&t);
                if (err)
                        goto out_batch;

                pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
                        engine->name,
                        ktime_to_ns(times[0]),
                        prime, div64_u64(ktime_to_ns(times[1]), prime));
        }

out_batch:
        i915_vma_unpin(batch);
        i915_vma_put(batch);
out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

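/*
 * Build a batch whose first instruction jumps back to its own start, so it
 * spins on the GPU until recursive_batch_resolve() overwrites that jump.
 * The trailing MI_BATCH_BUFFER_END only terminates the batch early if
 * something goes wrong before the loop is resolved.
 */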
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
        struct i915_gem_context *ctx = i915->kernel_context;
        struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
        struct drm_i915_gem_object *obj;
        const int gen = INTEL_GEN(i915);
        struct i915_vma *vma;
        u32 *cmd;
        int err;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto err;

        cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto err;
        }

        if (gen >= 8) {
                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
                *cmd++ = lower_32_bits(vma->node.start);
                *cmd++ = upper_32_bits(vma->node.start);
        } else if (gen >= 6) {
                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
                *cmd++ = lower_32_bits(vma->node.start);
        } else {
                *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
                *cmd++ = lower_32_bits(vma->node.start);
        }
        *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

        __i915_gem_object_flush_map(obj, 0, 64);
        i915_gem_object_unpin_map(obj);

        i915_gem_chipset_flush(i915);

        return vma;

err:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

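/* Replace the self-referencing jump with MI_BATCH_BUFFER_END, letting the
 * spinning batch (and every request blocked on it) retire.
 */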
static int recursive_batch_resolve(struct i915_vma *batch)
{
        u32 *cmd;

        cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
        if (IS_ERR(cmd))
                return PTR_ERR(cmd);

        *cmd = MI_BATCH_BUFFER_END;
        i915_gem_chipset_flush(batch->vm->i915);

        i915_gem_object_unpin_map(batch->obj);

        return 0;
}

static int live_all_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        struct i915_request *request[I915_NUM_ENGINES];
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        struct i915_vma *batch;
        unsigned int id;
        int err;

        /* Check we can submit requests to all engines simultaneously. We
         * send a recursive batch to each engine - checking that we don't
         * block doing so, and that they don't complete too soon.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        err = igt_live_test_begin(&t, i915, __func__, "");
        if (err)
                goto out_unlock;

        batch = recursive_batch(i915);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
                goto out_unlock;
        }

        for_each_engine(engine, i915, id) {
                request[id] = i915_request_create(engine->kernel_context);
                if (IS_ERR(request[id])) {
                        err = PTR_ERR(request[id]);
                        pr_err("%s: Request allocation failed with err=%d\n",
                               __func__, err);
                        goto out_request;
                }

                err = engine->emit_bb_start(request[id],
                                            batch->node.start,
                                            batch->node.size,
                                            0);
                GEM_BUG_ON(err);
                request[id]->batch = batch;

                i915_vma_lock(batch);
                err = i915_vma_move_to_active(batch, request[id], 0);
                i915_vma_unlock(batch);
                GEM_BUG_ON(err);

                i915_request_get(request[id]);
                i915_request_add(request[id]);
        }

        for_each_engine(engine, i915, id) {
                if (i915_request_completed(request[id])) {
                        pr_err("%s(%s): request completed too early!\n",
                               __func__, engine->name);
                        err = -EINVAL;
                        goto out_request;
                }
        }

        err = recursive_batch_resolve(batch);
        if (err) {
                pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
                goto out_request;
        }

        for_each_engine(engine, i915, id) {
                long timeout;

                timeout = i915_request_wait(request[id], 0,
                                            MAX_SCHEDULE_TIMEOUT);
                if (timeout < 0) {
                        err = timeout;
                        pr_err("%s: error waiting for request on %s, err=%d\n",
                               __func__, engine->name, err);
                        goto out_request;
                }

                GEM_BUG_ON(!i915_request_completed(request[id]));
                i915_request_put(request[id]);
                request[id] = NULL;
        }

        err = igt_live_test_end(&t);

out_request:
        for_each_engine(engine, i915, id)
                if (request[id])
                        i915_request_put(request[id]);
        i915_vma_unpin(batch);
        i915_vma_put(batch);
out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static int live_sequential_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request[I915_NUM_ENGINES] = {};
        struct i915_request *prev = NULL;
        struct intel_engine_cs *engine;
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        unsigned int id;
        int err;

        /* Check we can submit requests to all engines sequentially, such
         * that each successive request waits for the earlier ones. This
         * tests that we don't execute requests out of order, even though
         * they are running on independent engines.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        err = igt_live_test_begin(&t, i915, __func__, "");
        if (err)
                goto out_unlock;

        for_each_engine(engine, i915, id) {
                struct i915_vma *batch;

                batch = recursive_batch(i915);
                if (IS_ERR(batch)) {
                        err = PTR_ERR(batch);
                        pr_err("%s: Unable to create batch for %s, err=%d\n",
                               __func__, engine->name, err);
                        goto out_unlock;
                }

                request[id] = i915_request_create(engine->kernel_context);
                if (IS_ERR(request[id])) {
                        err = PTR_ERR(request[id]);
                        pr_err("%s: Request allocation failed for %s with err=%d\n",
                               __func__, engine->name, err);
                        goto out_request;
                }

                if (prev) {
                        err = i915_request_await_dma_fence(request[id],
                                                           &prev->fence);
                        if (err) {
                                i915_request_add(request[id]);
                                pr_err("%s: Request await failed for %s with err=%d\n",
                                       __func__, engine->name, err);
                                goto out_request;
                        }
                }

                err = engine->emit_bb_start(request[id],
                                            batch->node.start,
                                            batch->node.size,
                                            0);
                GEM_BUG_ON(err);
                request[id]->batch = batch;

                i915_vma_lock(batch);
                err = i915_vma_move_to_active(batch, request[id], 0);
                i915_vma_unlock(batch);
                GEM_BUG_ON(err);

                i915_request_get(request[id]);
                i915_request_add(request[id]);

                prev = request[id];
        }

        for_each_engine(engine, i915, id) {
                long timeout;

                if (i915_request_completed(request[id])) {
                        pr_err("%s(%s): request completed too early!\n",
                               __func__, engine->name);
                        err = -EINVAL;
                        goto out_request;
                }

                err = recursive_batch_resolve(request[id]->batch);
                if (err) {
                        pr_err("%s: failed to resolve batch, err=%d\n",
                               __func__, err);
                        goto out_request;
                }

                timeout = i915_request_wait(request[id], 0,
                                            MAX_SCHEDULE_TIMEOUT);
                if (timeout < 0) {
                        err = timeout;
                        pr_err("%s: error waiting for request on %s, err=%d\n",
                               __func__, engine->name, err);
                        goto out_request;
                }

                GEM_BUG_ON(!i915_request_completed(request[id]));
        }

        err = igt_live_test_end(&t);

out_request:
        for_each_engine(engine, i915, id) {
                u32 *cmd;

                if (!request[id])
                        break;

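                /* Terminate any batch still spinning before we drop it. */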
                cmd = i915_gem_object_pin_map(request[id]->batch->obj,
                                              I915_MAP_WC);
                if (!IS_ERR(cmd)) {
                        *cmd = MI_BATCH_BUFFER_END;
                        i915_gem_chipset_flush(i915);

                        i915_gem_object_unpin_map(request[id]->batch->obj);
                }

                i915_vma_put(request[id]->batch);
                i915_request_put(request[id]);
        }
out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

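/*
 * Estimate how many requests we can queue on @engine without wrapping the
 * legacy (shared) ringbuffer: roughly (ring->size - reserved_space) divided
 * by the size of one nop request, halved for safety. Execlists get a ring
 * per context, so are effectively unbounded here.
 */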
static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
        struct i915_request *rq;
        int ret;

        /*
         * Before execlists, all contexts share the same ringbuffer. With
         * execlists, each context/engine has a separate ringbuffer and
         * for the purposes of this test, inexhaustible.
         *
         * For the global ringbuffer though, we have to be very careful
         * that we do not wrap while preventing the execution of requests
         * with an unsignaled fence.
         */
        if (HAS_EXECLISTS(ctx->i915))
                return INT_MAX;

        rq = igt_request_alloc(ctx, engine);
        if (IS_ERR(rq)) {
                ret = PTR_ERR(rq);
        } else {
                int sz;

                ret = rq->ring->size - rq->reserved_space;
                i915_request_add(rq);

                sz = rq->ring->emit - rq->head;
                if (sz < 0)
                        sz += rq->ring->size;
                ret /= sz;
                ret /= 2; /* leave half spare, in case of emergency! */
        }

        return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct smoketest t[I915_NUM_ENGINES];
        unsigned int ncpus = num_online_cpus();
        unsigned long num_waits, num_fences;
        struct intel_engine_cs *engine;
        struct task_struct **threads;
        struct igt_live_test live;
        enum intel_engine_id id;
        intel_wakeref_t wakeref;
        struct drm_file *file;
        unsigned int n;
        int ret = 0;

        /*
         * Smoketest our breadcrumb/signal handling for requests across multiple
         * threads. A very simple test to only catch the most egregious of bugs.
         * See __igt_breadcrumbs_smoketest();
         *
         * On real hardware this time.
         */

        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        file = mock_file(i915);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
                goto out_rpm;
        }

        threads = kcalloc(ncpus * I915_NUM_ENGINES,
                          sizeof(*threads),
                          GFP_KERNEL);
        if (!threads) {
                ret = -ENOMEM;
                goto out_file;
        }

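        /* t[0] is the template; per-engine copies are stamped out below. */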
        memset(&t[0], 0, sizeof(t[0]));
        t[0].request_alloc = __live_request_alloc;
        t[0].ncontexts = 64;
        t[0].contexts = kmalloc_array(t[0].ncontexts,
                                      sizeof(*t[0].contexts),
                                      GFP_KERNEL);
        if (!t[0].contexts) {
                ret = -ENOMEM;
                goto out_threads;
        }

        mutex_lock(&i915->drm.struct_mutex);
        for (n = 0; n < t[0].ncontexts; n++) {
                t[0].contexts[n] = live_context(i915, file);
                if (!t[0].contexts[n]) {
                        ret = -ENOMEM;
                        goto out_contexts;
                }
        }

        ret = igt_live_test_begin(&live, i915, __func__, "");
        if (ret)
                goto out_contexts;

        for_each_engine(engine, i915, id) {
                t[id] = t[0];
                t[id].engine = engine;
                t[id].max_batch = max_batches(t[0].contexts[0], engine);
                if (t[id].max_batch < 0) {
                        ret = t[id].max_batch;
                        mutex_unlock(&i915->drm.struct_mutex);
                        goto out_flush;
                }
                /* One ring interleaved between requests from all cpus */
                t[id].max_batch /= num_online_cpus() + 1;
                pr_debug("Limiting batches to %d requests on %s\n",
                         t[id].max_batch, engine->name);

                for (n = 0; n < ncpus; n++) {
                        struct task_struct *tsk;

                        tsk = kthread_run(__igt_breadcrumbs_smoketest,
                                          &t[id], "igt/%d.%d", id, n);
                        if (IS_ERR(tsk)) {
                                ret = PTR_ERR(tsk);
                                mutex_unlock(&i915->drm.struct_mutex);
                                goto out_flush;
                        }

                        get_task_struct(tsk);
                        threads[id * ncpus + n] = tsk;
                }
        }
        mutex_unlock(&i915->drm.struct_mutex);

        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
        num_waits = 0;
        num_fences = 0;
        for_each_engine(engine, i915, id) {
                for (n = 0; n < ncpus; n++) {
                        struct task_struct *tsk = threads[id * ncpus + n];
                        int err;

                        if (!tsk)
                                continue;

                        err = kthread_stop(tsk);
                        if (err < 0 && !ret)
                                ret = err;

                        put_task_struct(tsk);
                }

                num_waits += atomic_long_read(&t[id].num_waits);
                num_fences += atomic_long_read(&t[id].num_fences);
        }
        pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
                num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

        mutex_lock(&i915->drm.struct_mutex);
        ret = igt_live_test_end(&live) ?: ret;
out_contexts:
        mutex_unlock(&i915->drm.struct_mutex);
        kfree(t[0].contexts);
out_threads:
        kfree(threads);
out_file:
        mock_file_free(i915, file);
out_rpm:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);

        return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_nop_request),
                SUBTEST(live_all_engines),
                SUBTEST(live_sequential_engines),
                SUBTEST(live_empty_request),
                SUBTEST(live_breadcrumbs_smoketest),
        };

        if (i915_terminally_wedged(i915))
                return 0;

        return i915_subtests(tests, i915);
}