linux/drivers/gpu/drm/i915/selftests/i915_request.c
   1/*
   2 * Copyright © 2016 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24
  25#include <linux/prime_numbers.h>
  26
  27#include "gem/i915_gem_pm.h"
  28#include "gem/selftests/mock_context.h"
  29
  30#include "gt/intel_engine_pm.h"
  31#include "gt/intel_gt.h"
  32
  33#include "i915_random.h"
  34#include "i915_selftest.h"
  35#include "igt_live_test.h"
  36#include "igt_spinner.h"
  37#include "lib_sw_fence.h"
  38
  39#include "mock_drm.h"
  40#include "mock_gem_device.h"
  41
  42static unsigned int num_uabi_engines(struct drm_i915_private *i915)
  43{
  44        struct intel_engine_cs *engine;
  45        unsigned int count;
  46
  47        count = 0;
  48        for_each_uabi_engine(engine, i915)
  49                count++;
  50
  51        return count;
  52}
  53
  54static int igt_add_request(void *arg)
  55{
  56        struct drm_i915_private *i915 = arg;
  57        struct i915_request *request;
  58
  59        /* Basic preliminary test to create a request and let it loose! */
  60
  61        request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
  62        if (!request)
  63                return -ENOMEM;
  64
  65        i915_request_add(request);
  66
  67        return 0;
  68}
  69
  70static int igt_wait_request(void *arg)
  71{
  72        const long T = HZ / 4;
  73        struct drm_i915_private *i915 = arg;
  74        struct i915_request *request;
  75        int err = -EINVAL;
  76
  77        /* Submit a request, then wait upon it */
  78
  79        request = mock_request(i915->engine[RCS0]->kernel_context, T);
  80        if (!request)
  81                return -ENOMEM;
  82
  83        i915_request_get(request);
  84
  85        if (i915_request_wait(request, 0, 0) != -ETIME) {
  86                pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
  87                goto out_request;
  88        }
  89
  90        if (i915_request_wait(request, 0, T) != -ETIME) {
  91                pr_err("request wait succeeded (expected timeout before submit!)\n");
  92                goto out_request;
  93        }
  94
  95        if (i915_request_completed(request)) {
  96                pr_err("request completed before submit!!\n");
  97                goto out_request;
  98        }
  99
 100        i915_request_add(request);
 101
 102        if (i915_request_wait(request, 0, 0) != -ETIME) {
 103                pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
 104                goto out_request;
 105        }
 106
 107        if (i915_request_completed(request)) {
 108                pr_err("request completed immediately!\n");
 109                goto out_request;
 110        }
 111
 112        if (i915_request_wait(request, 0, T / 2) != -ETIME) {
 113                pr_err("request wait succeeded (expected timeout!)\n");
 114                goto out_request;
 115        }
 116
 117        if (i915_request_wait(request, 0, T) == -ETIME) {
 118                pr_err("request wait timed out!\n");
 119                goto out_request;
 120        }
 121
 122        if (!i915_request_completed(request)) {
 123                pr_err("request not complete after waiting!\n");
 124                goto out_request;
 125        }
 126
 127        if (i915_request_wait(request, 0, T) == -ETIME) {
 128                pr_err("request wait timed out when already complete!\n");
 129                goto out_request;
 130        }
 131
 132        err = 0;
 133out_request:
 134        i915_request_put(request);
 135        mock_device_flush(i915);
 136        return err;
 137}
 138
 139static int igt_fence_wait(void *arg)
 140{
 141        const long T = HZ / 4;
 142        struct drm_i915_private *i915 = arg;
 143        struct i915_request *request;
 144        int err = -EINVAL;
 145
 146        /* Submit a request, treat it as a fence and wait upon it */
 147
 148        request = mock_request(i915->engine[RCS0]->kernel_context, T);
 149        if (!request)
 150                return -ENOMEM;
 151
 152        if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 153                pr_err("fence wait success before submit (expected timeout)!\n");
 154                goto out;
 155        }
 156
 157        i915_request_add(request);
 158
 159        if (dma_fence_is_signaled(&request->fence)) {
 160                pr_err("fence signaled immediately!\n");
 161                goto out;
 162        }
 163
 164        if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
 165                pr_err("fence wait success after submit (expected timeout)!\n");
 166                goto out;
 167        }
 168
 169        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 170                pr_err("fence wait timed out (expected success)!\n");
 171                goto out;
 172        }
 173
 174        if (!dma_fence_is_signaled(&request->fence)) {
 175                pr_err("fence unsignaled after waiting!\n");
 176                goto out;
 177        }
 178
 179        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 180                pr_err("fence wait timed out when complete (expected success)!\n");
 181                goto out;
 182        }
 183
 184        err = 0;
 185out:
 186        mock_device_flush(i915);
 187        return err;
 188}
 189
 190static int igt_request_rewind(void *arg)
 191{
 192        struct drm_i915_private *i915 = arg;
 193        struct i915_request *request, *vip;
 194        struct i915_gem_context *ctx[2];
 195        struct intel_context *ce;
 196        int err = -EINVAL;
 197
 198        ctx[0] = mock_context(i915, "A");
 199
 200        ce = i915_gem_context_get_engine(ctx[0], RCS0);
 201        GEM_BUG_ON(IS_ERR(ce));
 202        request = mock_request(ce, 2 * HZ);
 203        intel_context_put(ce);
 204        if (!request) {
 205                err = -ENOMEM;
 206                goto err_context_0;
 207        }
 208
 209        i915_request_get(request);
 210        i915_request_add(request);
 211
 212        ctx[1] = mock_context(i915, "B");
 213
 214        ce = i915_gem_context_get_engine(ctx[1], RCS0);
 215        GEM_BUG_ON(IS_ERR(ce));
 216        vip = mock_request(ce, 0);
 217        intel_context_put(ce);
 218        if (!vip) {
 219                err = -ENOMEM;
 220                goto err_context_1;
 221        }
 222
 223        /* Simulate preemption by manual reordering */
 224        if (!mock_cancel_request(request)) {
 225                pr_err("failed to cancel request (already executed)!\n");
 226                i915_request_add(vip);
 227                goto err_context_1;
 228        }
 229        i915_request_get(vip);
 230        i915_request_add(vip);
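             /* Resubmit the cancelled request; it now queues behind the vip */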
 231        rcu_read_lock();
 232        request->engine->submit_request(request);
 233        rcu_read_unlock();
  234
 236        if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 237                pr_err("timed out waiting for high priority request\n");
 238                goto err;
 239        }
 240
 241        if (i915_request_completed(request)) {
 242                pr_err("low priority request already completed\n");
 243                goto err;
 244        }
 245
 246        err = 0;
 247err:
 248        i915_request_put(vip);
 249err_context_1:
 250        mock_context_close(ctx[1]);
 251        i915_request_put(request);
 252err_context_0:
 253        mock_context_close(ctx[0]);
 254        mock_device_flush(i915);
 255        return err;
 256}
 257
 258struct smoketest {
 259        struct intel_engine_cs *engine;
 260        struct i915_gem_context **contexts;
 261        atomic_long_t num_waits, num_fences;
 262        int ncontexts, max_batch;
 263        struct i915_request *(*request_alloc)(struct intel_context *ce);
 264};
 265
 266static struct i915_request *
 267__mock_request_alloc(struct intel_context *ce)
 268{
 269        return mock_request(ce, 0);
 270}
 271
 272static struct i915_request *
 273__live_request_alloc(struct intel_context *ce)
 274{
 275        return intel_context_create_request(ce);
 276}
 277
 278static int __igt_breadcrumbs_smoketest(void *arg)
 279{
 280        struct smoketest *t = arg;
 281        const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
 282        const unsigned int total = 4 * t->ncontexts + 1;
 283        unsigned int num_waits = 0, num_fences = 0;
 284        struct i915_request **requests;
 285        I915_RND_STATE(prng);
 286        unsigned int *order;
 287        int err = 0;
 288
 289        /*
 290         * A very simple test to catch the most egregious of list handling bugs.
 291         *
 292         * At its heart, we simply create oodles of requests running across
 293         * multiple kthreads and enable signaling on them, for the sole purpose
 294         * of stressing our breadcrumb handling. The only inspection we do is
 295         * that the fences were marked as signaled.
 296         */
 297
 298        requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
 299        if (!requests)
 300                return -ENOMEM;
 301
 302        order = i915_random_order(total, &prng);
 303        if (!order) {
 304                err = -ENOMEM;
 305                goto out_requests;
 306        }
 307
 308        while (!kthread_should_stop()) {
 309                struct i915_sw_fence *submit, *wait;
 310                unsigned int n, count;
 311
 312                submit = heap_fence_create(GFP_KERNEL);
 313                if (!submit) {
 314                        err = -ENOMEM;
 315                        break;
 316                }
 317
 318                wait = heap_fence_create(GFP_KERNEL);
 319                if (!wait) {
 320                        i915_sw_fence_commit(submit);
 321                        heap_fence_put(submit);
  322                        err = -ENOMEM;
 323                        break;
 324                }
 325
 326                i915_random_reorder(order, total, &prng);
 327                count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
 328
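                     /*
                      * Queue a random-sized batch of requests, each held back
                      * by the 'submit' fence so none are submitted until all
                      * are queued, and chain every completion into the 'wait'
                      * fence.
                      */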
 329                for (n = 0; n < count; n++) {
 330                        struct i915_gem_context *ctx =
 331                                t->contexts[order[n] % t->ncontexts];
 332                        struct i915_request *rq;
 333                        struct intel_context *ce;
 334
 335                        ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
 336                        GEM_BUG_ON(IS_ERR(ce));
 337                        rq = t->request_alloc(ce);
 338                        intel_context_put(ce);
 339                        if (IS_ERR(rq)) {
 340                                err = PTR_ERR(rq);
 341                                count = n;
 342                                break;
 343                        }
 344
 345                        err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
 346                                                               submit,
 347                                                               GFP_KERNEL);
 348
 349                        requests[n] = i915_request_get(rq);
 350                        i915_request_add(rq);
 351
 352                        if (err >= 0)
 353                                err = i915_sw_fence_await_dma_fence(wait,
 354                                                                    &rq->fence,
 355                                                                    0,
 356                                                                    GFP_KERNEL);
 357
 358                        if (err < 0) {
 359                                i915_request_put(rq);
 360                                count = n;
 361                                break;
 362                        }
 363                }
 364
 365                i915_sw_fence_commit(submit);
 366                i915_sw_fence_commit(wait);
 367
 368                if (!wait_event_timeout(wait->wait,
 369                                        i915_sw_fence_done(wait),
 370                                        5 * HZ)) {
 371                        struct i915_request *rq = requests[count - 1];
 372
 373                        pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
 374                               atomic_read(&wait->pending), count,
 375                               rq->fence.context, rq->fence.seqno,
 376                               t->engine->name);
 377                        GEM_TRACE_DUMP();
 378
 379                        intel_gt_set_wedged(t->engine->gt);
 380                        GEM_BUG_ON(!i915_request_completed(rq));
 381                        i915_sw_fence_wait(wait);
 382                        err = -EIO;
 383                }
 384
 385                for (n = 0; n < count; n++) {
 386                        struct i915_request *rq = requests[n];
 387
 388                        if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 389                                      &rq->fence.flags)) {
 390                                pr_err("%llu:%llu was not signaled!\n",
 391                                       rq->fence.context, rq->fence.seqno);
 392                                err = -EINVAL;
 393                        }
 394
 395                        i915_request_put(rq);
 396                }
 397
 398                heap_fence_put(wait);
 399                heap_fence_put(submit);
 400
 401                if (err < 0)
 402                        break;
 403
 404                num_fences += count;
 405                num_waits++;
 406
 407                cond_resched();
 408        }
 409
 410        atomic_long_add(num_fences, &t->num_fences);
 411        atomic_long_add(num_waits, &t->num_waits);
 412
 413        kfree(order);
 414out_requests:
 415        kfree(requests);
 416        return err;
 417}
 418
 419static int mock_breadcrumbs_smoketest(void *arg)
 420{
 421        struct drm_i915_private *i915 = arg;
 422        struct smoketest t = {
 423                .engine = i915->engine[RCS0],
 424                .ncontexts = 1024,
 425                .max_batch = 1024,
 426                .request_alloc = __mock_request_alloc
 427        };
 428        unsigned int ncpus = num_online_cpus();
 429        struct task_struct **threads;
 430        unsigned int n;
 431        int ret = 0;
 432
 433        /*
 434         * Smoketest our breadcrumb/signal handling for requests across multiple
 435         * threads. A very simple test to only catch the most egregious of bugs.
 436         * See __igt_breadcrumbs_smoketest();
 437         */
 438
 439        threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
 440        if (!threads)
 441                return -ENOMEM;
 442
 443        t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
 444        if (!t.contexts) {
 445                ret = -ENOMEM;
 446                goto out_threads;
 447        }
 448
 449        for (n = 0; n < t.ncontexts; n++) {
 450                t.contexts[n] = mock_context(t.engine->i915, "mock");
 451                if (!t.contexts[n]) {
 452                        ret = -ENOMEM;
 453                        goto out_contexts;
 454                }
 455        }
 456
 457        for (n = 0; n < ncpus; n++) {
 458                threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
 459                                         &t, "igt/%d", n);
 460                if (IS_ERR(threads[n])) {
 461                        ret = PTR_ERR(threads[n]);
 462                        ncpus = n;
 463                        break;
 464                }
 465
 466                get_task_struct(threads[n]);
 467        }
 468
 469        yield(); /* start all threads before we begin */
 470        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 471
 472        for (n = 0; n < ncpus; n++) {
 473                int err;
 474
 475                err = kthread_stop(threads[n]);
 476                if (err < 0 && !ret)
 477                        ret = err;
 478
 479                put_task_struct(threads[n]);
 480        }
  481        pr_info("Completed %lu waits for %lu fences across %d cpus\n",
 482                atomic_long_read(&t.num_waits),
 483                atomic_long_read(&t.num_fences),
 484                ncpus);
 485
 486out_contexts:
 487        for (n = 0; n < t.ncontexts; n++) {
 488                if (!t.contexts[n])
 489                        break;
 490                mock_context_close(t.contexts[n]);
 491        }
 492        kfree(t.contexts);
 493out_threads:
 494        kfree(threads);
 495        return ret;
 496}
 497
 498int i915_request_mock_selftests(void)
 499{
 500        static const struct i915_subtest tests[] = {
 501                SUBTEST(igt_add_request),
 502                SUBTEST(igt_wait_request),
 503                SUBTEST(igt_fence_wait),
 504                SUBTEST(igt_request_rewind),
 505                SUBTEST(mock_breadcrumbs_smoketest),
 506        };
 507        struct drm_i915_private *i915;
 508        intel_wakeref_t wakeref;
 509        int err = 0;
 510
 511        i915 = mock_gem_device();
 512        if (!i915)
 513                return -ENOMEM;
 514
 515        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 516                err = i915_subtests(tests, i915);
 517
 518        drm_dev_put(&i915->drm);
 519
 520        return err;
 521}
 522
 523static int live_nop_request(void *arg)
 524{
 525        struct drm_i915_private *i915 = arg;
 526        struct intel_engine_cs *engine;
 527        struct igt_live_test t;
 528        int err = -ENODEV;
 529
 530        /*
 531         * Submit various sized batches of empty requests, to each engine
 532         * (individually), and wait for the batch to complete. We can check
 533         * the overhead of submitting requests to the hardware.
 534         */
 535
 536        for_each_uabi_engine(engine, i915) {
 537                unsigned long n, prime;
 538                IGT_TIMEOUT(end_time);
 539                ktime_t times[2] = {};
 540
 541                err = igt_live_test_begin(&t, i915, __func__, engine->name);
 542                if (err)
 543                        return err;
 544
 545                intel_engine_pm_get(engine);
 546                for_each_prime_number_from(prime, 1, 8192) {
 547                        struct i915_request *request = NULL;
 548
 549                        times[1] = ktime_get_raw();
 550
 551                        for (n = 0; n < prime; n++) {
 552                                i915_request_put(request);
 553                                request = i915_request_create(engine->kernel_context);
 554                                if (IS_ERR(request))
 555                                        return PTR_ERR(request);
 556
 557                                /*
 558                                 * This space is left intentionally blank.
 559                                 *
 560                                 * We do not actually want to perform any
 561                                 * action with this request, we just want
 562                                 * to measure the latency in allocation
 563                                 * and submission of our breadcrumbs -
 564                                 * ensuring that the bare request is sufficient
 565                                 * for the system to work (i.e. proper HEAD
 566                                 * tracking of the rings, interrupt handling,
 567                                 * etc). It also gives us the lowest bounds
 568                                 * for latency.
 569                                 */
 570
 571                                i915_request_get(request);
 572                                i915_request_add(request);
 573                        }
 574                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 575                        i915_request_put(request);
 576
 577                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
 578                        if (prime == 1)
 579                                times[0] = times[1];
 580
 581                        if (__igt_timeout(end_time, NULL))
 582                                break;
 583                }
 584                intel_engine_pm_put(engine);
 585
 586                err = igt_live_test_end(&t);
 587                if (err)
 588                        return err;
 589
 590                pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
 591                        engine->name,
 592                        ktime_to_ns(times[0]),
 593                        prime, div64_u64(ktime_to_ns(times[1]), prime));
 594        }
 595
 596        return err;
 597}
 598
 599static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 600{
 601        struct drm_i915_gem_object *obj;
 602        struct i915_vma *vma;
 603        u32 *cmd;
 604        int err;
 605
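             /* Build a batch of a single MI_BATCH_BUFFER_END in the GGTT */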
 606        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 607        if (IS_ERR(obj))
 608                return ERR_CAST(obj);
 609
 610        cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
 611        if (IS_ERR(cmd)) {
 612                err = PTR_ERR(cmd);
 613                goto err;
 614        }
 615
 616        *cmd = MI_BATCH_BUFFER_END;
 617
 618        __i915_gem_object_flush_map(obj, 0, 64);
 619        i915_gem_object_unpin_map(obj);
 620
 621        intel_gt_chipset_flush(&i915->gt);
 622
 623        vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
 624        if (IS_ERR(vma)) {
 625                err = PTR_ERR(vma);
 626                goto err;
 627        }
 628
 629        err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
 630        if (err)
 631                goto err;
 632
  633        /* Force the wait now to avoid including it in the benchmark */
 634        err = i915_vma_sync(vma);
 635        if (err)
 636                goto err_pin;
 637
 638        return vma;
 639
 640err_pin:
 641        i915_vma_unpin(vma);
 642err:
 643        i915_gem_object_put(obj);
 644        return ERR_PTR(err);
 645}
 646
 647static struct i915_request *
 648empty_request(struct intel_engine_cs *engine,
 649              struct i915_vma *batch)
 650{
 651        struct i915_request *request;
 652        int err;
 653
 654        request = i915_request_create(engine->kernel_context);
 655        if (IS_ERR(request))
 656                return request;
 657
 658        err = engine->emit_bb_start(request,
 659                                    batch->node.start,
 660                                    batch->node.size,
 661                                    I915_DISPATCH_SECURE);
 662        if (err)
 663                goto out_request;
 664
 665        i915_request_get(request);
 666out_request:
 667        i915_request_add(request);
 668        return err ? ERR_PTR(err) : request;
 669}
 670
 671static int live_empty_request(void *arg)
 672{
 673        struct drm_i915_private *i915 = arg;
 674        struct intel_engine_cs *engine;
 675        struct igt_live_test t;
 676        struct i915_vma *batch;
 677        int err = 0;
 678
 679        /*
 680         * Submit various sized batches of empty requests, to each engine
 681         * (individually), and wait for the batch to complete. We can check
 682         * the overhead of submitting requests to the hardware.
 683         */
 684
 685        batch = empty_batch(i915);
 686        if (IS_ERR(batch))
 687                return PTR_ERR(batch);
 688
 689        for_each_uabi_engine(engine, i915) {
 690                IGT_TIMEOUT(end_time);
 691                struct i915_request *request;
 692                unsigned long n, prime;
 693                ktime_t times[2] = {};
 694
 695                err = igt_live_test_begin(&t, i915, __func__, engine->name);
 696                if (err)
 697                        goto out_batch;
 698
 699                intel_engine_pm_get(engine);
 700
 701                /* Warmup / preload */
 702                request = empty_request(engine, batch);
 703                if (IS_ERR(request)) {
 704                        err = PTR_ERR(request);
 705                        intel_engine_pm_put(engine);
 706                        goto out_batch;
 707                }
 708                i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 709
 710                for_each_prime_number_from(prime, 1, 8192) {
 711                        times[1] = ktime_get_raw();
 712
 713                        for (n = 0; n < prime; n++) {
 714                                i915_request_put(request);
 715                                request = empty_request(engine, batch);
 716                                if (IS_ERR(request)) {
 717                                        err = PTR_ERR(request);
 718                                        intel_engine_pm_put(engine);
 719                                        goto out_batch;
 720                                }
 721                        }
 722                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 723
 724                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
 725                        if (prime == 1)
 726                                times[0] = times[1];
 727
 728                        if (__igt_timeout(end_time, NULL))
 729                                break;
 730                }
 731                i915_request_put(request);
 732                intel_engine_pm_put(engine);
 733
 734                err = igt_live_test_end(&t);
 735                if (err)
 736                        goto out_batch;
 737
 738                pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
 739                        engine->name,
 740                        ktime_to_ns(times[0]),
 741                        prime, div64_u64(ktime_to_ns(times[1]), prime));
 742        }
 743
 744out_batch:
 745        i915_vma_unpin(batch);
 746        i915_vma_put(batch);
 747        return err;
 748}
 749
 750static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 751{
 752        struct drm_i915_gem_object *obj;
 753        const int gen = INTEL_GEN(i915);
 754        struct i915_vma *vma;
 755        u32 *cmd;
 756        int err;
 757
 758        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 759        if (IS_ERR(obj))
 760                return ERR_CAST(obj);
 761
 762        vma = i915_vma_instance(obj, i915->gt.vm, NULL);
 763        if (IS_ERR(vma)) {
 764                err = PTR_ERR(vma);
 765                goto err;
 766        }
 767
 768        err = i915_vma_pin(vma, 0, 0, PIN_USER);
 769        if (err)
 770                goto err;
 771
 772        cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
 773        if (IS_ERR(cmd)) {
 774                err = PTR_ERR(cmd);
 775                goto err;
 776        }
 777
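             /*
              * The batch jumps back to its own start, looping on the GPU until
              * recursive_batch_resolve() rewrites the first dword with
              * MI_BATCH_BUFFER_END.
              */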
 778        if (gen >= 8) {
 779                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
 780                *cmd++ = lower_32_bits(vma->node.start);
 781                *cmd++ = upper_32_bits(vma->node.start);
 782        } else if (gen >= 6) {
 783                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
 784                *cmd++ = lower_32_bits(vma->node.start);
 785        } else {
 786                *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
 787                *cmd++ = lower_32_bits(vma->node.start);
 788        }
 789        *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
 790
 791        __i915_gem_object_flush_map(obj, 0, 64);
 792        i915_gem_object_unpin_map(obj);
 793
 794        intel_gt_chipset_flush(&i915->gt);
 795
 796        return vma;
 797
 798err:
 799        i915_gem_object_put(obj);
 800        return ERR_PTR(err);
 801}
 802
 803static int recursive_batch_resolve(struct i915_vma *batch)
 804{
 805        u32 *cmd;
 806
 807        cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
 808        if (IS_ERR(cmd))
 809                return PTR_ERR(cmd);
 810
 811        *cmd = MI_BATCH_BUFFER_END;
 812        intel_gt_chipset_flush(batch->vm->gt);
 813
 814        i915_gem_object_unpin_map(batch->obj);
 815
 816        return 0;
 817}
 818
 819static int live_all_engines(void *arg)
 820{
 821        struct drm_i915_private *i915 = arg;
 822        const unsigned int nengines = num_uabi_engines(i915);
 823        struct intel_engine_cs *engine;
 824        struct i915_request **request;
 825        struct igt_live_test t;
 826        struct i915_vma *batch;
 827        unsigned int idx;
 828        int err;
 829
 830        /*
 831         * Check we can submit requests to all engines simultaneously. We
 832         * send a recursive batch to each engine - checking that we don't
 833         * block doing so, and that they don't complete too soon.
 834         */
 835
 836        request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
 837        if (!request)
 838                return -ENOMEM;
 839
 840        err = igt_live_test_begin(&t, i915, __func__, "");
 841        if (err)
 842                goto out_free;
 843
 844        batch = recursive_batch(i915);
 845        if (IS_ERR(batch)) {
 846                err = PTR_ERR(batch);
 847                pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
 848                goto out_free;
 849        }
 850
 851        idx = 0;
 852        for_each_uabi_engine(engine, i915) {
 853                request[idx] = intel_engine_create_kernel_request(engine);
 854                if (IS_ERR(request[idx])) {
 855                        err = PTR_ERR(request[idx]);
 856                        pr_err("%s: Request allocation failed with err=%d\n",
 857                               __func__, err);
 858                        goto out_request;
 859                }
 860
 861                err = engine->emit_bb_start(request[idx],
 862                                            batch->node.start,
 863                                            batch->node.size,
 864                                            0);
 865                GEM_BUG_ON(err);
 866                request[idx]->batch = batch;
 867
 868                i915_vma_lock(batch);
 869                err = i915_request_await_object(request[idx], batch->obj, 0);
 870                if (err == 0)
 871                        err = i915_vma_move_to_active(batch, request[idx], 0);
 872                i915_vma_unlock(batch);
 873                GEM_BUG_ON(err);
 874
 875                i915_request_get(request[idx]);
 876                i915_request_add(request[idx]);
 877                idx++;
 878        }
 879
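             /* The recursive batches are still looping; nothing should have completed */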
 880        idx = 0;
 881        for_each_uabi_engine(engine, i915) {
 882                if (i915_request_completed(request[idx])) {
 883                        pr_err("%s(%s): request completed too early!\n",
 884                               __func__, engine->name);
 885                        err = -EINVAL;
 886                        goto out_request;
 887                }
 888                idx++;
 889        }
 890
 891        err = recursive_batch_resolve(batch);
 892        if (err) {
 893                pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
 894                goto out_request;
 895        }
 896
 897        idx = 0;
 898        for_each_uabi_engine(engine, i915) {
 899                long timeout;
 900
 901                timeout = i915_request_wait(request[idx], 0,
 902                                            MAX_SCHEDULE_TIMEOUT);
 903                if (timeout < 0) {
 904                        err = timeout;
 905                        pr_err("%s: error waiting for request on %s, err=%d\n",
 906                               __func__, engine->name, err);
 907                        goto out_request;
 908                }
 909
 910                GEM_BUG_ON(!i915_request_completed(request[idx]));
 911                i915_request_put(request[idx]);
 912                request[idx] = NULL;
 913                idx++;
 914        }
 915
 916        err = igt_live_test_end(&t);
 917
 918out_request:
 919        idx = 0;
 920        for_each_uabi_engine(engine, i915) {
 921                if (request[idx])
 922                        i915_request_put(request[idx]);
 923                idx++;
 924        }
 925        i915_vma_unpin(batch);
 926        i915_vma_put(batch);
 927out_free:
 928        kfree(request);
 929        return err;
 930}
 931
 932static int live_sequential_engines(void *arg)
 933{
 934        struct drm_i915_private *i915 = arg;
 935        const unsigned int nengines = num_uabi_engines(i915);
 936        struct i915_request **request;
 937        struct i915_request *prev = NULL;
 938        struct intel_engine_cs *engine;
 939        struct igt_live_test t;
 940        unsigned int idx;
 941        int err;
 942
 943        /*
 944         * Check we can submit requests to all engines sequentially, such
 945         * that each successive request waits for the earlier ones. This
 946         * tests that we don't execute requests out of order, even though
 947         * they are running on independent engines.
 948         */
 949
 950        request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
 951        if (!request)
 952                return -ENOMEM;
 953
 954        err = igt_live_test_begin(&t, i915, __func__, "");
 955        if (err)
 956                goto out_free;
 957
 958        idx = 0;
 959        for_each_uabi_engine(engine, i915) {
 960                struct i915_vma *batch;
 961
 962                batch = recursive_batch(i915);
 963                if (IS_ERR(batch)) {
 964                        err = PTR_ERR(batch);
 965                        pr_err("%s: Unable to create batch for %s, err=%d\n",
 966                               __func__, engine->name, err);
 967                        goto out_free;
 968                }
 969
 970                request[idx] = intel_engine_create_kernel_request(engine);
 971                if (IS_ERR(request[idx])) {
 972                        err = PTR_ERR(request[idx]);
 973                        pr_err("%s: Request allocation failed for %s with err=%d\n",
 974                               __func__, engine->name, err);
 975                        goto out_request;
 976                }
 977
 978                if (prev) {
 979                        err = i915_request_await_dma_fence(request[idx],
 980                                                           &prev->fence);
 981                        if (err) {
 982                                i915_request_add(request[idx]);
 983                                pr_err("%s: Request await failed for %s with err=%d\n",
 984                                       __func__, engine->name, err);
 985                                goto out_request;
 986                        }
 987                }
 988
 989                err = engine->emit_bb_start(request[idx],
 990                                            batch->node.start,
 991                                            batch->node.size,
 992                                            0);
 993                GEM_BUG_ON(err);
 994                request[idx]->batch = batch;
 995
 996                i915_vma_lock(batch);
 997                err = i915_request_await_object(request[idx],
 998                                                batch->obj, false);
 999                if (err == 0)
1000                        err = i915_vma_move_to_active(batch, request[idx], 0);
1001                i915_vma_unlock(batch);
1002                GEM_BUG_ON(err);
1003
1004                i915_request_get(request[idx]);
1005                i915_request_add(request[idx]);
1006
1007                prev = request[idx];
1008                idx++;
1009        }
1010
1011        idx = 0;
1012        for_each_uabi_engine(engine, i915) {
1013                long timeout;
1014
1015                if (i915_request_completed(request[idx])) {
1016                        pr_err("%s(%s): request completed too early!\n",
1017                               __func__, engine->name);
1018                        err = -EINVAL;
1019                        goto out_request;
1020                }
1021
1022                err = recursive_batch_resolve(request[idx]->batch);
1023                if (err) {
1024                        pr_err("%s: failed to resolve batch, err=%d\n",
1025                               __func__, err);
1026                        goto out_request;
1027                }
1028
1029                timeout = i915_request_wait(request[idx], 0,
1030                                            MAX_SCHEDULE_TIMEOUT);
1031                if (timeout < 0) {
1032                        err = timeout;
1033                        pr_err("%s: error waiting for request on %s, err=%d\n",
1034                               __func__, engine->name, err);
1035                        goto out_request;
1036                }
1037
1038                GEM_BUG_ON(!i915_request_completed(request[idx]));
1039                idx++;
1040        }
1041
1042        err = igt_live_test_end(&t);
1043
1044out_request:
1045        idx = 0;
1046        for_each_uabi_engine(engine, i915) {
1047                u32 *cmd;
1048
1049                if (!request[idx])
1050                        break;
1051
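                     /* Force the batch to terminate in case it is still looping */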
1052                cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
1053                                              I915_MAP_WC);
1054                if (!IS_ERR(cmd)) {
1055                        *cmd = MI_BATCH_BUFFER_END;
1056                        intel_gt_chipset_flush(engine->gt);
1057
1058                        i915_gem_object_unpin_map(request[idx]->batch->obj);
1059                }
1060
1061                i915_vma_put(request[idx]->batch);
1062                i915_request_put(request[idx]);
1063                idx++;
1064        }
1065out_free:
1066        kfree(request);
1067        return err;
1068}
1069
1070static int __live_parallel_engine1(void *arg)
1071{
1072        struct intel_engine_cs *engine = arg;
1073        IGT_TIMEOUT(end_time);
1074        unsigned long count;
1075        int err = 0;
1076
1077        count = 0;
1078        intel_engine_pm_get(engine);
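             /* Submit one request at a time and wait for each to complete */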
1079        do {
1080                struct i915_request *rq;
1081
1082                rq = i915_request_create(engine->kernel_context);
1083                if (IS_ERR(rq)) {
1084                        err = PTR_ERR(rq);
1085                        break;
1086                }
1087
1088                i915_request_get(rq);
1089                i915_request_add(rq);
1090
1091                err = 0;
1092                if (i915_request_wait(rq, 0, HZ / 5) < 0)
1093                        err = -ETIME;
1094                i915_request_put(rq);
1095                if (err)
1096                        break;
1097
1098                count++;
1099        } while (!__igt_timeout(end_time, NULL));
1100        intel_engine_pm_put(engine);
1101
1102        pr_info("%s: %lu request + sync\n", engine->name, count);
1103        return err;
1104}
1105
1106static int __live_parallel_engineN(void *arg)
1107{
1108        struct intel_engine_cs *engine = arg;
1109        IGT_TIMEOUT(end_time);
1110        unsigned long count;
1111        int err = 0;
1112
1113        count = 0;
1114        intel_engine_pm_get(engine);
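             /* Submit requests back-to-back without waiting in between */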
1115        do {
1116                struct i915_request *rq;
1117
1118                rq = i915_request_create(engine->kernel_context);
1119                if (IS_ERR(rq)) {
1120                        err = PTR_ERR(rq);
1121                        break;
1122                }
1123
1124                i915_request_add(rq);
1125                count++;
1126        } while (!__igt_timeout(end_time, NULL));
1127        intel_engine_pm_put(engine);
1128
1129        pr_info("%s: %lu requests\n", engine->name, count);
1130        return err;
1131}
1132
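     /*
      * wake_all()/wait_for_all() form a simple rendezvous for the per-engine
      * test threads: the last thread to arrive wakes all the waiters.
      */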
1133static bool wake_all(struct drm_i915_private *i915)
1134{
1135        if (atomic_dec_and_test(&i915->selftest.counter)) {
1136                wake_up_var(&i915->selftest.counter);
1137                return true;
1138        }
1139
1140        return false;
1141}
1142
1143static int wait_for_all(struct drm_i915_private *i915)
1144{
1145        if (wake_all(i915))
1146                return 0;
1147
1148        if (wait_var_event_timeout(&i915->selftest.counter,
1149                                   !atomic_read(&i915->selftest.counter),
1150                                   i915_selftest.timeout_jiffies))
1151                return 0;
1152
1153        return -ETIME;
1154}
1155
1156static int __live_parallel_spin(void *arg)
1157{
1158        struct intel_engine_cs *engine = arg;
1159        struct igt_spinner spin;
1160        struct i915_request *rq;
1161        int err = 0;
1162
1163        /*
1164         * Create a spinner running for eternity on each engine. If a second
1165         * spinner is incorrectly placed on the same engine, it will not be
1166         * able to start in time.
1167         */
1168
1169        if (igt_spinner_init(&spin, engine->gt)) {
1170                wake_all(engine->i915);
1171                return -ENOMEM;
1172        }
1173
1174        intel_engine_pm_get(engine);
1175        rq = igt_spinner_create_request(&spin,
1176                                        engine->kernel_context,
1177                                        MI_NOOP); /* no preemption */
1178        intel_engine_pm_put(engine);
1179        if (IS_ERR(rq)) {
1180                err = PTR_ERR(rq);
1181                if (err == -ENODEV)
1182                        err = 0;
1183                wake_all(engine->i915);
1184                goto out_spin;
1185        }
1186
1187        i915_request_get(rq);
1188        i915_request_add(rq);
1189        if (igt_wait_for_spinner(&spin, rq)) {
1190                /* Occupy this engine for the whole test */
1191                err = wait_for_all(engine->i915);
1192        } else {
1193                pr_err("Failed to start spinner on %s\n", engine->name);
1194                err = -EINVAL;
1195        }
1196        igt_spinner_end(&spin);
1197
1198        if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
1199                err = -EIO;
1200        i915_request_put(rq);
1201
1202out_spin:
1203        igt_spinner_fini(&spin);
1204        return err;
1205}
1206
1207static int live_parallel_engines(void *arg)
1208{
1209        struct drm_i915_private *i915 = arg;
1210        static int (* const func[])(void *arg) = {
1211                __live_parallel_engine1,
1212                __live_parallel_engineN,
1213                __live_parallel_spin,
1214                NULL,
1215        };
1216        const unsigned int nengines = num_uabi_engines(i915);
1217        struct intel_engine_cs *engine;
1218        int (* const *fn)(void *arg);
1219        struct task_struct **tsk;
1220        int err = 0;
1221
1222        /*
1223         * Check we can submit requests to all engines concurrently. This
1224         * tests that we load up the system maximally.
1225         */
1226
1227        tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
1228        if (!tsk)
1229                return -ENOMEM;
1230
1231        for (fn = func; !err && *fn; fn++) {
1232                char name[KSYM_NAME_LEN];
1233                struct igt_live_test t;
1234                unsigned int idx;
1235
 1236                snprintf(name, sizeof(name), "%ps", *fn);
1237                err = igt_live_test_begin(&t, i915, __func__, name);
1238                if (err)
1239                        break;
1240
1241                atomic_set(&i915->selftest.counter, nengines);
1242
1243                idx = 0;
1244                for_each_uabi_engine(engine, i915) {
1245                        tsk[idx] = kthread_run(*fn, engine,
1246                                               "igt/parallel:%s",
1247                                               engine->name);
1248                        if (IS_ERR(tsk[idx])) {
1249                                err = PTR_ERR(tsk[idx]);
1250                                break;
1251                        }
1252                        get_task_struct(tsk[idx++]);
1253                }
1254
1255                yield(); /* start all threads before we kthread_stop() */
1256
1257                idx = 0;
1258                for_each_uabi_engine(engine, i915) {
1259                        int status;
1260
1261                        if (IS_ERR(tsk[idx]))
1262                                break;
1263
1264                        status = kthread_stop(tsk[idx]);
1265                        if (status && !err)
1266                                err = status;
1267
1268                        put_task_struct(tsk[idx++]);
1269                }
1270
1271                if (igt_live_test_end(&t))
1272                        err = -EIO;
1273        }
1274
1275        kfree(tsk);
1276        return err;
1277}
1278
1279static int
1280max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1281{
1282        struct i915_request *rq;
1283        int ret;
1284
1285        /*
1286         * Before execlists, all contexts share the same ringbuffer. With
1287         * execlists, each context/engine has a separate ringbuffer and
1288         * for the purposes of this test, inexhaustible.
1289         *
1290         * For the global ringbuffer though, we have to be very careful
1291         * that we do not wrap while preventing the execution of requests
 1292         * with an unsignaled fence.
1293         */
1294        if (HAS_EXECLISTS(ctx->i915))
1295                return INT_MAX;
1296
1297        rq = igt_request_alloc(ctx, engine);
1298        if (IS_ERR(rq)) {
1299                ret = PTR_ERR(rq);
1300        } else {
1301                int sz;
1302
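                     /*
                      * Estimate how many requests of this size fit into the
                      * free space of the legacy ring.
                      */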
1303                ret = rq->ring->size - rq->reserved_space;
1304                i915_request_add(rq);
1305
1306                sz = rq->ring->emit - rq->head;
1307                if (sz < 0)
1308                        sz += rq->ring->size;
1309                ret /= sz;
1310                ret /= 2; /* leave half spare, in case of emergency! */
1311        }
1312
1313        return ret;
1314}
1315
1316static int live_breadcrumbs_smoketest(void *arg)
1317{
1318        struct drm_i915_private *i915 = arg;
1319        const unsigned int nengines = num_uabi_engines(i915);
1320        const unsigned int ncpus = num_online_cpus();
1321        unsigned long num_waits, num_fences;
1322        struct intel_engine_cs *engine;
1323        struct task_struct **threads;
1324        struct igt_live_test live;
1325        intel_wakeref_t wakeref;
1326        struct smoketest *smoke;
1327        unsigned int n, idx;
1328        struct file *file;
1329        int ret = 0;
1330
1331        /*
1332         * Smoketest our breadcrumb/signal handling for requests across multiple
1333         * threads. A very simple test to only catch the most egregious of bugs.
1334         * See __igt_breadcrumbs_smoketest();
1335         *
1336         * On real hardware this time.
1337         */
1338
1339        wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1340
1341        file = mock_file(i915);
1342        if (IS_ERR(file)) {
1343                ret = PTR_ERR(file);
1344                goto out_rpm;
1345        }
1346
1347        smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1348        if (!smoke) {
1349                ret = -ENOMEM;
1350                goto out_file;
1351        }
1352
1353        threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1354        if (!threads) {
1355                ret = -ENOMEM;
1356                goto out_smoke;
1357        }
1358
1359        smoke[0].request_alloc = __live_request_alloc;
1360        smoke[0].ncontexts = 64;
1361        smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1362                                    sizeof(*smoke[0].contexts),
1363                                    GFP_KERNEL);
1364        if (!smoke[0].contexts) {
1365                ret = -ENOMEM;
1366                goto out_threads;
1367        }
1368
1369        for (n = 0; n < smoke[0].ncontexts; n++) {
1370                smoke[0].contexts[n] = live_context(i915, file);
1371                if (!smoke[0].contexts[n]) {
1372                        ret = -ENOMEM;
1373                        goto out_contexts;
1374                }
1375        }
1376
1377        ret = igt_live_test_begin(&live, i915, __func__, "");
1378        if (ret)
1379                goto out_contexts;
1380
1381        idx = 0;
1382        for_each_uabi_engine(engine, i915) {
1383                smoke[idx] = smoke[0];
1384                smoke[idx].engine = engine;
1385                smoke[idx].max_batch =
1386                        max_batches(smoke[0].contexts[0], engine);
1387                if (smoke[idx].max_batch < 0) {
1388                        ret = smoke[idx].max_batch;
1389                        goto out_flush;
1390                }
1391                /* One ring interleaved between requests from all cpus */
1392                smoke[idx].max_batch /= num_online_cpus() + 1;
1393                pr_debug("Limiting batches to %d requests on %s\n",
1394                         smoke[idx].max_batch, engine->name);
1395
1396                for (n = 0; n < ncpus; n++) {
1397                        struct task_struct *tsk;
1398
1399                        tsk = kthread_run(__igt_breadcrumbs_smoketest,
1400                                          &smoke[idx], "igt/%d.%d", idx, n);
1401                        if (IS_ERR(tsk)) {
1402                                ret = PTR_ERR(tsk);
1403                                goto out_flush;
1404                        }
1405
1406                        get_task_struct(tsk);
1407                        threads[idx * ncpus + n] = tsk;
1408                }
1409
1410                idx++;
1411        }
1412
1413        yield(); /* start all threads before we begin */
1414        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1415
1416out_flush:
1417        idx = 0;
1418        num_waits = 0;
1419        num_fences = 0;
1420        for_each_uabi_engine(engine, i915) {
1421                for (n = 0; n < ncpus; n++) {
1422                        struct task_struct *tsk = threads[idx * ncpus + n];
1423                        int err;
1424
1425                        if (!tsk)
1426                                continue;
1427
1428                        err = kthread_stop(tsk);
1429                        if (err < 0 && !ret)
1430                                ret = err;
1431
1432                        put_task_struct(tsk);
1433                }
1434
1435                num_waits += atomic_long_read(&smoke[idx].num_waits);
1436                num_fences += atomic_long_read(&smoke[idx].num_fences);
1437                idx++;
1438        }
1439        pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1440                num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
1441
1442        ret = igt_live_test_end(&live) ?: ret;
1443out_contexts:
1444        kfree(smoke[0].contexts);
1445out_threads:
1446        kfree(threads);
1447out_smoke:
1448        kfree(smoke);
1449out_file:
1450        fput(file);
1451out_rpm:
1452        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1453
1454        return ret;
1455}
1456
1457int i915_request_live_selftests(struct drm_i915_private *i915)
1458{
1459        static const struct i915_subtest tests[] = {
1460                SUBTEST(live_nop_request),
1461                SUBTEST(live_all_engines),
1462                SUBTEST(live_sequential_engines),
1463                SUBTEST(live_parallel_engines),
1464                SUBTEST(live_empty_request),
1465                SUBTEST(live_breadcrumbs_smoketest),
1466        };
1467
1468        if (intel_gt_is_wedged(&i915->gt))
1469                return 0;
1470
1471        return i915_subtests(tests, i915);
1472}
1473