linux/drivers/gpu/drm/i915/gt/selftest_timeline.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

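/* Return the page backing the timeline's HWSP; its pages must be pinned. */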
static struct page *hwsp_page(struct intel_timeline *tl)
{
        struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
        return sg_page(obj->mm.pages->sgl);
}

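/*
 * Compute a unique key for the timeline's HWSP cacheline from the kernel
 * address of the backing page; used below to detect duplicate allocations.
 */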
static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
        unsigned long address = (unsigned long)page_address(hwsp_page(tl));

        return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
        struct intel_gt *gt;
        struct radix_tree_root cachelines;
        struct intel_timeline **history;
        unsigned long count, max;
        struct rnd_state prng;
};

enum {
        SHUFFLE = BIT(0),
};

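/*
 * Store @tl in the history slot @idx, releasing (and removing from the
 * cacheline tree) whatever timeline previously occupied that slot.
 */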
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
                               unsigned int idx,
                               struct intel_timeline *tl)
{
        tl = xchg(&state->history[idx], tl);
        if (tl) {
                radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
                intel_timeline_put(tl);
        }
}

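/*
 * Create @count timelines, verifying via the radix tree that no two live
 * timelines ever share an HWSP cacheline, then free a random portion of
 * the history so the next pass exercises the freelist.
 */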
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
                                unsigned int count,
                                unsigned int flags)
{
        struct intel_timeline *tl;
        unsigned int idx;

        while (count--) {
                unsigned long cacheline;
                int err;

                tl = intel_timeline_create(state->gt, NULL);
                if (IS_ERR(tl))
                        return PTR_ERR(tl);

                cacheline = hwsp_cacheline(tl);
                err = radix_tree_insert(&state->cachelines, cacheline, tl);
                if (err) {
                        if (err == -EEXIST) {
                                pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
                                       cacheline);
                        }
                        intel_timeline_put(tl);
                        return err;
                }

                idx = state->count++ % state->max;
                __mock_hwsp_record(state, idx, tl);
        }

        if (flags & SHUFFLE)
                i915_prandom_shuffle(state->history,
                                     sizeof(*state->history),
                                     min(state->count, state->max),
                                     &state->prng);

        count = i915_prandom_u32_max_state(min(state->count, state->max),
                                           &state->prng);
        while (count--) {
                idx = --state->count % state->max;
                __mock_hwsp_record(state, idx, NULL);
        }

        return 0;
}

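/* Runs entirely on a mock GEM device; no hardware is required. */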
static int mock_hwsp_freelist(void *arg)
{
        struct mock_hwsp_freelist state;
        struct drm_i915_private *i915;
        const struct {
                const char *name;
                unsigned int flags;
        } phases[] = {
                { "linear", 0 },
                { "shuffled", SHUFFLE },
                { },
        }, *p;
        unsigned int na;
        int err = 0;

        i915 = mock_gem_device();
        if (!i915)
                return -ENOMEM;

        INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
        state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

        state.gt = &i915->gt;

        /*
         * Create a bunch of timelines and check that their HWSP do not overlap.
         * Free some, and try again.
         */

        state.max = PAGE_SIZE / sizeof(*state.history);
        state.count = 0;
        state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
        if (!state.history) {
                err = -ENOMEM;
                goto err_put;
        }

        for (p = phases; p->name; p++) {
                pr_debug("%s(%s)\n", __func__, p->name);
                for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
                        err = __mock_hwsp_timeline(&state, na, p->flags);
                        if (err)
                                goto out;
                }
        }

out:
        for (na = 0; na < state.max; na++)
                __mock_hwsp_record(&state, na, NULL);
        kfree(state.history);
err_put:
        drm_dev_put(&i915->drm);
        return err;
}

struct __igt_sync {
        const char *name;
        u32 seqno;
        bool expected;
        bool set;
};

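/*
 * Check that sync_is_later(ctx, seqno) returns the expected verdict, and
 * optionally record the seqno so later steps see it.
 */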
static int __igt_sync(struct intel_timeline *tl,
                      u64 ctx,
                      const struct __igt_sync *p,
                      const char *name)
{
        int ret;

        if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
                pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
                       name, p->name, ctx, p->seqno, yesno(p->expected));
                return -EINVAL;
        }

        if (p->set) {
                ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
                if (ret)
                        return ret;
        }

        return 0;
}

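/*
 * Walk the pass[] script over context ids straddling every power-of-two
 * boundary, checking u32 seqno ordering (including wraparound) in the
 * timeline's sync map.
 */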
static int igt_sync(void *arg)
{
        const struct __igt_sync pass[] = {
                { "unset", 0, false, false },
                { "new", 0, false, true },
                { "0a", 0, true, true },
                { "1a", 1, false, true },
                { "1b", 1, true, true },
                { "0b", 0, true, false },
                { "2a", 2, false, true },
                { "4", 4, false, true },
                { "INT_MAX", INT_MAX, false, true },
                { "INT_MAX-1", INT_MAX-1, true, false },
                { "INT_MAX+1", (u32)INT_MAX+1, false, true },
                { "INT_MAX", INT_MAX, true, false },
                { "UINT_MAX", UINT_MAX, false, true },
                { "wrap", 0, false, true },
                { "unwrap", UINT_MAX, true, false },
                {},
        }, *p;
        struct intel_timeline tl;
        int order, offset;
        int ret = -ENODEV;

        mock_timeline_init(&tl, 0);
        for (p = pass; p->name; p++) {
                for (order = 1; order < 64; order++) {
                        for (offset = -1; offset <= (order > 1); offset++) {
                                u64 ctx = BIT_ULL(order) + offset;

                                ret = __igt_sync(&tl, ctx, p, "1");
                                if (ret)
                                        goto out;
                        }
                }
        }
        mock_timeline_fini(&tl);

        mock_timeline_init(&tl, 0);
        for (order = 1; order < 64; order++) {
                for (offset = -1; offset <= (order > 1); offset++) {
                        u64 ctx = BIT_ULL(order) + offset;

                        for (p = pass; p->name; p++) {
                                ret = __igt_sync(&tl, ctx, p, "2");
                                if (ret)
                                        goto out;
                        }
                }
        }

out:
        mock_timeline_fini(&tl);
        return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
        return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

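/*
 * Microbenchmark the sync map: first measure the prng overhead so it can
 * be subtracted, then time random and in-order inserts and lookups.
 */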
static int bench_sync(void *arg)
{
        struct rnd_state prng;
        struct intel_timeline tl;
        unsigned long end_time, count;
        u64 prng32_1M;
        ktime_t kt;
        int order, last_order;

        mock_timeline_init(&tl, 0);

        /* Lookups from cache are very fast and so the random number generation
         * and the loop itself become a significant factor in the per-iteration
         * timings. We try to compensate for this by measuring the overhead
         * of the prng and subtracting it from the reported results.
         */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 x;

                /* Make sure the compiler doesn't optimise away the prng call */
                WRITE_ONCE(x, prandom_u32_state(&prng));

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_debug("%s: %lu random evaluations, %lluns/prng\n",
                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

        /* Benchmark (only) setting random context ids */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u64 id = i915_prandom_u64_state(&prng);

                __intel_timeline_sync_set(&tl, id, 0);
                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                u64 id = i915_prandom_u64_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
                        mock_timeline_fini(&tl);
                        pr_err("Lookup of %llu failed\n", id);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu random lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark setting the first N (in order) contexts */
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                __intel_timeline_sync_set(&tl, count++, 0);
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order insertions, %lluns/insert\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        /* Benchmark looking up the exact same context ids as we just set */
        end_time = count;
        kt = ktime_get();
        while (end_time--) {
                if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
                        pr_err("Lookup of %lu failed\n", end_time);
                        mock_timeline_fini(&tl);
                        return -EINVAL;
                }
        }
        kt = ktime_sub(ktime_get(), kt);
        pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

        mock_timeline_fini(&tl);
        cond_resched();

        mock_timeline_init(&tl, 0);

        /* Benchmark searching for a random context id and maybe changing it */
        prandom_seed_state(&prng, i915_selftest.random_seed);
        count = 0;
        kt = ktime_get();
        end_time = jiffies + HZ/10;
        do {
                u32 id = random_engine(&prng);
                u32 seqno = prandom_u32_state(&prng);

                if (!__intel_timeline_sync_is_later(&tl, id, seqno))
                        __intel_timeline_sync_set(&tl, id, seqno);

                count++;
        } while (!time_after(jiffies, end_time));
        kt = ktime_sub(ktime_get(), kt);
        kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
        pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
                __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
        mock_timeline_fini(&tl);
        cond_resched();

        /* Benchmark searching for a known context id and changing the seqno */
        for (last_order = 1, order = 1; order < 32;
             ({ int tmp = last_order; last_order = order; order += tmp; })) {
                unsigned int mask = BIT(order) - 1;

                mock_timeline_init(&tl, 0);

                count = 0;
                kt = ktime_get();
                end_time = jiffies + HZ/10;
                do {
                        /* Without assuming too many details of the underlying
                         * implementation, try to identify its phase-changes
                         * (if any)!
                         */
                        u64 id = (u64)(count & mask) << order;

                        __intel_timeline_sync_is_later(&tl, id, 0);
                        __intel_timeline_sync_set(&tl, id, 0);

                        count++;
                } while (!time_after(jiffies, end_time));
                kt = ktime_sub(ktime_get(), kt);
                pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
                        __func__, count, order,
                        (long long)div64_ul(ktime_to_ns(kt), count));
                mock_timeline_fini(&tl);
                cond_resched();
        }

        return 0;
}

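/* Entry point for the mock selftests; no GPU is required to run these. */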
int intel_timeline_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(mock_hwsp_freelist),
                SUBTEST(igt_sync),
                SUBTEST(bench_sync),
        };

        return i915_subtests(tests, NULL);
}

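/*
 * Emit a MI_STORE_DWORD_IMM into @rq's ring to write @value to the GGTT
 * address @addr; the command layout differs between hardware generations.
 */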
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        if (INTEL_GEN(rq->i915) >= 8) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = addr;
                *cs++ = 0;
                *cs++ = value;
        } else if (INTEL_GEN(rq->i915) >= 4) {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                *cs++ = 0;
                *cs++ = addr;
                *cs++ = value;
        } else {
                *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
                *cs++ = addr;
                *cs++ = value;
                *cs++ = MI_NOOP;
        }

        intel_ring_advance(rq, cs);

        return 0;
}

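/*
 * Submit a kernel request on @engine that stores @value into @tl's HWSP
 * slot, returning the request with a reference held for the caller.
 */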
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
        struct i915_request *rq;
        int err;

        err = intel_timeline_pin(tl);
        if (err) {
                rq = ERR_PTR(err);
                goto out;
        }

        rq = intel_engine_create_kernel_request(engine);
        if (IS_ERR(rq))
                goto out_unpin;

        i915_request_get(rq);

        err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
        i915_request_add(rq);
        if (err) {
                i915_request_put(rq);
                rq = ERR_PTR(err);
        }

out_unpin:
        intel_timeline_unpin(tl);
out:
        if (IS_ERR(rq))
                pr_err("Failed to write to timeline!\n");
        return rq;
}

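/*
 * Create a timeline and sanity check that its breadcrumb slot was
 * initialised to the timeline's current seqno.
 */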
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
        struct intel_timeline *tl;

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return tl;

        if (*tl->hwsp_seqno != tl->seqno) {
                pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
                       *tl->hwsp_seqno, tl->seqno);
                intel_timeline_put(tl);
                return ERR_PTR(-EINVAL);
        }

        return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for_each_engine(engine, gt, id) {
                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                for (n = 0; n < NUM_TIMELINES; n++) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
        struct intel_gt *gt = arg;
        struct intel_timeline **timelines;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count, n;
        int err = 0;

        /*
         * Create a bunch of timelines and check we can write
         * independently to each of their breadcrumb slots with adjacent
         * engines.
         */

        timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
                                   sizeof(*timelines),
                                   GFP_KERNEL);
        if (!timelines)
                return -ENOMEM;

        count = 0;
        for (n = 0; n < NUM_TIMELINES; n++) {
                for_each_engine(engine, gt, id) {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        if (!intel_engine_can_store_dword(engine))
                                continue;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                goto out;
                        }

                        intel_engine_pm_get(engine);
                        rq = tl_write(tl, engine, count);
                        intel_engine_pm_put(engine);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                goto out;
                        }

                        timelines[count++] = tl;
                        i915_request_put(rq);
                }
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        for (n = 0; n < count; n++) {
                struct intel_timeline *tl = timelines[n];

                if (!err && *tl->hwsp_seqno != n) {
                        pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                               n, *tl->hwsp_seqno);
                        err = -EINVAL;
                }
                intel_timeline_put(tl);
        }

        kvfree(timelines);
        return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        struct intel_timeline *tl;
        enum intel_engine_id id;
        int err = 0;

        /*
         * Across a seqno wrap, we need to keep the old cacheline alive for
         * foreign GPU references.
         */

        tl = intel_timeline_create(gt, NULL);
        if (IS_ERR(tl))
                return PTR_ERR(tl);

        if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
                goto out_free;

        err = intel_timeline_pin(tl);
        if (err)
                goto out_free;

        for_each_engine(engine, gt, id) {
                const u32 *hwsp_seqno[2];
                struct i915_request *rq;
                u32 seqno[2];

                if (!intel_engine_can_store_dword(engine))
                        continue;

                rq = intel_engine_create_kernel_request(engine);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto out;
                }

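                /*
                 * Start near the top of the u32 seqno space so that the
                 * second allocation below wraps; with the wrap the
                 * timeline must switch to a fresh HWSP cacheline.
                 */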
                tl->seqno = -4u;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
                         seqno[0], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[0] = tl->hwsp_seqno;

                mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
                err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
                mutex_unlock(&tl->mutex);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
                         seqno[1], tl->hwsp_offset);

                err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
                if (err) {
                        i915_request_add(rq);
                        goto out;
                }
                hwsp_seqno[1] = tl->hwsp_seqno;

                /* With wrap should come a new hwsp */
                GEM_BUG_ON(seqno[1] >= seqno[0]);
                GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

                i915_request_add(rq);

                if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                        pr_err("Wait for timeline writes timed out!\n");
                        err = -EIO;
                        goto out;
                }

                if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
                        pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
                               *hwsp_seqno[0], *hwsp_seqno[1],
                               seqno[0], seqno[1]);
                        err = -EINVAL;
                        goto out;
                }

                intel_gt_retire_requests(gt); /* recycle HWSP */
        }

out:
        if (igt_flush_test(gt->i915))
                err = -EIO;

        intel_timeline_unpin(tl);
out_free:
        intel_timeline_put(tl);
        return err;
}

static int live_hwsp_recycle(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned long count;
        int err = 0;

        /*
         * Check seqno writes into one timeline at a time. We expect to
         * recycle the breadcrumb slot between iterations and neither
         * want to confuse ourselves or the GPU.
         */

        count = 0;
        for_each_engine(engine, gt, id) {
                IGT_TIMEOUT(end_time);

                if (!intel_engine_can_store_dword(engine))
                        continue;

                intel_engine_pm_get(engine);

                do {
                        struct intel_timeline *tl;
                        struct i915_request *rq;

                        tl = checked_intel_timeline_create(gt);
                        if (IS_ERR(tl)) {
                                err = PTR_ERR(tl);
                                break;
                        }

                        rq = tl_write(tl, engine, count);
                        if (IS_ERR(rq)) {
                                intel_timeline_put(tl);
                                err = PTR_ERR(rq);
                                break;
                        }

                        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                                pr_err("Wait for timeline writes timed out!\n");
                                i915_request_put(rq);
                                intel_timeline_put(tl);
                                err = -EIO;
                                break;
                        }

                        if (*tl->hwsp_seqno != count) {
                                pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
                                       count, *tl->hwsp_seqno);
                                err = -EINVAL;
                        }

                        i915_request_put(rq);
                        intel_timeline_put(tl);
                        count++;

                        if (err)
                                break;
                } while (!__igt_timeout(end_time, NULL));

                intel_engine_pm_put(engine);
                if (err)
                        break;
        }

        return err;
}

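/* Entry point for the live selftests; skipped if the GT is already wedged. */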
int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_hwsp_recycle),
                SUBTEST(live_hwsp_engine),
                SUBTEST(live_hwsp_alternate),
                SUBTEST(live_hwsp_wrap),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return intel_gt_live_subtests(tests, &i915->gt);
}