// SPDX-License-Identifier: MIT
/*
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "selftests/igt_flush_test.h"
#include "selftests/lib_sw_fence.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES;
}

static int selftest_tl_pin(struct intel_timeline *tl)
{
	struct i915_gem_ww_ctx ww;
	int err;

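	/* Lock the HWSP object and pin the timeline, replaying on ww deadlock */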
	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww);
	if (!err)
		err = intel_timeline_pin(tl, &ww);

	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

/* Only half of seqno's are usable, see __intel_timeline_get_seqno() */
#define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_unpin(tl);
		intel_timeline_put(tl);
	}
}

static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		err = selftest_tl_pin(tl);
		if (err) {
			intel_timeline_put(tl);
			return err;
		}

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_unpin(tl);
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSP do not overlap.
	 * Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}

struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself becomes a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtract it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
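	/* prng32_1M: cost of ~1M (2^20) prng calls in ns, kept scaled for precision */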
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

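	/* The MI_STORE_DWORD_IMM address layout varies across generations */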
	if (GRAPHICS_VER(rq->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (GRAPHICS_VER(rq->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static struct i915_request *
checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = selftest_tl_pin(tl);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_unpin(tl);
		return ERR_PTR(-EINVAL);
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = checked_tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	err = selftest_tl_pin(tl);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

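		/* Choose a seqno so that the next two allocations straddle the wrap */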
		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

static int emit_read_hwsp(struct i915_request *rq,
			  u32 seqno, u32 hwsp,
			  u32 *addr)
{
	const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = *addr;
	*cs++ = 0;
	*cs++ = seqno;
	*addr += 4;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = hwsp;
	*cs++ = 0;

	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = gpr;
	*cs++ = *addr;
	*cs++ = 0;
	*addr += 4;

	intel_ring_advance(rq, cs);

	return 0;
}

struct hwsp_watcher {
	struct i915_vma *vma;
	struct i915_request *rq;
	u32 addr;
	u32 *map;
};

static bool cmp_lt(u32 a, u32 b)
{
	return a < b;
}

static bool cmp_gte(u32 a, u32 b)
{
	return a >= b;
}

static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(w->map)) {
		i915_gem_object_put(obj);
		return PTR_ERR(w->map);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return PTR_ERR(vma);
	}

	w->vma = vma;
	w->addr = i915_ggtt_offset(vma);
	return 0;
}

static void switch_tl_lock(struct i915_request *from, struct i915_request *to)
{
	/* some light mutex handling for the following ... */

	if (from) {
		lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie);
		mutex_unlock(&from->context->timeline->mutex);
	}

	if (to) {
		mutex_lock(&to->context->timeline->mutex);
		to->cookie = lockdep_pin_lock(&to->context->timeline->mutex);
	}
}

static int create_watcher(struct hwsp_watcher *w,
			  struct intel_engine_cs *engine,
			  int ringsz)
{
	struct intel_context *ce;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->ring_size = ringsz;
	w->rq = intel_context_create_request(ce);
	intel_context_put(ce);
	if (IS_ERR(w->rq))
		return PTR_ERR(w->rq);

	w->addr = i915_ggtt_offset(w->vma);

	switch_tl_lock(w->rq, NULL);

	return 0;
}

static int check_watcher(struct hwsp_watcher *w, const char *name,
			 bool (*op)(u32 hwsp, u32 seqno))
{
	struct i915_request *rq = fetch_and_zero(&w->rq);
	u32 offset, end;
	int err;

	GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);

	i915_request_get(rq);
	switch_tl_lock(NULL, rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ) < 0) {
		err = -ETIME;
		goto out;
	}

	err = 0;
	offset = 0;
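	/* emit_read_hwsp() filled the map with (seqno, HWSP value) pairs */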
	end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
	while (offset < end) {
		if (!op(w->map[offset + 1], w->map[offset])) {
			pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
			       name, w->map[offset + 1], w->map[offset]);
			err = -EINVAL;
		}

		offset += 2;
	}

out:
	i915_request_put(rq);
	return err;
}

static void cleanup_watcher(struct hwsp_watcher *w)
{
	if (w->rq) {
		switch_tl_lock(NULL, w->rq);

		i915_request_add(w->rq);
	}

	i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
}

static bool retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	mutex_lock(&tl->mutex);
	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
	mutex_unlock(&tl->mutex);

	return !i915_active_fence_isset(&tl->last_request);
}

static struct i915_request *wrap_timeline(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	struct intel_timeline *tl = ce->timeline;
	u32 seqno = rq->fence.seqno;

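	/* Keep submitting requests until the timeline seqno wraps past rq's */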
	while (tl->seqno >= seqno) {
		i915_request_put(rq);
		rq = intel_context_create_request(ce);
		if (IS_ERR(rq))
			return rq;

		i915_request_get(rq);
		i915_request_add(rq);
	}

	i915_request_put(rq);
	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	return rq;
}

static int live_hwsp_read(void *arg)
{
	struct intel_gt *gt = arg;
	struct hwsp_watcher watcher[2] = {};
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;
	int i;

	/*
	 * If we take a reference to the HWSP for reading on the GPU, that
	 * read may be arbitrarily delayed (either by foreign fences or
	 * priority saturation) and a wrap can happen within 30 minutes.
	 * When the GPU read is finally submitted it should be correct,
	 * even across multiple wraps.
	 */

	if (GRAPHICS_VER(gt->i915) < 8) /* CS convenience [SRM/LRM] */
		return 0;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb)
		goto out_free;

	for (i = 0; i < ARRAY_SIZE(watcher); i++) {
		err = setup_watcher(&watcher[i], gt);
		if (err)
			goto out;
	}

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		unsigned long count = 0;
		IGT_TIMEOUT(end_time);

		/* Create a request we can use for remote reading of the HWSP */
		err = create_watcher(&watcher[1], engine, SZ_512K);
		if (err)
			goto out;

		do {
			struct i915_sw_fence *submit;
			struct i915_request *rq;
			u32 hwsp, dummy;

			submit = heap_fence_create(GFP_KERNEL);
			if (!submit) {
				err = -ENOMEM;
				goto out;
			}

			err = create_watcher(&watcher[0], engine, SZ_4K);
			if (err)
				goto out;

			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			ce->timeline = intel_timeline_get(tl);

			/* Ensure timeline is mapped, done during first pin */
			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			/*
			 * Start at a new wrap, and set seqno right before
			 * another wrap, saving 30 minutes of nops
			 */
			tl->seqno = -12u + 2 * (count & 3);
			__intel_timeline_get_seqno(tl, &dummy);

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&rq->submit,
							    &watcher[0].rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[0].rq);
			err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[0].rq, /* before */
						     rq->fence.seqno, hwsp,
						     &watcher[0].addr);
			switch_tl_lock(watcher[0].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			switch_tl_lock(rq, watcher[1].rq);
			err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
			if (err == 0)
				err = emit_read_hwsp(watcher[1].rq, /* after */
						     rq->fence.seqno, hwsp,
						     &watcher[1].addr);
			switch_tl_lock(watcher[1].rq, rq);
			if (err) {
				i915_request_add(rq);
				intel_context_unpin(ce);
				intel_context_put(ce);
				goto out;
			}

			i915_request_get(rq);
			i915_request_add(rq);

			rq = wrap_timeline(rq);
			intel_context_unpin(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}

			err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
							    &rq->fence, 0,
							    GFP_KERNEL);
			if (err < 0) {
				i915_request_put(rq);
				goto out;
			}

			err = check_watcher(&watcher[0], "before", cmp_lt);
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			if (err) {
				i915_request_put(rq);
				goto out;
			}
			count++;

			/* Flush the timeline before manually wrapping again */
			if (i915_request_wait(rq,
					      I915_WAIT_INTERRUPTIBLE,
					      HZ) < 0) {
				err = -ETIME;
				i915_request_put(rq);
				goto out;
			}
			retire_requests(tl);
			i915_request_put(rq);

			/* Single requests are limited to half a ring at most */
			if (8 * watcher[1].rq->ring->emit >
			    3 * watcher[1].rq->ring->size)
				break;

		} while (!__igt_timeout(end_time, NULL) &&
			 count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2);

		pr_info("%s: simulated %lu wraps\n", engine->name, count);
		err = check_watcher(&watcher[1], "after", cmp_gte);
		if (err)
			goto out;
	}

out:
	for (i = 0; i < ARRAY_SIZE(watcher); i++)
		cleanup_watcher(&watcher[i]);

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
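		/* Back the seqno up so the requests below wrap around zero */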
		tl->seqno = -2u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb)
			goto out;

		err = intel_context_pin(ce);
		if (err)
			goto out;

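		/* Back the seqno up so the requests below wrap around zero */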
		tl->seqno = -4u;
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_unpin;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out_unpin;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out_unpin;
			}
		}
out_unpin:
		intel_context_unpin(ce);
out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and neither see
	 * much contention nor corruption as the slots are reused.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = checked_tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_read),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}