#include <linux/kthread.h>

#include "gem/i915_gem_context.h"
#include "intel_engine_pm.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_wedge_me.h"
#include "selftests/igt_atomic.h"

#include "selftests/mock_drm.h"

#include "gem/selftests/mock_context.h"
#include "gem/selftests/igt_gem_utils.h"

#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait for an engine to idle */

struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws; /* seqno page, one dword per context */
	struct drm_i915_gem_object *obj; /* the spinning batch */
	struct i915_gem_context *ctx;
	u32 *seqno; /* CPU map of hws */
	u32 *batch; /* CPU map of obj */
};

static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->ctx = kernel_context(i915);
	if (IS_ERR(h->ctx))
		return PTR_ERR(h->ctx);

	GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx));

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws)) {
		err = PTR_ERR(h->hws);
		goto err_ctx;
	}

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					i915_coherent_map_type(i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
err_ctx:
	kernel_context_close(h->ctx);
	return err;
}

static u64 hws_address(const struct i915_vma *hws,
		       const struct i915_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32) * rq->fence.context);
}

static int move_to_active(struct i915_vma *vma,
			  struct i915_request *rq,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = h->ctx->vm ?: &i915->ggtt.vm;
	struct i915_request *rq = NULL;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						i915_coherent_map_type(h->i915));
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return ERR_CAST(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return ERR_CAST(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return ERR_PTR(err);

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	rq = igt_request_alloc(h->ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin_hws;
	}

	err = move_to_active(vma, rq, 0);
	if (err)
		goto cancel_rq;

	err = move_to_active(hws, rq, 0);
	if (err)
		goto cancel_rq;

	/*
	 * The batch writes the request's seqno into the hws page and then
	 * spins forever: the final MI_BATCH_BUFFER_START jumps back to the
	 * start of the batch, so the MI_BATCH_BUFFER_END below is only
	 * reached once the test rewrites the batch.
	 */
	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached! */
	i915_gem_chipset_flush(h->i915);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto cancel_rq;
	}

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

cancel_rq:
	if (err) {
		i915_request_skip(rq, err);
		i915_request_add(rq);
	}
unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err ? ERR_PTR(err) : rq;
}

static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE / sizeof(u32))]);
}

static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(h->i915);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	kernel_context_close(h->ctx);

	igt_flush_test(h->i915, I915_WAIT_LOCKED);
}

static bool wait_until_running(struct hang *h, struct i915_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

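	/* Basic check that we can execute our hanging batch */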
	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct igt_wedge_me w;
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		i915_request_add(rq);

		timeout = 0;
		igt_wedge_on_timeout(&w, i915, HZ / 10)
			timeout = i915_request_wait(rq, 0,
						    MAX_SCHEDULE_TIMEOUT);
		if (i915_reset_failed(i915))
			timeout = -EIO;

		i915_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static bool wait_for_idle(struct intel_engine_cs *engine)
{
	return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
}

static int igt_reset_nop(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	unsigned int reset_count, count;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	IGT_TIMEOUT(end_time);
	int err = 0;

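	/* Check that we can reset during non-user portions of requests */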
	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&i915->drm.struct_mutex);
	ctx = live_context(i915, file);
	mutex_unlock(&i915->drm.struct_mutex);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}

	i915_gem_context_clear_bannable(ctx);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	reset_count = i915_reset_count(&i915->gpu_error);
	count = 0;
	do {
		mutex_lock(&i915->drm.struct_mutex);
		for_each_engine(engine, i915, id) {
			int i;

			for (i = 0; i < 16; i++) {
				struct i915_request *rq;

				rq = igt_request_alloc(ctx, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_add(rq);
			}
		}
		mutex_unlock(&i915->drm.struct_mutex);

		igt_global_reset_lock(i915);
		i915_reset(i915, ALL_ENGINES, NULL);
		igt_global_reset_unlock(i915);
		if (i915_reset_failed(i915)) {
			err = -EIO;
			break;
		}

		if (i915_reset_count(&i915->gpu_error) !=
		    reset_count + ++count) {
			pr_err("Full GPU reset not recorded!\n");
			err = -EINVAL;
			break;
		}

		err = igt_flush_test(i915, 0);
		if (err)
			break;
	} while (time_before(jiffies, end_time));
	pr_info("%s: %d resets\n", __func__, count);

	mutex_lock(&i915->drm.struct_mutex);
	err = igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

out:
	mock_file_free(i915, file);
	if (i915_reset_failed(i915))
		err = -EIO;
	return err;
}

static int igt_reset_nop_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	int err = 0;

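	/* Check that we can engine-reset during non-user portions */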
	if (!intel_has_reset_engine(i915))
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&i915->drm.struct_mutex);
	ctx = live_context(i915, file);
	mutex_unlock(&i915->drm.struct_mutex);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}

	i915_gem_context_clear_bannable(ctx);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	for_each_engine(engine, i915, id) {
		unsigned int reset_count, reset_engine_count;
		unsigned int count;
		IGT_TIMEOUT(end_time);

		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);
		count = 0;

		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			int i;

			if (!wait_for_idle(engine)) {
				pr_err("%s failed to idle before reset\n",
				       engine->name);
				err = -EIO;
				break;
			}

			mutex_lock(&i915->drm.struct_mutex);
			for (i = 0; i < 16; i++) {
				struct i915_request *rq;

				rq = igt_request_alloc(ctx, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_add(rq);
			}
			mutex_unlock(&i915->drm.struct_mutex);

			err = i915_reset_engine(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			if (i915_reset_count(&i915->gpu_error) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
			    reset_engine_count + ++count) {
				pr_err("%s engine reset not recorded!\n",
				       engine->name);
				err = -EINVAL;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);

		if (err)
			break;

		err = igt_flush_test(i915, 0);
		if (err)
			break;
	}

	mutex_lock(&i915->drm.struct_mutex);
	err = igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
out:
	mock_file_free(i915, file);
	if (i915_reset_failed(i915))
		err = -EIO;
	return err;
}

static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err = 0;

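	/* Check that we can issue an engine reset on an idle or active engine */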
	if (!intel_has_reset_engine(i915))
		return 0;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		err = hang_init(&h, i915);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			return err;
	}

	for_each_engine(engine, i915, id) {
		unsigned int reset_count, reset_engine_count;
		IGT_TIMEOUT(end_time);

		if (active && !intel_engine_can_store_dword(engine))
			continue;

		if (!wait_for_idle(engine)) {
			pr_err("%s failed to idle before reset\n",
			       engine->name);
			err = -EIO;
			break;
		}

		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		intel_engine_pm_get(engine);
		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			if (active) {
				struct i915_request *rq;

				mutex_lock(&i915->drm.struct_mutex);
				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					mutex_unlock(&i915->drm.struct_mutex);
					break;
				}

				i915_request_get(rq);
				i915_request_add(rq);
				mutex_unlock(&i915->drm.struct_mutex);

				if (!wait_until_running(&h, rq)) {
					struct drm_printer p = drm_info_printer(i915->drm.dev);

					pr_err("%s: Failed to start request %llx, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			err = i915_reset_engine(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			if (i915_reset_count(&i915->gpu_error) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
			    ++reset_engine_count) {
				pr_err("%s engine reset not recorded!\n",
				       engine->name);
				err = -EINVAL;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		intel_engine_pm_put(engine);

		if (err)
			break;

		err = igt_flush_test(i915, 0);
		if (err)
			break;
	}

	if (i915_reset_failed(i915))
		err = -EIO;

	if (active) {
		mutex_lock(&i915->drm.struct_mutex);
		hang_fini(&h);
		mutex_unlock(&i915->drm.struct_mutex);
	}

	return err;
}

static int igt_reset_idle_engine(void *arg)
{
	return __igt_reset_engine(arg, false);
}

static int igt_reset_active_engine(void *arg)
{
	return __igt_reset_engine(arg, true);
}

struct active_engine {
	struct task_struct *task;
	struct intel_engine_cs *engine;
	unsigned long resets;
	unsigned int flags;
};

#define TEST_ACTIVE	BIT(0)
#define TEST_OTHERS	BIT(1)
#define TEST_SELF	BIT(2)
#define TEST_PRIORITY	BIT(3)

static int active_request_put(struct i915_request *rq)
{
	int err = 0;

	if (!rq)
		return 0;

	if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
		GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n",
			  rq->engine->name,
			  rq->fence.context,
			  rq->fence.seqno);
		GEM_TRACE_DUMP();

		i915_gem_set_wedged(rq->i915);
		err = -EIO;
	}

	i915_request_put(rq);

	return err;
}

static int active_engine(void *data)
{
	I915_RND_STATE(prng);
	struct active_engine *arg = data;
	struct intel_engine_cs *engine = arg->engine;
	struct i915_request *rq[8] = {};
	struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	for (count = 0; count < ARRAY_SIZE(ctx); count++) {
		mutex_lock(&engine->i915->drm.struct_mutex);
		ctx[count] = live_context(engine->i915, file);
		mutex_unlock(&engine->i915->drm.struct_mutex);
		if (IS_ERR(ctx[count])) {
			err = PTR_ERR(ctx[count]);
			while (--count)
				i915_gem_context_put(ctx[count]);
			goto err_file;
		}
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
		struct i915_request *old = rq[idx];
		struct i915_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = igt_request_alloc(ctx[idx], engine);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		if (arg->flags & TEST_PRIORITY)
			ctx[idx]->sched.priority =
				i915_prandom_u32_max_state(512, &prng);

		rq[idx] = i915_request_get(new);
		i915_request_add(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		err = active_request_put(old);
		if (err)
			break;

		cond_resched();
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++) {
		int err__ = active_request_put(rq[count]);

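		/* Keep the first error */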
		if (!err)
			err = err__;
	}

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int __igt_reset_engines(struct drm_i915_private *i915,
			       const char *test_name,
			       unsigned int flags)
{
	struct intel_engine_cs *engine, *other;
	enum intel_engine_id id, tmp;
	struct hang h;
	int err = 0;

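	/*
	 * Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */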
	if (!intel_has_reset_engine(i915))
		return 0;

	if (flags & TEST_ACTIVE) {
		mutex_lock(&i915->drm.struct_mutex);
		err = hang_init(&h, i915);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			return err;

		if (flags & TEST_PRIORITY)
			h.ctx->sched.priority = 1024;
	}

	for_each_engine(engine, i915, id) {
		struct active_engine threads[I915_NUM_ENGINES] = {};
		unsigned long global = i915_reset_count(&i915->gpu_error);
		unsigned long count = 0, reported;
		IGT_TIMEOUT(end_time);

		if (flags & TEST_ACTIVE &&
		    !intel_engine_can_store_dword(engine))
			continue;

		if (!wait_for_idle(engine)) {
			pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
			       engine->name, test_name);
			err = -EIO;
			break;
		}

		memset(threads, 0, sizeof(threads));
		for_each_engine(other, i915, tmp) {
			struct task_struct *tsk;

			threads[tmp].resets =
				i915_reset_engine_count(&i915->gpu_error,
							other);

			if (!(flags & TEST_OTHERS))
				continue;

			if (other == engine && !(flags & TEST_SELF))
				continue;

			threads[tmp].engine = other;
			threads[tmp].flags = flags;

			tsk = kthread_run(active_engine, &threads[tmp],
					  "igt/%s", other->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp].task = tsk;
			get_task_struct(tsk);
		}

		intel_engine_pm_get(engine);
		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		do {
			struct i915_request *rq = NULL;

			if (flags & TEST_ACTIVE) {
				mutex_lock(&i915->drm.struct_mutex);
				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					mutex_unlock(&i915->drm.struct_mutex);
					break;
				}

				i915_request_get(rq);
				i915_request_add(rq);
				mutex_unlock(&i915->drm.struct_mutex);

				if (!wait_until_running(&h, rq)) {
					struct drm_printer p = drm_info_printer(i915->drm.dev);

					pr_err("%s: Failed to start request %llx, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}
			}

			err = i915_reset_engine(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
				       engine->name, test_name, err);
				break;
			}

			count++;

			if (rq) {
				if (i915_request_wait(rq, 0, HZ / 5) < 0) {
					struct drm_printer p =
						drm_info_printer(i915->drm.dev);

					pr_err("i915_reset_engine(%s:%s):"
					       " failed to complete request after reset\n",
					       engine->name, test_name);
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);
					i915_request_put(rq);

					GEM_TRACE_DUMP();
					i915_gem_set_wedged(i915);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
				struct drm_printer p =
					drm_info_printer(i915->drm.dev);

				pr_err("i915_reset_engine(%s:%s):"
				       " failed to idle after reset\n",
				       engine->name, test_name);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				err = -EIO;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
		intel_engine_pm_put(engine);
		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
			engine->name, test_name, count);

		reported = i915_reset_engine_count(&i915->gpu_error, engine);
		reported -= threads[engine->id].resets;
		if (reported != count) {
			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
			       engine->name, test_name, count, reported);
			if (!err)
				err = -EINVAL;
		}

unwind:
		for_each_engine(other, i915, tmp) {
			int ret;

			if (!threads[tmp].task)
				continue;

			ret = kthread_stop(threads[tmp].task);
			if (ret) {
				pr_err("kthread for other engine %s failed, err=%d\n",
				       other->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp].task);

			if (other != engine &&
			    threads[tmp].resets !=
			    i915_reset_engine_count(&i915->gpu_error, other)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       other->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       other) -
				       threads[tmp].resets);
				if (!err)
					err = -EINVAL;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			if (!err)
				err = -EINVAL;
		}

		if (err)
			break;

		mutex_lock(&i915->drm.struct_mutex);
		err = igt_flush_test(i915, I915_WAIT_LOCKED);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err)
			break;
	}

	if (i915_reset_failed(i915))
		err = -EIO;

	if (flags & TEST_ACTIVE) {
		mutex_lock(&i915->drm.struct_mutex);
		hang_fini(&h);
		mutex_unlock(&i915->drm.struct_mutex);
	}

	return err;
}

static int igt_reset_engines(void *arg)
{
	static const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "idle", 0 },
		{ "active", TEST_ACTIVE },
		{ "others-idle", TEST_OTHERS },
		{ "others-active", TEST_OTHERS | TEST_ACTIVE },
		{
			"others-priority",
			TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY
		},
		{
			"self-priority",
			TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
		},
		{ }
	};
	struct drm_i915_private *i915 = arg;
	typeof(*phases) *p;
	int err;

	for (p = phases; p->name; p++) {
		if (p->flags & TEST_PRIORITY) {
			if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
				continue;
		}

		err = __igt_reset_engines(arg, p->name, p->flags);
		if (err)
			return err;
	}

	return 0;
}

static u32 fake_hangcheck(struct drm_i915_private *i915,
			  intel_engine_mask_t mask)
{
	u32 count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, mask, NULL);

	return count;
}

static int igt_reset_wait(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS0]))
		return 0;

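	/* Check that we detect a stuck waiter and issue a reset */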
	igt_global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS0]);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (!wait_until_running(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("%s: Failed to start request %llx, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_gem_set_wedged(i915);

		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(i915, ALL_ENGINES);

	timeout = i915_request_wait(rq, 0, 10);
	if (timeout < 0) {
		pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	igt_global_reset_unlock(i915);

	if (i915_reset_failed(i915))
		return -EIO;

	return err;
}

struct evict_vma {
	struct completion completion;
	struct i915_vma *vma;
};

static int evict_vma(void *data)
{
	struct evict_vma *arg = data;
	struct i915_address_space *vm = arg->vma->vm;
	struct drm_i915_private *i915 = vm->i915;
	struct drm_mm_node evict = arg->vma->node;
	int err;

	complete(&arg->completion);

	mutex_lock(&i915->drm.struct_mutex);
	err = i915_gem_evict_for_node(vm, &evict, 0);
	mutex_unlock(&i915->drm.struct_mutex);

	return err;
}

static int evict_fence(void *data)
{
	struct evict_vma *arg = data;
	struct drm_i915_private *i915 = arg->vma->vm->i915;
	int err;

	complete(&arg->completion);

	mutex_lock(&i915->drm.struct_mutex);

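	/* Mark the fence register as dirty to force an mmio update. */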
	err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
	if (err) {
		pr_err("Invalid Y-tiling settings; err:%d\n", err);
		goto out_unlock;
	}

	err = i915_vma_pin_fence(arg->vma);
	if (err) {
		pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
		goto out_unlock;
	}

	i915_vma_unpin_fence(arg->vma);

out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);

	return err;
}

static int __igt_reset_evict_vma(struct drm_i915_private *i915,
				 struct i915_address_space *vm,
				 int (*fn)(void *),
				 unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	struct task_struct *tsk = NULL;
	struct i915_request *rq;
	struct evict_vma arg;
	struct hang h;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS0]))
		return 0;

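	/* Check that we can recover an unbind stuck on a hanging request */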
	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	obj = i915_gem_object_create_internal(i915, SZ_1M);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto fini;
	}

	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
		err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512);
		if (err) {
			pr_err("Invalid X-tiling settings; err:%d\n", err);
			goto out_obj;
		}
	}

	arg.vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(arg.vma)) {
		err = PTR_ERR(arg.vma);
		goto out_obj;
	}

	rq = hang_create_request(&h, i915->engine[RCS0]);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_obj;
	}

	err = i915_vma_pin(arg.vma, 0, 0,
			   i915_vma_is_ggtt(arg.vma) ?
			   PIN_GLOBAL | PIN_MAPPABLE :
			   PIN_USER);
	if (err) {
		i915_request_add(rq);
		goto out_obj;
	}

	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
		err = i915_vma_pin_fence(arg.vma);
		if (err) {
			pr_err("Unable to pin X-tiled fence; err:%d\n", err);
			i915_vma_unpin(arg.vma);
			i915_request_add(rq);
			goto out_obj;
		}
	}

	i915_vma_lock(arg.vma);
	err = i915_vma_move_to_active(arg.vma, rq, flags);
	i915_vma_unlock(arg.vma);

	if (flags & EXEC_OBJECT_NEEDS_FENCE)
		i915_vma_unpin_fence(arg.vma);
	i915_vma_unpin(arg.vma);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto out_rq;

	mutex_unlock(&i915->drm.struct_mutex);

	if (!wait_until_running(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("%s: Failed to start request %llx, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_gem_set_wedged(i915);
		goto out_reset;
	}

	init_completion(&arg.completion);

	tsk = kthread_run(fn, &arg, "igt/evict_vma");
	if (IS_ERR(tsk)) {
		err = PTR_ERR(tsk);
		tsk = NULL;
		goto out_reset;
	}
	get_task_struct(tsk);

	wait_for_completion(&arg.completion);

	if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("igt/evict_vma kthread did not wait\n");
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_gem_set_wedged(i915);
		goto out_reset;
	}

out_reset:
	igt_global_reset_lock(i915);
	fake_hangcheck(rq->i915, rq->engine->mask);
	igt_global_reset_unlock(i915);

	if (tsk) {
		struct igt_wedge_me w;

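		/* The reset should have released the kthread; wedge if not */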
		igt_wedge_on_timeout(&w, i915, HZ / 10)
			err = kthread_stop(tsk);

		put_task_struct(tsk);
	}

	mutex_lock(&i915->drm.struct_mutex);
out_rq:
	i915_request_put(rq);
out_obj:
	i915_gem_object_put(obj);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_reset_failed(i915))
		return -EIO;

	return err;
}

static int igt_reset_evict_ggtt(void *arg)
{
	struct drm_i915_private *i915 = arg;

	return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
				     evict_vma, EXEC_OBJECT_WRITE);
}

static int igt_reset_evict_ppgtt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx;
	struct drm_file *file;
	int err;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&i915->drm.struct_mutex);
	ctx = live_context(i915, file);
	mutex_unlock(&i915->drm.struct_mutex);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}

	err = 0;
	if (ctx->vm)
		err = __igt_reset_evict_vma(i915, ctx->vm,
					    evict_vma, EXEC_OBJECT_WRITE);

out:
	mock_file_free(i915, file);
	return err;
}

static int igt_reset_evict_fence(void *arg)
{
	struct drm_i915_private *i915 = arg;

	return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
				     evict_fence, EXEC_OBJECT_NEEDS_FENCE);
}

static int wait_for_others(struct drm_i915_private *i915,
			   struct intel_engine_cs *exclude)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		if (engine == exclude)
			continue;

		if (!wait_for_idle(engine))
			return -EIO;
	}

	return 0;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

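	/* Check that we replay pending requests following a hang */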
	igt_global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct i915_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_request_get(prev);
		i915_request_add(prev);

		count = 0;
		do {
			struct i915_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h, engine);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_request_get(rq);
			i915_request_add(rq);

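			/*
			 * XXX We don't handle resetting the kernel context
			 * very well. If we trigger a device reset twice in
			 * quick succession while the kernel context is
			 * executing, we may end up skipping the breadcrumb.
			 * This is really only a problem for the selftest as
			 * normally there is a large interlude between resets
			 * (hangcheck), or we focus on resetting just one
			 * engine and so avoid repeatedly resetting innocents.
			 */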
			err = wait_for_others(i915, engine);
			if (err) {
				pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
				       __func__, engine->name);
				i915_request_put(rq);
				i915_request_put(prev);

				GEM_TRACE_DUMP();
				i915_gem_set_wedged(i915);
				goto fini;
			}

			if (!wait_until_running(&h, prev)) {
				struct drm_printer p = drm_info_printer(i915->drm.dev);

				pr_err("%s(%s): Failed to start request %llx, at %x\n",
				       __func__, engine->name,
				       prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				i915_request_put(prev);

				i915_gem_set_wedged(i915);

				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(i915, BIT(id));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		i915_request_put(prev);

		err = igt_flush_test(i915, I915_WAIT_LOCKED);
		if (err)
			break;
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	igt_global_reset_unlock(i915);

	if (i915_reset_failed(i915))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS0];
	struct hang h;
	struct i915_request *rq;
	struct i915_gpu_state *error;
	int err;

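	/* Check that we can issue a global GPU and engine reset */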
	if (!intel_has_reset_engine(i915))
		return 0;

	if (!engine || !intel_engine_can_store_dword(engine))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (!wait_until_running(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("%s: Failed to start request %llx, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		i915_gem_set_wedged(i915);

		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

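	/* Temporarily disable error capture */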
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	i915_handle_error(i915, engine->mask, 0, NULL);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
				     const struct igt_atomic_section *p,
				     const char *mode)
{
	struct tasklet_struct * const t = &engine->execlists.tasklet;
	int err;

	GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
		  engine->name, mode, p->name);

	tasklet_disable_nosync(t);
	p->critical_section_begin();

	err = i915_reset_engine(engine, NULL);

	p->critical_section_end();
	tasklet_enable(t);

	if (err)
		pr_err("i915_reset_engine(%s:%s) failed under %s\n",
		       engine->name, mode, p->name);

	return err;
}

static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
				   const struct igt_atomic_section *p)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_request *rq;
	struct hang h;
	int err;

	err = __igt_atomic_reset_engine(engine, p, "idle");
	if (err)
		return err;

	err = hang_init(&h, i915);
	if (err)
		return err;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (wait_until_running(&h, rq)) {
		err = __igt_atomic_reset_engine(engine, p, "active");
	} else {
		pr_err("%s(%s): Failed to start request %llx, at %x\n",
		       __func__, engine->name,
		       rq->fence.seqno, hws_seqno(&h, rq));
		i915_gem_set_wedged(i915);
		err = -EIO;
	}

	if (err == 0) {
		struct igt_wedge_me w;

		igt_wedge_on_timeout(&w, i915, HZ / 20)
			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		if (i915_reset_failed(i915))
			err = -EIO;
	}

	i915_request_put(rq);
out:
	hang_fini(&h);
	return err;
}

static int igt_reset_engines_atomic(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const typeof(*igt_atomic_phases) *p;
	int err = 0;

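	/* Check that the engine resets are usable from atomic context */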
	if (!intel_has_reset_engine(i915))
		return 0;

	if (USES_GUC_SUBMISSION(i915))
		return 0;

	igt_global_reset_lock(i915);
	mutex_lock(&i915->drm.struct_mutex);

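	/* Flush any requests before we get started and check basics */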
	if (!igt_force_reset(i915))
		goto unlock;

	for (p = igt_atomic_phases; p->name; p++) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, i915, id) {
			err = igt_atomic_reset_engine(engine, p);
			if (err)
				goto out;
		}
	}

out:
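	/* As we poke around the guts, do a full reset before continuing. */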
	igt_force_reset(i915);

unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	igt_global_reset_unlock(i915);

	return err;
}

int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_reset_nop),
		SUBTEST(igt_reset_nop_engine),
		SUBTEST(igt_reset_idle_engine),
		SUBTEST(igt_reset_active_engine),
		SUBTEST(igt_reset_engines),
		SUBTEST(igt_reset_engines_atomic),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_reset_wait),
		SUBTEST(igt_reset_evict_ggtt),
		SUBTEST(igt_reset_evict_ppgtt),
		SUBTEST(igt_reset_evict_fence),
		SUBTEST(igt_handle_error),
	};
	intel_wakeref_t wakeref;
	bool saved_hangcheck;
	int err;

	if (!intel_has_gpu_reset(i915))
		return 0;

	if (i915_terminally_wedged(i915))
		return -EIO;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
	drain_delayed_work(&i915->gpu_error.hangcheck_work);

	err = i915_subtests(tests, i915);

	mutex_lock(&i915->drm.struct_mutex);
	igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	i915_modparams.enable_hangcheck = saved_hangcheck;
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return err;
}