/*
 * i915_request.c - GEM request (dma_fence) management for the i915 driver:
 * request allocation, dependency tracking, submission to the backend,
 * retirement and waiting.
 */
#include <linux/dma-fence-array.h>
#include <linux/irq_work.h>
#include <linux/prefetch.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_trace.h"
#include "intel_pm.h"

struct execute_cb {
	struct list_head link;
	struct irq_work work;
	struct i915_sw_fence *fence;
	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
	struct i915_request *signal;
};

static struct i915_global_request {
	struct i915_global base;
	struct kmem_cache *slab_requests;
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_execute_cbs;
} global;

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	/*
	 * The timeline name is only valid while the request is alive on
	 * the GPU; once the request has been retired, its context (and so
	 * its name) may be freed. Since get_timeline_name() is only a
	 * debug aid, report "signaled" for completed fences rather than
	 * chase a possibly stale pointer.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

	return to_request(fence)->gem_context->name ?: "[i915]";
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	return i915_request_enable_breadcrumb(to_request(fence));
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_request_wait(to_request(fence),
				 interruptible | I915_WAIT_PRIORITY,
				 timeout);
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct i915_request *rq = to_request(fence);

	/*
	 * The request is put onto a RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
	i915_sw_fence_fini(&rq->submit);
	i915_sw_fence_fini(&rq->semaphore);

	kmem_cache_free(global.slab_requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};

static void irq_execute_cb(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	i915_sw_fence_complete(cb->fence);
	kmem_cache_free(global.slab_execute_cbs, cb);
}

static void irq_execute_cb_hook(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	cb->hook(container_of(cb->fence, struct i915_request, submit),
		 &cb->signal->fence);
	i915_request_put(cb->signal);

	irq_execute_cb(wrk);
}

static void __notify_execute_cb(struct i915_request *rq)
{
	struct execute_cb *cb;

	lockdep_assert_held(&rq->lock);

	if (list_empty(&rq->execute_cb))
		return;

	list_for_each_entry(cb, &rq->execute_cb, link)
		irq_work_queue(&cb->work);

	/*
	 * The callbacks run from irq_work so that we do not call third
	 * party code while holding rq->lock. Once queued, reset the list
	 * so that the callbacks are not fired a second time should the
	 * request be resubmitted.
	 */
	INIT_LIST_HEAD(&rq->execute_cb);
}

static inline void
remove_from_client(struct i915_request *request)
{
	struct drm_i915_file_private *file_priv;

	if (!READ_ONCE(request->file_priv))
		return;

	rcu_read_lock();
	file_priv = xchg(&request->file_priv, NULL);
	if (file_priv) {
		spin_lock(&file_priv->mm.lock);
		list_del(&request->client_link);
		spin_unlock(&file_priv->mm.lock);
	}
	rcu_read_unlock();
}

static void free_capture_list(struct i915_request *request)
{
	struct i915_capture_list *capture;

	capture = request->capture_list;
	while (capture) {
		struct i915_capture_list *next = capture->next;

		kfree(capture);
		capture = next;
	}
}

static void remove_from_engine(struct i915_request *rq)
{
	struct intel_engine_cs *engine, *locked;

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	locked = READ_ONCE(rq->engine);
	spin_lock_irq(&locked->active.lock);
	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
		spin_unlock(&locked->active.lock);
		spin_lock(&engine->active.lock);
		locked = engine;
	}
	list_del(&rq->sched.link);
	spin_unlock_irq(&locked->active.lock);
}

bool i915_request_retire(struct i915_request *rq)
{
	if (!i915_request_completed(rq))
		return false;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  hwsp_seqno(rq));

	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
	trace_i915_request_retire(rq);

	/*
	 * We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	GEM_BUG_ON(!list_is_first(&rq->link,
				  &i915_request_timeline(rq)->requests));
	rq->ring->head = rq->postfix;

	/*
	 * We only loosely track inflight requests across preemption,
	 * and so we may find ourselves attempting to retire a _completed_
	 * request that we have removed from the HW and put back on a
	 * run queue.
	 */
	remove_from_engine(rq);

	spin_lock_irq(&rq->lock);
	i915_request_mark_complete(rq);
	if (!i915_request_signaled(rq))
		dma_fence_signal_locked(&rq->fence);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
		i915_request_cancel_breadcrumb(rq);
	if (i915_request_has_waitboost(rq)) {
		GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
		atomic_dec(&rq->engine->gt->rps.num_waiters);
	}
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
		__notify_execute_cb(rq);
	}
	GEM_BUG_ON(!list_empty(&rq->execute_cb));
	spin_unlock_irq(&rq->lock);

	remove_from_client(rq);
	list_del(&rq->link);

	intel_context_exit(rq->hw_context);
	intel_context_unpin(rq->hw_context);

	free_capture_list(rq);
	i915_sched_node_fini(&rq->sched);
	i915_request_put(rq);

	return true;
}

void i915_request_retire_upto(struct i915_request *rq)
{
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_request *tmp;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  hwsp_seqno(rq));

	GEM_BUG_ON(!i915_request_completed(rq));

	do {
		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
	} while (i915_request_retire(tmp) && tmp != rq);
}

static int
__await_execution(struct i915_request *rq,
		  struct i915_request *signal,
		  void (*hook)(struct i915_request *rq,
			       struct dma_fence *signal),
		  gfp_t gfp)
{
	struct execute_cb *cb;

	if (i915_request_is_active(signal)) {
		if (hook)
			hook(rq, &signal->fence);
		return 0;
	}

	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
	if (!cb)
		return -ENOMEM;

	cb->fence = &rq->submit;
	i915_sw_fence_await(cb->fence);
	init_irq_work(&cb->work, irq_execute_cb);

	if (hook) {
		cb->hook = hook;
		cb->signal = i915_request_get(signal);
		cb->work.func = irq_execute_cb_hook;
	}

	spin_lock_irq(&signal->lock);
	if (i915_request_is_active(signal)) {
		if (hook) {
			hook(rq, &signal->fence);
			i915_request_put(signal);
		}
		i915_sw_fence_complete(cb->fence);
		kmem_cache_free(global.slab_execute_cbs, cb);
	} else {
		list_add_tail(&cb->link, &signal->execute_cb);
	}
	spin_unlock_irq(&signal->lock);

	/* Copy across semaphore status as we need the same behaviour */
	rq->sched.flags |= signal->sched.flags;
	return 0;
}

bool __i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	bool result = false;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  hwsp_seqno(request));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);
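
	/*
	 * With preempt-to-busy we may unsubmit a request from the HW yet
	 * leave it running until the next ack, so it can complete in the
	 * meantime. A completed request skips straight to the bookkeeping
	 * below: it only needs to be moved onto the engine's active list
	 * and have its execute callbacks and breadcrumbs signaled.
	 */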
	if (i915_request_completed(request))
		goto xfer;

	if (i915_gem_context_is_banned(request->gem_context))
		i915_request_skip(request, -EIO);
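
	/*
	 * Are we using semaphores when the GPU is already saturated?
	 *
	 * If we installed a semaphore busywait for this request but the
	 * signaler had already completed by the time we are submitted,
	 * the semaphore bought us nothing but extra bus traffic. Record
	 * the engines we busywaited upon as saturated so that later
	 * requests on this engine prefer plain fence waits.
	 */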
	if (request->sched.semaphores &&
	    i915_sw_fence_signaled(&request->semaphore))
		engine->saturated |= request->sched.semaphores;

	engine->emit_fini_breadcrumb(request,
				     request->ring->vaddr + request->postfix);

	trace_i915_request_execute(request);
	engine->serial++;
	result = true;

xfer:	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
		list_move_tail(&request->sched.link, &engine->active.requests);

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
	    !i915_request_enable_breadcrumb(request))
		intel_engine_queue_breadcrumbs(engine);

	__notify_execute_cb(request);

	spin_unlock(&request->lock);

	return result;
}

void i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_submit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	GEM_TRACE("%s fence %llx:%lld, current %d\n",
		  engine->name,
		  request->fence.context, request->fence.seqno,
		  hwsp_seqno(request));

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_cancel_breadcrumb(request);

	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);

	spin_unlock(&request->lock);

	/* We've already spun, don't charge on resubmitting. */
	if (request->sched.semaphores && i915_request_started(request)) {
		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
		request->sched.semaphores = 0;
	}

	/*
	 * There is no need to wake any waiters here; they will be woken
	 * when the request is eventually resubmitted via
	 * __i915_request_submit() or when it finally completes.
	 */
}

void i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_unsubmit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		trace_i915_request_submit(request);

		if (unlikely(fence->error))
			i915_request_skip(request, fence->error);

		/*
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged(). We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
		 */
		rcu_read_lock();
		request->engine->submit_request(request);
		rcu_read_unlock();
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), semaphore);

	switch (state) {
	case FENCE_COMPLETE:
		i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static void retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
}

static noinline struct i915_request *
request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
{
	struct i915_request *rq;

	if (list_empty(&tl->requests))
		goto out;

	if (!gfpflags_allow_blocking(gfp))
		goto out;

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	i915_request_retire(rq);

	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (rq)
		return rq;

	/* Ratelimit ourselves to prevent oom from malicious clients */
	rq = list_last_entry(&tl->requests, typeof(*rq), link);
	cond_synchronize_rcu(rq->rcustate);

	/* Retire our old requests in the hope that we free some */
	retire_requests(tl);

out:
	return kmem_cache_alloc(global.slab_requests, gfp);
}

struct i915_request *
__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
	struct intel_timeline *tl = ce->timeline;
	struct i915_request *rq;
	u32 seqno;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Check that the caller provided an already pinned context */
	__intel_context_pin(ce);
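
	/*
	 * Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may race
	 * with the request being allocated from the slab freelist; the
	 * request we are writing to here may simultaneously be read by a
	 * lockless RCU walker. As the slab is SLAB_TYPESAFE_BY_RCU, such a
	 * reader must revalidate the request (engine, seqno, refcount)
	 * after acquiring its reference, and we in turn must be careful
	 * how we overwrite the contents: in particular we must not use
	 * kmem_cache_zalloc() as zeroing the struct would break a
	 * concurrent lookup.
	 */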
	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
		rq = request_alloc_slow(tl, gfp);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
		}
	}

	ret = intel_timeline_get_seqno(tl, rq, &seqno);
	if (ret)
		goto err_free;

	rq->i915 = ce->engine->i915;
	rq->hw_context = ce;
	rq->gem_context = ce->gem_context;
	rq->engine = ce->engine;
	rq->ring = ce->ring;
	rq->execution_mask = ce->engine->mask;

	rcu_assign_pointer(rq->timeline, tl);
	rq->hwsp_seqno = tl->hwsp_seqno;
	rq->hwsp_cacheline = tl->hwsp_cacheline;

	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */

	spin_lock_init(&rq->lock);
	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
		       tl->fence_context, seqno);

	/* We bump the ref for the fence chain */
	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);

	i915_sched_node_init(&rq->sched);

	/* No zalloc, must clear what we need by ourselves */
	rq->file_priv = NULL;
	rq->batch = NULL;
	rq->capture_list = NULL;
	rq->flags = 0;

	INIT_LIST_HEAD(&rq->execute_cb);
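
	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_request_add() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */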
	rq->reserved_space =
		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);

	/*
	 * Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	rq->head = rq->ring->emit;

	ret = rq->engine->request_alloc(rq);
	if (ret)
		goto err_unwind;

	rq->infix = rq->ring->emit; /* end of header; start of user payload */

	intel_context_mark_active(ce);
	return rq;

err_unwind:
	ce->ring->emit = rq->head;

	/* Make sure we didn't add ourselves to external state before freeing */
	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

err_free:
	kmem_cache_free(global.slab_requests, rq);
err_unreserve:
	intel_context_unpin(ce);
	return ERR_PTR(ret);
}

struct i915_request *
i915_request_create(struct intel_context *ce)
{
	struct i915_request *rq;
	struct intel_timeline *tl;

	tl = intel_context_timeline_lock(ce);
	if (IS_ERR(tl))
		return ERR_CAST(tl);

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	if (!list_is_last(&rq->link, &tl->requests))
		i915_request_retire(rq);

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_KERNEL);
	intel_context_exit(ce); /* active reference transferred to request */
	if (IS_ERR(rq))
		goto err_unlock;

	/* Check that we do not interrupt ourselves with a new request */
	rq->cookie = lockdep_pin_lock(&tl->mutex);

	return rq;

err_unlock:
	intel_context_timeline_unlock(tl);
	return rq;
}

static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
	struct intel_timeline *tl;
	struct dma_fence *fence;
	int err;

	GEM_BUG_ON(i915_request_timeline(rq) ==
		   rcu_access_pointer(signal->timeline));

	rcu_read_lock();
	tl = rcu_dereference(signal->timeline);
	if (i915_request_started(signal) || !kref_get_unless_zero(&tl->kref))
		tl = NULL;
	rcu_read_unlock();
	if (!tl) /* already started or maybe even completed */
		return 0;

	fence = ERR_PTR(-EBUSY);
	if (mutex_trylock(&tl->mutex)) {
		fence = NULL;
		if (!i915_request_started(signal) &&
		    !list_is_first(&signal->link, &tl->requests)) {
			signal = list_prev_entry(signal, link);
			fence = dma_fence_get(&signal->fence);
		}
		mutex_unlock(&tl->mutex);
	}
	intel_timeline_put(tl);
	if (IS_ERR_OR_NULL(fence))
		return PTR_ERR_OR_ZERO(fence);

	err = 0;
	if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
		err = i915_sw_fence_await_dma_fence(&rq->submit,
						    fence, 0,
						    I915_FENCE_GFP);
	dma_fence_put(fence);

	return err;
}

static intel_engine_mask_t
already_busywaiting(struct i915_request *rq)
{
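	/*
	 * Polling a semaphore causes bus traffic, delaying other users of
	 * both the GPU and CPU. We only allow each engine to busywait on
	 * one semaphore at a time, so if this request already has a
	 * semaphore installed, or the engine is already saturated with
	 * semaphore waits that turned out to be pointless, prefer an
	 * ordinary fence wait instead.
	 *
	 * See the are-we-too-late? check in __i915_request_submit().
	 */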
	return rq->sched.semaphores | rq->engine->saturated;
}

static int
__emit_semaphore_wait(struct i915_request *to,
		      struct i915_request *from,
		      u32 seqno)
{
	const int has_token = INTEL_GEN(to->i915) >= 12;
	u32 hwsp_offset;
	int len, err;
	u32 *cs;

	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);

	/* We need to pin the signaler's HWSP until we are finished reading. */
	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
	if (err)
		return err;

	len = 4;
	if (has_token)
		len += 2;

	cs = intel_ring_begin(to, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Using greater-than-or-equal here means we have to worry
	 * about seqno wraparound. To side step that issue, we swap
	 * the timeline HWSP upon wrapping, so that everyone listening
	 * to the old (pre-wrap) values does not see the much smaller
	 * (post-wrap) value before the timeline itself is updated.
	 */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_GTE_SDD) +
		has_token;
	*cs++ = seqno;
	*cs++ = hwsp_offset;
	*cs++ = 0;
	if (has_token) {
		*cs++ = 0;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(to, cs);
	return 0;
}

static int
emit_semaphore_wait(struct i915_request *to,
		    struct i915_request *from,
		    gfp_t gfp)
{
	/* Just emit the first semaphore we see as request space is limited. */
	if (already_busywaiting(to) & from->engine->mask)
		goto await_fence;

	if (i915_request_await_start(to, from) < 0)
		goto await_fence;

	/* Only submit our spinner after the signaler is running! */
	if (__await_execution(to, from, NULL, gfp))
		goto await_fence;

	if (__emit_semaphore_wait(to, from, from->fence.seqno))
		goto await_fence;

	to->sched.semaphores |= from->engine->mask;
	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
	return 0;

await_fence:
	return i915_sw_fence_await_dma_fence(&to->submit,
					     &from->fence, 0,
					     I915_FENCE_GFP);
}

static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);
	GEM_BUG_ON(to->timeline == from->timeline);

	if (i915_request_completed(from))
		return 0;

	if (to->engine->schedule) {
		ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
		if (ret < 0)
			return ret;
	}

	if (to->engine == from->engine) {
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
						       &from->submit,
						       I915_FENCE_GFP);
	} else if (intel_engine_has_semaphores(to->engine) &&
		   to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
	} else {
		ret = i915_sw_fence_await_dma_fence(&to->submit,
						    &from->fence, 0,
						    I915_FENCE_GFP);
	}
	if (ret < 0)
		return ret;

	if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
		ret = i915_sw_fence_await_dma_fence(&to->semaphore,
						    &from->fence, 0,
						    I915_FENCE_GFP);
		if (ret < 0)
			return ret;
	}

	return 0;
}

int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	/*
	 * Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * Requests on the same timeline are explicitly ordered, along
		 * with their dependencies, by i915_request_add() which ensures
		 * that requests are submitted in-order through each ring.
		 */
		if (fence->context == rq->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context &&
		    intel_timeline_sync_is_later(i915_request_timeline(rq),
						 fence))
			continue;

		if (dma_fence_is_i915(fence))
			ret = i915_request_await_request(rq, to_request(fence));
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    fence->context ? I915_FENCE_TIMEOUT : 0,
							    I915_FENCE_GFP);
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context)
			intel_timeline_sync_set(i915_request_timeline(rq),
						fence);
	} while (--nchild);

	return 0;
}

static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
					  struct dma_fence *fence)
{
	return __intel_timeline_sync_is_later(tl,
					      fence->context,
					      fence->seqno - 1);
}

static int intel_timeline_sync_set_start(struct intel_timeline *tl,
					 const struct dma_fence *fence)
{
	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
}

static int
__i915_request_await_execution(struct i915_request *to,
			       struct i915_request *from,
			       void (*hook)(struct i915_request *rq,
					    struct dma_fence *signal))
{
	int err;

	/* Submit both requests at the same time */
	err = __await_execution(to, from, hook, I915_FENCE_GFP);
	if (err)
		return err;

	/* Squash repeated dependencies to the same timelines */
	if (intel_timeline_sync_has_start(i915_request_timeline(to),
					  &from->fence))
		return 0;

	/* Ensure both start together [after all semaphores in signal] */
	if (intel_engine_has_semaphores(to->engine))
		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
	else
		err = i915_request_await_start(to, from);
	if (err < 0)
		return err;

	/* Couple the dependency tree for PI on this exposed to->fence */
	if (to->engine->schedule) {
		err = i915_sched_node_add_dependency(&to->sched, &from->sched);
		if (err < 0)
			return err;
	}

	return intel_timeline_sync_set_start(i915_request_timeline(to),
					     &from->fence);
}

int
i915_request_await_execution(struct i915_request *rq,
			     struct dma_fence *fence,
			     void (*hook)(struct i915_request *rq,
					  struct dma_fence *signal))
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		/* XXX Error for signal-on-any fence arrays */

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * We don't squash repeated fence dependencies here as we
		 * want to run our callback in all cases.
		 */
		if (dma_fence_is_i915(fence))
			ret = __i915_request_await_execution(rq,
							     to_request(fence),
							     hook);
		else
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
							    I915_FENCE_TIMEOUT,
							    GFP_KERNEL);
		if (ret < 0)
			return ret;
	} while (--nchild);

	return 0;
}
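
/**
 * i915_request_await_object - set this request to (async) wait upon a bo
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the wait is on behalf of a writer
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (@write is set), the new request must wait
 *   for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */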
int
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
{
	struct dma_fence *excl;
	int ret = 0;

	if (write) {
		struct dma_fence **shared;
		unsigned int count, i;

		ret = dma_resv_get_fences_rcu(obj->base.resv,
					      &excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			ret = i915_request_await_dma_fence(to, shared[i]);
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
	} else {
		excl = dma_resv_get_excl_rcu(obj->base.resv);
	}

	if (excl) {
		if (ret == 0)
			ret = i915_request_await_dma_fence(to, excl);

		dma_fence_put(excl);
	}

	return ret;
}

void i915_request_skip(struct i915_request *rq, int error)
{
	void *vaddr = rq->ring->vaddr;
	u32 head;

	GEM_BUG_ON(!IS_ERR_VALUE((long)error));
	dma_fence_set_error(&rq->fence, error);

	if (rq->infix == rq->postfix)
		return;

	/*
	 * As this request likely depends on state from the lost
	 * context, clear out all the user operations leaving the
	 * breadcrumb at the end (so we get the fence notifications).
	 */
	head = rq->infix;
	if (rq->postfix < head) {
		memset(vaddr + head, 0, rq->ring->size - head);
		head = 0;
	}
	memset(vaddr + head, 0, rq->postfix - head);
	rq->infix = rq->postfix;
}

static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
	struct intel_timeline *timeline = i915_request_timeline(rq);
	struct i915_request *prev;
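
	/*
	 * Dependency tracking and request ordering along the timeline
	 * is special cased so that we can eliminate redundant ordering
	 * operations while building the request (we know that the timeline
	 * itself is ordered, and here we guarantee it).
	 *
	 * As we know we will need to emit tracking along the timeline,
	 * we embed the hooks into our request struct -- at the cost of
	 * having to have specialised no-allocation interfaces (which will
	 * be beneficial elsewhere).
	 *
	 * A second benefit to open-coding i915_request_await_request() is
	 * that we can apply a slight variant of the rules specialised
	 * for timelines that jump between engines (such as virtual engines).
	 * If we consider the case of a virtual engine, we must emit a
	 * dma-fence to prevent scheduling of the second request until the
	 * first is complete (to maximise our greedy late load balancing),
	 * and this precludes optimising to use semaphores for serialisation
	 * of a single timeline across engines.
	 */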
	prev = to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));
	if (prev && !i915_request_completed(prev)) {
		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);
		else
			__i915_sw_fence_await_dma_fence(&rq->submit,
							&prev->fence,
							&rq->dmaq);
		if (rq->engine->schedule)
			__i915_sched_node_add_dependency(&rq->sched,
							 &prev->sched,
							 &rq->dep,
							 0);
	}

	list_add_tail(&rq->link, &timeline->requests);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
	 * our i915_request_alloc() and called __i915_request_add() before
	 * us, the timeline will hold its seqno which is later than ours.
	 */
	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);

	return prev;
}
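
/*
 * NB: This function is not allowed to fail. Doing so would mean the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */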
struct i915_request *__i915_request_commit(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_ring *ring = rq->ring;
	u32 *cs;

	GEM_TRACE("%s fence %llx:%lld\n",
		  engine->name, rq->fence.context, rq->fence.seqno);

	/*
	 * To ensure that this call will not fail, space was reserved up
	 * front at request creation such that we cannot run out of ring
	 * space when emitting the final breadcrumb here.
	 */
	GEM_BUG_ON(rq->reserved_space > ring->space);
	rq->reserved_space = 0;
	rq->emitted_jiffies = jiffies;

	/*
	 * Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
	GEM_BUG_ON(IS_ERR(cs));
	rq->postfix = intel_ring_offset(rq, cs);

	return __i915_request_add_to_timeline(rq);
}

void __i915_request_queue(struct i915_request *rq,
			  const struct i915_sched_attr *attr)
{
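	/*
	 * Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */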
	i915_sw_fence_commit(&rq->semaphore);
	if (attr && rq->engine->schedule)
		rq->engine->schedule(rq, attr);
	i915_sw_fence_commit(&rq->submit);
}

void i915_request_add(struct i915_request *rq)
{
	struct i915_sched_attr attr = rq->gem_context->sched;
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_request *prev;

	lockdep_assert_held(&tl->mutex);
	lockdep_unpin_lock(&tl->mutex, rq->cookie);

	trace_i915_request_add(rq);

	prev = __i915_request_commit(rq);
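
	/*
	 * Boost actual workloads past semaphores!
	 *
	 * With semaphores we spin on one engine waiting for another,
	 * simply to reduce the latency of starting our work when
	 * the signaler completes. However, if there is any other
	 * work that we could be doing on this engine instead, that
	 * is better utilisation and will reduce the overall duration
	 * of the current work. To avoid PI boosting a semaphore
	 * far in the distance past over useful work, we keep a history
	 * of any semaphore use along our dependency chain.
	 */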
	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
		attr.priority |= I915_PRIORITY_NOSEMAPHORE;

	/*
	 * Boost priorities to new clients (new request flows).
	 *
	 * Allow interactive/synchronous clients to jump ahead of
	 * the bulk clients. (FQ_CODEL)
	 */
	if (list_empty(&rq->sched.signalers_list))
		attr.priority |= I915_PRIORITY_WAIT;

	local_bh_disable();
	__i915_request_queue(rq, &attr);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */

	/*
	 * In typical scenarios, we do not expect the previous request on
	 * the timeline to be still tracked by timeline->last_request if it
	 * has been completed. If the completed request is still here, that
	 * implies that request retirement is lagging behind our use of the
	 * ring buffer. However, since we cannot retire requests out of
	 * order, we only retire here if the previous request has completed.
	 *
	 * By retiring opportunistically while we still hold the timeline
	 * mutex, we apply a small amount of backpressure on busy clients
	 * that are filling the ring.
	 */
	if (prev &&
	    i915_request_completed(prev) &&
	    rcu_access_pointer(prev->timeline) == tl)
		i915_request_retire_upto(prev);

	mutex_unlock(&tl->mutex);
}

static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

static bool __i915_spin_request(const struct i915_request * const rq,
				int state, unsigned long timeout_us)
{
	unsigned int cpu;

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the context and so we can
	 * tell if the request has been started. If the request is not even
	 * running yet, it is a fair assumption that it will not complete
	 * within our relatively short timeout.
	 */
	if (!i915_request_is_running(rq))
		return false;
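
	/*
	 * When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible.
	 * However, if it is a slow request, we want to sleep as quickly as
	 * possible. The tradeoff between waiting and sleeping is roughly the
	 * time it takes to sleep on a request, on the order of a microsecond.
	 */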
	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_request_completed(rq))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}

struct request_wait {
	struct dma_fence_cb cb;
	struct task_struct *tsk;
};

static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct request_wait *wait = container_of(cb, typeof(*wait), cb);

	wake_up_process(wait->tsk);
}
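
/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -ERESTARTSYS if called with I915_WAIT_INTERRUPTIBLE and a signal
 * is pending before the request completes.
 */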
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	struct request_wait wait;

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (dma_fence_is_signaled(&rq->fence))
		return timeout;

	if (!timeout)
		return -ETIME;

	trace_i915_request_wait_begin(rq, flags);

	/*
	 * We must never wait on the GPU while holding a lock as we
	 * may need to perform a GPU reset. So while we don't need to
	 * serialise wait/reset with an explicit lock, we do want
	 * lockdep to detect potential dependency cycles.
	 */
	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);
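
	/*
	 * Optimistic spin before touching IRQs.
	 *
	 * We may use a rather large value here to offset the penalty of
	 * switching away from the active task. Frequently, the client will
	 * wait upon an old swapbuffer to throttle itself to remain within a
	 * frame of the gpu. If the client is running in lockstep with the
	 * gpu, then it should not be waiting long at all, and a sleep now
	 * will incur extra scheduler latency in producing the next frame.
	 * To try to avoid adding the cost of enabling/disabling the
	 * interrupt to the short wait, we first spin to see if the request
	 * would have completed in the time taken to set up the interrupt.
	 *
	 * Enabling the irq and hiding the scheduler latency of a context
	 * switch each cost on the order of microseconds to tens of
	 * microseconds, ignoring secondary impacts such as cache eviction.
	 *
	 * The scheme used for low-latency IO is called "hybrid interrupt
	 * polling". The suggestion there is to sleep until just before you
	 * expect to be woken by the device interrupt and then poll for its
	 * completion. That requires having a good predictor for the request
	 * duration, which we currently lack.
	 */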
	if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
		dma_fence_signal(&rq->fence);
		goto out;
	}
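
	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we sleep. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery).
	 */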
	if (flags & I915_WAIT_PRIORITY) {
		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
			intel_rps_boost(rq);
		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
	}

	wait.tsk = current;
	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
		goto out;

	for (;;) {
		set_current_state(state);

		if (i915_request_completed(rq)) {
			dma_fence_signal(&rq->fence);
			break;
		}

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		intel_engine_flush_submission(rq->engine);
		timeout = io_schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	dma_fence_remove_callback(&rq->fence, &wait.cb);

out:
	mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
	trace_i915_request_wait_end(rq);
	return timeout;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif

static void i915_global_request_shrink(void)
{
	kmem_cache_shrink(global.slab_dependencies);
	kmem_cache_shrink(global.slab_execute_cbs);
	kmem_cache_shrink(global.slab_requests);
}

static void i915_global_request_exit(void)
{
	kmem_cache_destroy(global.slab_dependencies);
	kmem_cache_destroy(global.slab_execute_cbs);
	kmem_cache_destroy(global.slab_requests);
}

static struct i915_global_request global = { {
	.shrink = i915_global_request_shrink,
	.exit = i915_global_request_exit,
} };

int __init i915_global_request_init(void)
{
	global.slab_requests = KMEM_CACHE(i915_request,
					  SLAB_HWCACHE_ALIGN |
					  SLAB_RECLAIM_ACCOUNT |
					  SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_requests)
		return -ENOMEM;

	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
					     SLAB_HWCACHE_ALIGN |
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_execute_cbs)
		goto err_requests;

	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN |
					      SLAB_RECLAIM_ACCOUNT);
	if (!global.slab_dependencies)
		goto err_execute_cbs;

	i915_global_register(&global.base);
	return 0;

err_execute_cbs:
	kmem_cache_destroy(global.slab_execute_cbs);
err_requests:
	kmem_cache_destroy(global.slab_requests);
	return -ENOMEM;
}