#include <linux/interrupt.h>

#include "gem/i915_gem_context.h"

#include "i915_drv.h"
#include "i915_gem_render_state.h"
#include "i915_vgpu.h"
#include "intel_engine_pm.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
#include "intel_workarounds.h"

#define RING_EXECLIST_QFULL		(1 << 0x2)
#define RING_EXECLIST1_VALID		(1 << 0x3)
#define RING_EXECLIST0_VALID		(1 << 0x4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)

#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)

#define EXECLISTS_REQUEST_SIZE 64
#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)

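/*
 * A virtual engine presents a single uABI engine that may, under the hood,
 * be run on any one of its sibling physical engines. It holds at most one
 * ready request at a time and advertises it to every sibling through a
 * per-sibling rb_node; whichever sibling dequeues first claims the request.
 */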
struct virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;

	/* The next request to execute, advertised to all siblings. */
	struct i915_request *request;

	/* Per-sibling node in that sibling's execlists->virtual rbtree. */
	struct ve_node {
		struct rb_node rb;
		int prio;
	} nodes[I915_NUM_ENGINES];

	/* Bonded pairs: restrict which siblings may run a coupled request. */
	struct ve_bond {
		const struct intel_engine_cs *master;
		intel_engine_mask_t sibling_mask;
	} *bonds;
	unsigned int num_bonds;

	/* And finally, the physical engines this virtual engine maps onto. */
	unsigned int num_siblings;
	struct intel_engine_cs *siblings[0];
};

static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!intel_engine_is_virtual(engine));
	return container_of(engine, struct virtual_engine, base);
}

static int execlists_context_deferred_alloc(struct intel_context *ce,
					    struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
				     struct intel_context *ce,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring);

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static int effective_prio(const struct i915_request *rq)
{
	int prio = rq_prio(rq);

	/*
	 * A request that has already started no longer waits on any
	 * semaphore, so apply the no-semaphore bump up front to avoid a
	 * needless preempt-to-idle cycle against a boosted copy of itself.
	 */
	if (__i915_request_has_started(rq))
		prio |= I915_PRIORITY_NOSEMAPHORE;

	/* Restrict mere WAIT boosts from triggering preemption. */
	return prio | __NO_PREEMPTION;
}

static int queue_prio(const struct intel_engine_execlists *execlists)
{
	struct i915_priolist *p;
	struct rb_node *rb;

	rb = rb_first_cached(&execlists->queue);
	if (!rb)
		return INT_MIN;

	/*
	 * Each priolist node covers one user priority level plus a bitmap of
	 * internal sub-levels; the first used bit gives the highest
	 * effective priority that is queued.
	 */
	p = to_priolist(rb);
	return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
}

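/*
 * Decide whether it is worth injecting a preemption request: only if some
 * queued (or virtual) request has an effective priority above that of the
 * request currently executing on this engine.
 */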
static inline bool need_preempt(const struct intel_engine_cs *engine,
				const struct i915_request *rq,
				struct rb_node *rb)
{
	int last_prio;

	if (!engine->preempt_context)
		return false;

	if (i915_request_completed(rq))
		return false;

	/*
	 * The queue_priority_hint is an optimistic upper bound, so first
	 * check whether it merits a preemption attempt at all before doing
	 * the more expensive comparisons below.
	 */
	last_prio = effective_prio(rq);
	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
					 last_prio))
		return false;

	/*
	 * Check against the first request queued behind the executing one;
	 * thanks to priority inheritance it carries the highest priority of
	 * that context.
	 */
	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
		return true;

	if (rb) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		bool preempt = false;

		if (engine == ve->siblings[0]) {
			struct i915_request *next;

			rcu_read_lock();
			next = READ_ONCE(ve->request);
			if (next)
				preempt = rq_prio(next) > last_prio;
			rcu_read_unlock();
		}

		if (preempt)
			return preempt;
	}

	/*
	 * If the inflight context did not trigger the preemption, maybe the
	 * set of queued requests does: compare the highest queued priority
	 * against the request in ELSP[0].
	 */
	return queue_prio(&engine->execlists) > last_prio;
}

__maybe_unused static inline bool
assert_priority_queue(const struct i915_request *prev,
		      const struct i915_request *next)
{
	const struct intel_engine_execlists *execlists =
		&prev->engine->execlists;

	/*
	 * Without preemption, prev may refer to the still-active element,
	 * which we refuse to let go.
	 */
	if (port_request(execlists->port) == prev)
		return true;

	return rq_prio(prev) >= rq_prio(next);
}

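/*
 * The context descriptor is the qword written to the ELSP (or the submit
 * queue) to schedule a context: the GGTT address of the logical ring
 * context state combined with the context template flags and, depending on
 * the gen, either the global hw_id (gen8-10) or the SW context id plus the
 * engine class/instance (gen11+).
 */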
static u64
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct i915_gem_context *ctx = ce->gem_context;
	u64 desc;

	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));

	desc = ctx->desc_template;				/* bits  0-11 */
	GEM_BUG_ON(desc & GENMASK_ULL(63, 12));

	desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
								/* bits 12-31 */
	GEM_BUG_ON(desc & GENMASK_ULL(63, 32));

	/*
	 * The upper 32 bits are copied into the OA reports; keep
	 * oa_get_render_ctx_id() in i915_perf.c in sync with any change here.
	 */
	if (INTEL_GEN(engine->i915) >= 11) {
		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;

		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;

		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
	} else {
		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;
	}

	return desc;
}

static void unwind_wa_tail(struct i915_request *rq)
{
	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
	assert_ring_tail_valid(rq->ring, rq->tail);
}

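/*
 * After a preemption or reset, take every incomplete request off the HW and
 * put it back on the priority queue for resubmission. Requests that belong
 * to a virtual engine are handed back to it instead.
 */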
static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
	struct i915_request *rq, *rn, *active = NULL;
	struct list_head *uninitialized_var(pl);
	int prio = I915_PRIORITY_INVALID;

	lockdep_assert_held(&engine->active.lock);

	list_for_each_entry_safe_reverse(rq, rn,
					 &engine->active.requests,
					 sched.link) {
		struct intel_engine_cs *owner;

		if (i915_request_completed(rq))
			break;

		__i915_request_unsubmit(rq);
		unwind_wa_tail(rq);

		GEM_BUG_ON(rq->hw_context->inflight);

		/*
		 * Push the request back into the queue for later
		 * resubmission. If the request is not native to this
		 * physical engine (i.e. it came from a virtual engine),
		 * hand it back to its owner to decide where to run it next.
		 */
		owner = rq->hw_context->engine;
		if (likely(owner == engine)) {
			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
			if (rq_prio(rq) != prio) {
				prio = rq_prio(rq);
				pl = i915_sched_lookup_priolist(engine, prio);
			}
			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));

			list_move(&rq->sched.link, pl);
			active = rq;
		} else {
			rq->engine = owner;
			owner->submit_request(rq);
			active = NULL;
		}
	}

	return active;
}

struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
{
	struct intel_engine_cs *engine =
		container_of(execlists, typeof(*engine), execlists);

	return __unwind_incomplete_requests(engine);
}

static inline void
execlists_context_status_change(struct i915_request *rq, unsigned long status)
{
	/*
	 * Only used when GVT-g is enabled; otherwise the compiler should
	 * eliminate this function as dead code.
	 */
	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return;

	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
				   status, rq);
}

inline void
execlists_user_begin(struct intel_engine_execlists *execlists,
		     const struct execlist_port *port)
{
	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
}

inline void
execlists_user_end(struct intel_engine_execlists *execlists)
{
	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
}

static inline void
execlists_context_schedule_in(struct i915_request *rq)
{
	GEM_BUG_ON(rq->hw_context->inflight);

	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
	intel_engine_context_in(rq->engine);
	rq->hw_context->inflight = rq->engine;
}

static void kick_siblings(struct i915_request *rq)
{
	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
	struct i915_request *next = READ_ONCE(ve->request);

	if (next && next->execution_mask & ~rq->execution_mask)
		tasklet_schedule(&ve->base.execlists.tasklet);
}

static inline void
execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
{
	rq->hw_context->inflight = NULL;
	intel_engine_context_out(rq->engine);
	execlists_context_status_change(rq, status);
	trace_i915_request_out(rq);

	/*
	 * If this request belongs to a virtual engine, its next request may
	 * have been blocked waiting for access to the active context; kick
	 * the siblings again in case it now needs to run elsewhere.
	 */
	if (rq->engine != rq->hw_context->engine)
		kick_siblings(rq);
}

static u64 execlists_update_context(struct i915_request *rq)
{
	struct intel_context *ce = rq->hw_context;

	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
		intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * Make sure the context image is complete before we submit it to HW:
	 * the WC write into the context image must be visible to the GPU
	 * before the uncached ELSP register write that schedules it.
	 */
	mb();
	return ce->lrc_desc;
}

static inline void write_desc(struct intel_engine_execlists *execlists,
			      u64 desc, u32 port)
{
	if (execlists->ctrl_reg) {
		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
	} else {
		writel(upper_32_bits(desc), execlists->submit_reg);
		writel(lower_32_bits(desc), execlists->submit_reg);
	}
}

static void execlists_submit_ports(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	struct execlist_port *port = execlists->port;
	unsigned int n;

	/*
	 * The engine wakeref is held on our behalf by the request, and is
	 * not released until the engine idles, so touching the ELSP here is
	 * safe without taking runtime pm explicitly.
	 */
	GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref));

	/*
	 * ELSQ note: the submit queue is not cleared after being submitted
	 * to the HW, so always write the same number of entries (highest
	 * port first) to keep it clean.
	 */
	for (n = execlists_num_ports(execlists); n--; ) {
		struct i915_request *rq;
		unsigned int count;
		u64 desc;

		rq = port_unpack(&port[n], &count);
		if (rq) {
			GEM_BUG_ON(count > !n);
			if (!count++)
				execlists_context_schedule_in(rq);
			port_set(&port[n], port_pack(rq, count));
			desc = execlists_update_context(rq);
			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));

			GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
				  engine->name, n,
				  port[n].context_id, count,
				  rq->fence.context, rq->fence.seqno,
				  hwsp_seqno(rq),
				  rq_prio(rq));
		} else {
			GEM_BUG_ON(!n);
			desc = 0;
		}

		write_desc(execlists, desc, n);
	}

	/* We need to manually load the submit queue. */
	if (execlists->ctrl_reg)
		writel(EL_CTRL_LOAD, execlists->ctrl_reg);

	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
}

static bool ctx_single_port_submission(const struct intel_context *ce)
{
	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
		i915_gem_context_force_single_submission(ce->gem_context));
}

static bool can_merge_ctx(const struct intel_context *prev,
			  const struct intel_context *next)
{
	if (prev != next)
		return false;

	if (ctx_single_port_submission(prev))
		return false;

	return true;
}

static bool can_merge_rq(const struct i915_request *prev,
			 const struct i915_request *next)
{
	GEM_BUG_ON(!assert_priority_queue(prev, next));

	if (!can_merge_ctx(prev->hw_context, next->hw_context))
		return false;

	return true;
}

static void port_assign(struct execlist_port *port, struct i915_request *rq)
{
	GEM_BUG_ON(rq == port_request(port));

	if (port_isset(port))
		i915_request_put(port_request(port));

	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
}

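/*
 * Preempt-to-idle: submit the dedicated (empty) preempt context to the
 * ports, forcing the HW to drop whatever is currently executing. Completion
 * is recognised in process_csb() by matching preempt_complete_status.
 */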
static void inject_preempt_context(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	struct intel_context *ce = engine->preempt_context;
	unsigned int n;

	GEM_BUG_ON(execlists->preempt_complete_status !=
		   upper_32_bits(ce->lrc_desc));

	/*
	 * Switch to our empty preempt context so the state of the GPU is
	 * known (idle).
	 */
	GEM_TRACE("%s\n", engine->name);
	for (n = execlists_num_ports(execlists); --n; )
		write_desc(execlists, 0, n);

	write_desc(execlists, ce->lrc_desc, n);

	/* We need to manually load the submit queue. */
	if (execlists->ctrl_reg)
		writel(EL_CTRL_LOAD, execlists->ctrl_reg);

	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);

	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
}

static void complete_preempt_context(struct intel_engine_execlists *execlists)
{
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));

	if (inject_preempt_hang(execlists))
		return;

	execlists_cancel_port_requests(execlists);
	__unwind_incomplete_requests(container_of(execlists,
						  struct intel_engine_cs,
						  execlists));
}

static void virtual_update_register_offsets(u32 *regs,
					    struct intel_engine_cs *engine)
{
	u32 base = engine->mmio_base;

	/* Refresh the per-engine MMIO offsets stored in the context image. */
	regs[CTX_CONTEXT_CONTROL] =
		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));

	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
	regs[CTX_SECOND_BB_HEAD_U] =
		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));

	regs[CTX_CTX_TIMESTAMP] =
		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));

	if (engine->class == RENDER_CLASS) {
		regs[CTX_RCS_INDIRECT_CTX] =
			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
		regs[CTX_BB_PER_CTX_PTR] =
			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));

		regs[CTX_R_PWR_CLK_STATE] =
			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	}
}

static bool virtual_matches(const struct virtual_engine *ve,
			    const struct i915_request *rq,
			    const struct intel_engine_cs *engine)
{
	const struct intel_engine_cs *inflight;

	if (!(rq->execution_mask & engine->mask))
		return false;

	/*
	 * While the previous virtualized request is still inflight (its
	 * context image not yet saved), only the engine it is running on may
	 * pick up the next request, so that the register offsets stay valid.
	 */
	inflight = READ_ONCE(ve->context.inflight);
	if (inflight && inflight != engine)
		return false;

	return true;
}

static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
				     struct intel_engine_cs *engine)
{
	struct intel_engine_cs *old = ve->siblings[0];

	/* Transfer the signaling context from the old sibling to the new. */
	spin_lock(&old->breadcrumbs.irq_lock);
	if (!list_empty(&ve->context.signal_link)) {
		list_move_tail(&ve->context.signal_link,
			       &engine->breadcrumbs.signalers);
		intel_engine_queue_breadcrumbs(engine);
	}
	spin_unlock(&old->breadcrumbs.irq_lock);
}

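/*
 * Fill the ELSP ports from the scheduler: first consider any waiting virtual
 * engines, then drain the priority queue, merging consecutive requests of
 * the same context into a single port and stopping once a context switch
 * between ports would be required.
 */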
static void execlists_dequeue(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct execlist_port *port = execlists->port;
	const struct execlist_port * const last_port =
		&execlists->port[execlists->port_mask];
	struct i915_request *last = port_request(port);
	struct rb_node *rb;
	bool submit = false;

	/*
	 * Hardware submission is through 2 ports. Conceptually each port
	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple; RING_START is
	 * static and unique per context, so we coalesce requests of the
	 * same context into a single port rather than submitting the same
	 * context to both ports back-to-back.
	 */

	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq = READ_ONCE(ve->request);

		if (!rq) { /* lazily cleanup after another engine handled rq */
			rb_erase_cached(rb, &execlists->virtual);
			RB_CLEAR_NODE(rb);
			rb = rb_first_cached(&execlists->virtual);
			continue;
		}

		if (!virtual_matches(ve, rq, engine)) {
			rb = rb_next(rb);
			continue;
		}

		break;
	}

	if (last) {
		/*
		 * Don't resubmit or switch until all outstanding
		 * preemptions (lite-restore) are seen; only then do we know
		 * the next pair of ports are free to submit to.
		 */
		GEM_BUG_ON(!execlists_is_active(execlists,
						EXECLISTS_ACTIVE_USER));
		GEM_BUG_ON(!port_count(&port[0]));

		/*
		 * Writing to ELSP a second time before the HW has
		 * acknowledged the previous write leads to undefined
		 * behaviour, so wait for the ack first.
		 */
		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
			return;

		if (need_preempt(engine, last, rb)) {
			inject_preempt_context(engine);
			return;
		}

		/*
		 * Keep the second port free so we can react quickly to a
		 * new higher-priority request rather than coalescing more
		 * work onto it now.
		 */
		if (port_count(&port[1]))
			return;

		/*
		 * Idle lite-restore workaround: apply the trailing NOOPs to
		 * prevent ring:HEAD == rq:TAIL as we resubmit the request.
		 * See gen8_emit_fini_breadcrumb() for where the padding is
		 * prepared at the end of the request.
		 */
		last->tail = last->wa_tail;
	}

	while (rb) { /* a matching virtual engine takes precedence */
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq;

		spin_lock(&ve->base.active.lock);

		rq = ve->request;
		if (unlikely(!rq)) { /* lost the race to a sibling */
			spin_unlock(&ve->base.active.lock);
			rb_erase_cached(rb, &execlists->virtual);
			RB_CLEAR_NODE(rb);
			rb = rb_first_cached(&execlists->virtual);
			continue;
		}

		GEM_BUG_ON(rq != ve->request);
		GEM_BUG_ON(rq->engine != &ve->base);
		GEM_BUG_ON(rq->hw_context != &ve->context);

		if (rq_prio(rq) >= queue_prio(execlists)) {
			if (!virtual_matches(ve, rq, engine)) {
				spin_unlock(&ve->base.active.lock);
				rb = rb_next(rb);
				continue;
			}

			if (last && !can_merge_rq(last, rq)) {
				spin_unlock(&ve->base.active.lock);
				return; /* leave this rq for another engine */
			}

			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
				  engine->name,
				  rq->fence.context,
				  rq->fence.seqno,
				  i915_request_completed(rq) ? "!" :
				  i915_request_started(rq) ? "*" :
				  "",
				  yesno(engine != ve->siblings[0]));

			ve->request = NULL;
			ve->base.execlists.queue_priority_hint = INT_MIN;
			rb_erase_cached(rb, &execlists->virtual);
			RB_CLEAR_NODE(rb);

			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
			rq->engine = engine;

			if (engine != ve->siblings[0]) {
				u32 *regs = ve->context.lrc_reg_state;
				unsigned int n;

				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
				virtual_update_register_offsets(regs, engine);

				if (!list_empty(&ve->context.signals))
					virtual_xfer_breadcrumbs(ve, engine);

				/*
				 * Move the bound engine to the front of the
				 * sibling array so that future submissions
				 * preferentially reuse this set of bound
				 * register offsets.
				 */
				for (n = 1; n < ve->num_siblings; n++) {
					if (ve->siblings[n] == engine) {
						swap(ve->siblings[n],
						     ve->siblings[0]);
						break;
					}
				}

				GEM_BUG_ON(ve->siblings[0] != engine);
			}

			__i915_request_submit(rq);
			trace_i915_request_in(rq, port_index(port, execlists));
			submit = true;
			last = rq;
		}

		spin_unlock(&ve->base.active.lock);
		break;
	}

	while ((rb = rb_first_cached(&execlists->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;
		int i;

		priolist_for_each_request_consume(rq, rn, p, i) {
			/*
			 * Can we combine this request with the current port?
			 * It has to be the same context/ringbuffer and not
			 * have any exceptions (e.g. GVT saying never to
			 * combine contexts). If we can combine them, the
			 * RING_TAIL simply advances to cover both requests.
			 */
			if (last && !can_merge_rq(last, rq)) {
				/*
				 * If we are already on the last port and
				 * cannot combine, we are done.
				 */
				if (port == last_port)
					goto done;

				/*
				 * We must not populate both ELSP[] with the
				 * same LRCA, i.e. we must submit 2 different
				 * contexts if we submit 2 ELSP.
				 */
				if (last->hw_context == rq->hw_context)
					goto done;

				/*
				 * If GVT overrides us we only ever submit
				 * port[0], leaving port[1] empty; the same
				 * context must also never occupy the second
				 * port.
				 */
				if (ctx_single_port_submission(last->hw_context) ||
				    ctx_single_port_submission(rq->hw_context))
					goto done;

				if (submit)
					port_assign(port, last);
				port++;

				GEM_BUG_ON(port_isset(port));
			}

			__i915_request_submit(rq);
			trace_i915_request_in(rq, port_index(port, execlists));

			last = rq;
			submit = true;
		}

		rb_erase_cached(&p->node, &execlists->queue);
		i915_priolist_free(p);
	}

done:
	/*
	 * The priority hint is the first "hole" in the HW submission: if a
	 * request of higher priority than this arrives, the tasklet is
	 * kicked to reconsider the submission order, possibly preempting
	 * what is already in flight.
	 */
	execlists->queue_priority_hint = queue_prio(execlists);

	if (submit) {
		port_assign(port, last);
		execlists_submit_ports(engine);
	}

	/* We must always keep the beast fed if we have work piled up. */
	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
		   !port_isset(execlists->port));

	/* Re-evaluate the executing context setup after each preemptive kick. */
	if (last)
		execlists_user_begin(execlists, execlists->port);

	/* If the engine is now idle, so should be the flag; and vice versa. */
	GEM_BUG_ON(execlists_is_active(&engine->execlists,
				       EXECLISTS_ACTIVE_USER) ==
		   !port_isset(engine->execlists.port));
}

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
{
	struct execlist_port *port = execlists->port;
	unsigned int num_ports = execlists_num_ports(execlists);

	while (num_ports-- && port_isset(port)) {
		struct i915_request *rq = port_request(port);

		GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
			  rq->engine->name,
			  (unsigned int)(port - execlists->port),
			  rq->fence.context, rq->fence.seqno,
			  hwsp_seqno(rq));

		GEM_BUG_ON(!execlists->active);
		execlists_context_schedule_out(rq,
					       i915_request_completed(rq) ?
					       INTEL_CONTEXT_SCHEDULE_OUT :
					       INTEL_CONTEXT_SCHEDULE_PREEMPTED);

		i915_request_put(rq);

		memset(port, 0, sizeof(*port));
		port++;
	}

	execlists_clear_all_active(execlists);
}

static inline void
invalidate_csb_entries(const u32 *first, const u32 *last)
{
	clflush((void *)first);
	clflush((void *)last);
}

static inline bool
reset_in_progress(const struct intel_engine_execlists *execlists)
{
	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
}

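/*
 * Consume the context-status buffer (CSB): walk the ring of status events
 * written by the HW into the HWSP, from our cached head up to the HW write
 * pointer, acknowledging preemptions and retiring the request in ELSP[0]
 * whenever a completion event is seen.
 */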
static void process_csb(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct execlist_port *port = execlists->port;
	const u32 * const buf = execlists->csb_status;
	const u8 num_entries = execlists->csb_size;
	u8 head, tail;

	lockdep_assert_held(&engine->active.lock);
	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));

	/*
	 * The write pointer may live either in the HWSP or in an mmio
	 * register; in both cases the low byte carries the entry index, so
	 * we can treat them identically here.
	 */
	head = execlists->csb_head;
	tail = READ_ONCE(*execlists->csb_write);
	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
	if (unlikely(head == tail))
		return;

	/*
	 * We must complete the read of the write pointer before any reads
	 * from the CSB, so that we do not see stale values.
	 */
	rmb();

	do {
		struct i915_request *rq;
		unsigned int status;
		unsigned int count;

		if (++head == num_entries)
			head = 0;

		/*
		 * We only hold a reference to the request via
		 * execlist_port[]; any further pointer chasing underneath
		 * the request from softirq context would be subject to a
		 * potential use-after-free, so all required bookkeeping is
		 * kept within port[] itself.
		 */
		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
			  engine->name, head,
			  buf[2 * head + 0], buf[2 * head + 1],
			  execlists->active);

		status = buf[2 * head];
		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
			      GEN8_CTX_STATUS_PREEMPTED))
			execlists_set_active(execlists,
					     EXECLISTS_ACTIVE_HWACK);
		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
			execlists_clear_active(execlists,
					       EXECLISTS_ACTIVE_HWACK);

		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
			continue;

		/* We should never get a COMPLETED | IDLE_ACTIVE! */
		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);

		if (status & GEN8_CTX_STATUS_COMPLETE &&
		    buf[2 * head + 1] == execlists->preempt_complete_status) {
			GEM_TRACE("%s preempt-idle\n", engine->name);
			complete_preempt_context(execlists);
			continue;
		}

		if (status & GEN8_CTX_STATUS_PREEMPTED &&
		    execlists_is_active(execlists,
					EXECLISTS_ACTIVE_PREEMPT))
			continue;

		GEM_BUG_ON(!execlists_is_active(execlists,
						EXECLISTS_ACTIVE_USER));

		rq = port_unpack(port, &count);
		GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
			  engine->name,
			  port->context_id, count,
			  rq ? rq->fence.context : 0,
			  rq ? rq->fence.seqno : 0,
			  rq ? hwsp_seqno(rq) : 0,
			  rq ? rq_prio(rq) : 0);

		/* Check the context/desc id for this event matches. */
		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);

		GEM_BUG_ON(count == 0);
		if (--count == 0) {
			/*
			 * On the final event for this context we expect
			 * either an element-switch event or a completion
			 * event (with the active-idle marker); no more
			 * preemptions or lite-restores.
			 */
			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
			GEM_BUG_ON(port_isset(&port[1]) &&
				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
			GEM_BUG_ON(!port_isset(&port[1]) &&
				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));

			/*
			 * The breadcrumb write must be coherent (visible to
			 * the CPU) before the CSB completion event is
			 * processed, so the request must report completed.
			 */
			GEM_BUG_ON(!i915_request_completed(rq));

			execlists_context_schedule_out(rq,
						       INTEL_CONTEXT_SCHEDULE_OUT);
			i915_request_put(rq);

			GEM_TRACE("%s completed ctx=%d\n",
				  engine->name, port->context_id);

			port = execlists_port_complete(execlists, port);
			if (port_isset(port))
				execlists_user_begin(execlists, port);
			else
				execlists_user_end(execlists);
		} else {
			port_set(port, port_pack(rq, count));
		}
	} while (head != tail);

	execlists->csb_head = head;

	/*
	 * Forcibly evict the consumed CSB entries from the CPU caches so
	 * that the next update from the GPU is read fresh; some hardware
	 * has been observed to leave stale entries visible otherwise.
	 */
	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
}

static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
	lockdep_assert_held(&engine->active.lock);

	process_csb(engine);
	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
		execlists_dequeue(engine);
}

/*
 * Check the unread Context Status Buffers and manage the submission of new
 * contexts to the ELSP accordingly.
 */
static void execlists_submission_tasklet(unsigned long data)
{
	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
	unsigned long flags;

	GEM_TRACE("%s awake?=%d, active=%x\n",
		  engine->name,
		  !!intel_wakeref_active(&engine->wakeref),
		  engine->execlists.active);

	spin_lock_irqsave(&engine->active.lock, flags);
	__execlists_submission_tasklet(engine);
	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void queue_request(struct intel_engine_cs *engine,
			  struct i915_sched_node *node,
			  int prio)
{
	GEM_BUG_ON(!list_empty(&node->link));
	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
}

static void __submit_queue_imm(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	if (reset_in_progress(execlists))
		return; /* defer until we restart the engine following reset */

	if (execlists->tasklet.func == execlists_submission_tasklet)
		__execlists_submission_tasklet(engine);
	else
		tasklet_hi_schedule(&execlists->tasklet);
}

static void submit_queue(struct intel_engine_cs *engine, int prio)
{
	if (prio > engine->execlists.queue_priority_hint) {
		engine->execlists.queue_priority_hint = prio;
		__submit_queue_imm(engine);
	}
}

static void execlists_submit_request(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	queue_request(engine, &request->sched, rq_prio(request));

	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
	GEM_BUG_ON(list_empty(&request->sched.link));

	submit_queue(engine, rq_prio(request));

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void __execlists_context_fini(struct intel_context *ce)
{
	intel_ring_put(ce->ring);

	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
	i915_gem_object_put(ce->state->obj);
}

static void execlists_context_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	if (ce->state)
		__execlists_context_fini(ce);

	intel_context_free(ce);
}

static void execlists_context_unpin(struct intel_context *ce)
{
	i915_gem_context_unpin_hw_id(ce->gem_context);
	i915_gem_object_unpin_map(ce->state->obj);
}

static void
__execlists_update_reg_state(struct intel_context *ce,
			     struct intel_engine_cs *engine)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD + 1] = ring->head;
	regs[CTX_RING_TAIL + 1] = ring->tail;

	/* RPCS */
	if (engine->class == RENDER_CLASS)
		regs[CTX_R_PWR_CLK_STATE + 1] =
			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
}

static int
__execlists_context_pin(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	void *vaddr;
	int ret;

	GEM_BUG_ON(!ce->gem_context->vm);

	ret = execlists_context_deferred_alloc(ce, engine);
	if (ret)
		goto err;
	GEM_BUG_ON(!ce->state);

	ret = intel_context_active_acquire(ce,
					   engine->i915->ggtt.pin_bias |
					   PIN_OFFSET_BIAS |
					   PIN_HIGH);
	if (ret)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj,
					i915_coherent_map_type(engine->i915) |
					I915_MAP_OVERRIDE);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto unpin_active;
	}

	ret = i915_gem_context_pin_hw_id(ce->gem_context);
	if (ret)
		goto unpin_map;

	ce->lrc_desc = lrc_descriptor(ce, engine);
	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
	__execlists_update_reg_state(ce, engine);

	return 0;

unpin_map:
	i915_gem_object_unpin_map(ce->state->obj);
unpin_active:
	intel_context_active_release(ce);
err:
	return ret;
}

static int execlists_context_pin(struct intel_context *ce)
{
	return __execlists_context_pin(ce, ce->engine);
}

static void execlists_context_reset(struct intel_context *ce)
{
	/*
	 * Because of the trailing WA_TAIL_DWORDS there may be a disparity
	 * between our bookkeeping in ce->ring->head/tail and the values
	 * stored in the context image; anything before ce->ring->tail is
	 * junk and must not be executed. To avoid that across resume we
	 * simply reset the context image back to a clean, zeroed ring.
	 */
	intel_ring_reset(ce->ring, 0);
	__execlists_update_reg_state(ce, ce->engine);
}

static const struct intel_context_ops execlists_context_ops = {
	.pin = execlists_context_pin,
	.unpin = execlists_context_unpin,

	.enter = intel_context_enter_engine,
	.exit = intel_context_exit_engine,

	.reset = execlists_context_reset,
	.destroy = execlists_context_destroy,
};

static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;

	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Check if we have been preempted before we even get started.
	 *
	 * After this point i915_request_started() reports true, even if
	 * we get preempted and so are no longer running.
	 */
	*cs++ = MI_ARB_CHECK;
	*cs++ = MI_NOOP;

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = rq->timeline->hwsp_offset;
	*cs++ = 0;
	*cs++ = rq->fence.seqno - 1;

	intel_ring_advance(rq, cs);

	/* Record the updated position of the request's payload. */
	rq->infix = intel_ring_offset(rq, cs);

	return 0;
}

static int emit_pdps(struct i915_request *rq)
{
	const struct intel_engine_cs * const engine = rq->engine;
	struct i915_ppgtt * const ppgtt =
		i915_vm_to_ppgtt(rq->gem_context->vm);
	int err, i;
	u32 *cs;

	GEM_BUG_ON(intel_vgpu_active(rq->i915));

	/*
	 * Reloading the page directory pointers is a delicate sequence;
	 * small changes here have been known to cause GPU hangs and
	 * forcewake errors, hence the flush/invalidate bracketing below.
	 */

	/* Flush any residual operations from the context load. */
	err = engine->emit_flush(rq, EMIT_FLUSH);
	if (err)
		return err;

	err = engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		return err;

	cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Reload each PDP register pair, forcing the LRI to be posted. */
	*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
	for (i = GEN8_3LVL_PDPES; i--; ) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
		u32 base = engine->mmio_base;

		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
		*cs++ = upper_32_bits(pd_daddr);
		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
		*cs++ = lower_32_bits(pd_daddr);
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	/* Be doubly sure the LRI have landed before proceeding. */
	err = engine->emit_flush(rq, EMIT_FLUSH);
	if (err)
		return err;

	/* Re-invalidate the TLB now that the new PDPs are in place. */
	return engine->emit_flush(rq, EMIT_INVALIDATE);
}

static int execlists_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));

	/*
	 * Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += EXECLISTS_REQUEST_SIZE;

	/* Unconditionally invalidate GPU caches and TLBs. */
	if (i915_vm_is_4lvl(request->gem_context->vm))
		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	else
		ret = emit_pdps(request);
	if (ret)
		return ret;

	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
	return 0;
}

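/*
 * Context workaround batch buffers: the functions below build the
 * indirect-context and per-context batches that the CS executes around a
 * context restore. Each gen selects its own builders in
 * intel_init_workaround_bb(), and every batch must end aligned to a
 * cacheline.
 */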
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* Save GEN8_L3SQCREG4 to a private slot in the scratch page. */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = i915_scratch_offset(engine->i915) + 256;
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	/* And restore GEN8_L3SQCREG4 from the scratch slot. */
	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = i915_scratch_offset(engine->i915) + 256;
	*batch++ = 0;

	return batch;
}

/*
 * Gen8 indirect-context workaround batch: executed by the CS while restoring
 * this context, before any user payload runs.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* Disable arbitration while the workaround batch runs. */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* Flush coherent L3 cache lines at context switch on Broadwell. */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* Clear SLM space at context switch via an L3 flush to scratch. */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_GLOBAL_GTT_IVB |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       i915_scratch_offset(engine->i915) +
				       2 * CACHELINE_BYTES);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline. */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * No MI_BATCH_BUFFER_END is required: execution length is given in
	 * cachelines via the RING_INDIRECT_CTX register instead.
	 */

	return batch;
}

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* Disable gather at set shader of common slice. */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* Clip-list provoking vertex fix. */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* Strips-and-fans provoking vertex fix. */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* Flush coherent L3 cache lines at context switch. */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* Program the media EU pool state for pooled-EU platforms. */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * Loading the default 3x6 pool configuration is safe even on
		 * 2x6 parts: the HW ignores the bits of any disabled subslice
		 * and drops down to the appropriate configuration.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline. */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

static u32 *
gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	int i;

	/*
	 * Ensure the engine is idle (CS stall) prior to the context restore
	 * programming any 3DSTATE_SAMPLE_PATTERN state.
	 */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL,
				       0);

	/*
	 * The PIPE_CONTROL above advances the batch by 6 dwords; pad with a
	 * further 10 NOOPs to complete the cacheline.
	 */
	for (i = 0; i < 10; i++)
		*batch++ = MI_NOOP;

	/* Pad to end of cacheline. */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)

static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		goto err;

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

static int intel_init_workaround_bb(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
					    &wa_ctx->per_ctx };
	wa_bb_func_t wa_bb_fn[2];
	struct page *page;
	void *batch, *batch_ptr;
	unsigned int i;
	int ret;

	if (engine->class != RENDER_CLASS)
		return 0;

	switch (INTEL_GEN(engine->i915)) {
	case 11:
		return 0;
	case 10:
		wa_bb_fn[0] = gen10_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		return 0;
	}

	ret = lrc_setup_wa_ctx(engine);
	if (ret) {
		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
		return ret;
	}

	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
	batch = batch_ptr = kmap_atomic(page);

	/*
	 * Emit the two workaround batch buffers, recording the offset from
	 * the start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			ret = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}

	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);

	kunmap_atomic(batch);
	if (ret)
		lrc_destroy_wa_ctx(engine);

	return ret;
}

static void enable_execlists(struct intel_engine_cs *engine)
{
	intel_engine_set_hwsp_writemask(engine, ~0u);

	if (INTEL_GEN(engine->i915) >= 11)
		ENGINE_WRITE(engine,
			     RING_MODE_GEN7,
			     _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
	else
		ENGINE_WRITE(engine,
			     RING_MODE_GEN7,
			     _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));

	ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));

	ENGINE_WRITE(engine,
		     RING_HWS_PGA,
		     i915_ggtt_offset(engine->status_page.vma));
	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
}

static bool unexpected_starting_state(struct intel_engine_cs *engine)
{
	bool unexpected = false;

	if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) {
		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
		unexpected = true;
	}

	return unexpected;
}

static int execlists_resume(struct intel_engine_cs *engine)
{
	intel_engine_apply_workarounds(engine);
	intel_engine_apply_whitelist(engine);

	intel_mocs_init_engine(engine);

	intel_engine_reset_breadcrumbs(engine);

	if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p, NULL);
	}

	enable_execlists(engine);

	return 0;
}

static void execlists_reset_prepare(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	unsigned long flags;

	GEM_TRACE("%s: depth<-%d\n", engine->name,
		  atomic_read(&execlists->tasklet.count));

	/*
	 * Prevent request submission to the hardware until the reset has
	 * completed: turning off the tasklet closes the race between a
	 * request completing on one engine and queueing work to this one
	 * just as we write the ELSP during engine->resume().
	 */
	__tasklet_disable_sync_once(&execlists->tasklet);
	GEM_BUG_ON(!reset_in_progress(execlists));

	intel_engine_stop_cs(engine);

	/* And flush any current direct submission. */
	spin_lock_irqsave(&engine->active.lock, flags);
	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static bool lrc_regs_ok(const struct i915_request *rq)
{
	const struct intel_ring *ring = rq->ring;
	const u32 *regs = rq->hw_context->lrc_reg_state;

	/* Quick spot check for the common signs of context corruption. */

	if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID))
		return false;

	if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
		return false;

	return true;
}

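/*
 * After a GPU reset the CSB is re-initialised by the HW; mirror that by
 * rewinding our cached head and the write pointer to the final entry and
 * flushing the stale CSB snapshot out of the CPU caches.
 */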
static void reset_csb_pointers(struct intel_engine_execlists *execlists)
{
	const unsigned int reset_value = execlists->csb_size - 1;

	execlists->csb_head = reset_value;
	WRITE_ONCE(*execlists->csb_write, reset_value);
	wmb(); /* Make sure this is visible to HW (paranoia?) */

	invalidate_csb_entries(&execlists->csb_status[0],
			       &execlists->csb_status[reset_value]);
}

static struct i915_request *active_request(struct i915_request *rq)
{
	const struct list_head * const list = &rq->engine->active.requests;
	const struct intel_context * const context = rq->hw_context;
	struct i915_request *active = NULL;

	list_for_each_entry_from_reverse(rq, list, sched.link) {
		if (i915_request_completed(rq))
			break;

		if (rq->hw_context != context)
			break;

		active = rq;
	}

	return active;
}

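/*
 * Engine reset: drain any outstanding CSB events, drop the requests held in
 * the ELSP ports, and then decide what to do with the interrupted context:
 * replay it untouched if it never started (or was innocent), or rewind it to
 * the start of its ring and scrub its register state back to defaults.
 */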
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *regs;

	process_csb(engine); /* drain preemption events */

	/* Following the reset, we need to reload the CSB read/write pointers. */
	reset_csb_pointers(&engine->execlists);

	/*
	 * Save the currently executing context; even if we completed its
	 * request, it was still running at the time of the reset and its
	 * state will have been clobbered.
	 */
	if (!port_isset(execlists->port))
		goto out_clear;

	rq = port_request(execlists->port);
	ce = rq->hw_context;

	/*
	 * The CSB registers are often trashed across a GPU reset, so instead
	 * of reading the remaining entries we guess the missed
	 * context-switch events by looking at which requests completed.
	 */
	execlists_cancel_port_requests(execlists);

	rq = active_request(rq);
	if (!rq)
		goto out_replay;

	/*
	 * If this request has not started yet (e.g. it is waiting on a
	 * semaphore), replay it as-is so the signaling chain is preserved;
	 * but only if the context registers still look sane.
	 */
	if (!i915_request_started(rq) && lrc_regs_ok(rq))
		goto out_replay;

	/*
	 * If the request was innocent, leave it for replay on restart; if it
	 * was guilty (stalled) or the context looks corrupt, restore the
	 * context image so the guilty request is skipped over.
	 */
	i915_reset_request(rq, stalled);
	if (!stalled && lrc_regs_ok(rq))
		goto out_replay;

	/*
	 * We want a simple context + ring to execute the breadcrumb update,
	 * so rebuild the register state from the default context image
	 * (skipping the per-process HWSP page) and reinitialise it.
	 */
	regs = ce->lrc_reg_state;
	if (engine->pinned_default_state) {
		memcpy(regs,
		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
		       engine->context_size - PAGE_SIZE);
	}
	execlists_init_reg_state(regs, ce, engine, ce->ring);

out_replay:
	/* Rerun the request; its payload has been neutered (if guilty). */
	ce->ring->head =
		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
	intel_ring_update_space(ce->ring);
	__execlists_update_reg_state(ce, engine);

	/* Push back any incomplete requests for replay after the reset. */
	__unwind_incomplete_requests(engine);

out_clear:
	execlists_clear_all_active(execlists);
}

static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
	unsigned long flags;

	GEM_TRACE("%s\n", engine->name);

	spin_lock_irqsave(&engine->active.lock, flags);

	__execlists_reset(engine, stalled);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void nop_submission_tasklet(unsigned long data)
{
	/* The driver is wedged; don't process any further events. */
}

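/*
 * Called when the driver is wedged: mark every submitted and queued request
 * as failed (-EIO) and complete it so that waiters are released, then leave
 * the nop tasklet installed so no further submission reaches the hardware.
 */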
static void execlists_cancel_requests(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_request *rq, *rn;
	struct rb_node *rb;
	unsigned long flags;

	GEM_TRACE("%s\n", engine->name);

	/*
	 * The caller has arranged exclusive access to the submission state
	 * for us (interrupts, tasklet and other threads are disabled), so
	 * we are free to complete the cancellation. The GPU need not be
	 * idle; we only need the outstanding requests to be completed
	 * without any further submission.
	 */
	spin_lock_irqsave(&engine->active.lock, flags);

	__execlists_reset(engine, true);

	/* Mark all executing requests as skipped. */
	list_for_each_entry(rq, &engine->active.requests, sched.link) {
		if (!i915_request_signaled(rq))
			dma_fence_set_error(&rq->fence, -EIO);

		i915_request_mark_complete(rq);
	}

	/* Flush the queued requests to the timeline list (for retiring). */
	while ((rb = rb_first_cached(&execlists->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		int i;

		priolist_for_each_request_consume(rq, rn, p, i) {
			list_del_init(&rq->sched.link);
			__i915_request_submit(rq);
			dma_fence_set_error(&rq->fence, -EIO);
			i915_request_mark_complete(rq);
		}

		rb_erase_cached(&p->node, &execlists->queue);
		i915_priolist_free(p);
	}

	/* Cancel all attached virtual engines. */
	while ((rb = rb_first_cached(&execlists->virtual))) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);

		rb_erase_cached(rb, &execlists->virtual);
		RB_CLEAR_NODE(rb);

		spin_lock(&ve->base.active.lock);
		if (ve->request) {
			ve->request->engine = engine;
			__i915_request_submit(ve->request);
			dma_fence_set_error(&ve->request->fence, -EIO);
			i915_request_mark_complete(ve->request);
			ve->base.execlists.queue_priority_hint = INT_MIN;
			ve->request = NULL;
		}
		spin_unlock(&ve->base.active.lock);
	}

	/* Remaining _unready_ requests will be nop'ed when submitted. */

	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
	GEM_BUG_ON(port_isset(execlists->port));

	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
	execlists->tasklet.func = nop_submission_tasklet;

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void execlists_reset_finish(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	/*
	 * After a GPU reset, we may have requests to replay. Do so now while
	 * we still hold forcewake and before userspace can poke at the
	 * device again.
	 */
	GEM_BUG_ON(!reset_in_progress(execlists));
	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
		execlists->tasklet.func(execlists->tasklet.data);

	if (__tasklet_enable(&execlists->tasklet))
		/* And kick in case we missed a new request submission. */
		tasklet_hi_schedule(&execlists->tasklet);
	GEM_TRACE("%s: depth->%d\n", engine->name,
		  atomic_read(&execlists->tasklet.count));
}

static int gen8_emit_bb_start(struct i915_request *rq,
			      u64 offset, u32 len,
			      const unsigned int flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Gen8 disables arbitration around the batch itself. */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

static int gen9_emit_bb_start(struct i915_request *rq,
			      u64 offset, u32 len,
			      const unsigned int flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR,
		     ~(engine->irq_enable_mask | engine->irq_keep_mask));
	ENGINE_POSTING_READ(engine, RING_IMR);
}

static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
}

static int gen8_emit_flush(struct i915_request *request, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(request, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent commands,
	 * such as breadcrumb interrupts, are strictly ordered wrt the
	 * contents of the write cache being flushed to memory.
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (request->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */
	intel_ring_advance(request, cs);

	return 0;
}

static int gen8_emit_flush_render(struct i915_request *request,
				  u32 mode)
{
	struct intel_engine_cs *engine = request->engine;
	u32 scratch_addr =
		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 *cs, flags = 0;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/*
		 * On Gen9 we need to emit a NULL pipe control before
		 * VF_CACHE_INVALIDATE.
		 */
		if (IS_GEN(request->i915, 9))
			vf_flush_wa = true;

		/*
		 * Kabylake pre-B0 needs a DC flush before and a CS stall
		 * after the main flush.
		 */
		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
			dc_flush_wa = true;
	}

	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	cs = intel_ring_begin(request, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (vf_flush_wa)
		cs = gen8_emit_pipe_control(cs, 0, 0);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
					    0);

	cs = gen8_emit_pipe_control(cs, flags, scratch_addr);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(request, cs);

	return 0;
}

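/*
 * Reserve space for two NOOPs at the end of each request, used as padding to
 * avoid lite-restoring a context with HEAD == TAIL (the idle lite-restore
 * workaround referenced by WA_TAIL_DWORDS).
 */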
2507static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
2508{
2509
2510 *cs++ = MI_ARB_CHECK;
2511 *cs++ = MI_NOOP;
2512 request->wa_tail = intel_ring_offset(request, cs);
2513
2514 return cs;
2515}
2516
2517static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
2518{
2519 cs = gen8_emit_ggtt_write(cs,
2520 request->fence.seqno,
2521 request->timeline->hwsp_offset,
2522 0);
2523
2524 *cs++ = MI_USER_INTERRUPT;
2525 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2526
2527 request->tail = intel_ring_offset(request, cs);
2528 assert_ring_tail_valid(request->ring, request->tail);
2529
2530 return gen8_emit_wa_tail(request, cs);
2531}
2532
2533static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
2534{
2535
2536 cs = gen8_emit_ggtt_write_rcs(cs,
2537 request->fence.seqno,
2538 request->timeline->hwsp_offset,
2539 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
2540 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2541 PIPE_CONTROL_DC_FLUSH_ENABLE);
2542 cs = gen8_emit_pipe_control(cs,
2543 PIPE_CONTROL_FLUSH_ENABLE |
2544 PIPE_CONTROL_CS_STALL,
2545 0);
2546
2547 *cs++ = MI_USER_INTERRUPT;
2548 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2549
2550 request->tail = intel_ring_offset(request, cs);
2551 assert_ring_tail_valid(request->ring, request->tail);
2552
2553 return gen8_emit_wa_tail(request, cs);
2554}
2555
2556static int gen8_init_rcs_context(struct i915_request *rq)
2557{
2558 int ret;
2559
2560 ret = intel_engine_emit_ctx_wa(rq);
2561 if (ret)
2562 return ret;
2563
2564 ret = intel_rcs_context_init_mocs(rq);
2565
2566
2567
2568
2569 if (ret)
2570 DRM_ERROR("MOCS failed to program: expect performance issues.\n");
2571
2572 return i915_gem_render_state_emit(rq);
2573}
2574
2575static void execlists_park(struct intel_engine_cs *engine)
2576{
2577 intel_engine_park(engine);
2578}
2579
2580void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
2581{
2582 engine->submit_request = execlists_submit_request;
2583 engine->cancel_requests = execlists_cancel_requests;
2584 engine->schedule = i915_schedule;
2585 engine->execlists.tasklet.func = execlists_submission_tasklet;
2586
2587 engine->reset.prepare = execlists_reset_prepare;
2588 engine->reset.reset = execlists_reset;
2589 engine->reset.finish = execlists_reset_finish;
2590
2591 engine->park = execlists_park;
2592 engine->unpark = NULL;
2593
2594 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
2595 if (!intel_vgpu_active(engine->i915))
2596 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
2597 if (engine->preempt_context &&
2598 HAS_LOGICAL_RING_PREEMPTION(engine->i915))
2599 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
2600}
2601
2602static void execlists_destroy(struct intel_engine_cs *engine)
2603{
2604 intel_engine_cleanup_common(engine);
2605 lrc_destroy_wa_ctx(engine);
2606 kfree(engine);
2607}
2608
2609static void
2610logical_ring_default_vfuncs(struct intel_engine_cs *engine)
2611{
2612
2613
2614 engine->destroy = execlists_destroy;
2615 engine->resume = execlists_resume;
2616
2617 engine->reset.prepare = execlists_reset_prepare;
2618 engine->reset.reset = execlists_reset;
2619 engine->reset.finish = execlists_reset_finish;
2620
2621 engine->cops = &execlists_context_ops;
2622 engine->request_alloc = execlists_request_alloc;
2623
2624 engine->emit_flush = gen8_emit_flush;
2625 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
2626 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
2627
2628 engine->set_default_submission = intel_execlists_set_default_submission;
2629
2630 if (INTEL_GEN(engine->i915) < 11) {
2631 engine->irq_enable = gen8_logical_ring_enable_irq;
2632 engine->irq_disable = gen8_logical_ring_disable_irq;
2633 } else {
2634
2635
2636
2637
2638
2639
2640 }
2641 if (IS_GEN(engine->i915, 8))
2642 engine->emit_bb_start = gen8_emit_bb_start;
2643 else
2644 engine->emit_bb_start = gen9_emit_bb_start;
2645}
2646
2647static inline void
2648logical_ring_default_irqs(struct intel_engine_cs *engine)
2649{
2650 unsigned int shift = 0;
2651
2652 if (INTEL_GEN(engine->i915) < 11) {
2653 const u8 irq_shifts[] = {
2654 [RCS0] = GEN8_RCS_IRQ_SHIFT,
2655 [BCS0] = GEN8_BCS_IRQ_SHIFT,
2656 [VCS0] = GEN8_VCS0_IRQ_SHIFT,
2657 [VCS1] = GEN8_VCS1_IRQ_SHIFT,
2658 [VECS0] = GEN8_VECS_IRQ_SHIFT,
2659 };
2660
2661 shift = irq_shifts[engine->id];
2662 }
2663
2664 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
2665 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
2666}
2667
2668int intel_execlists_submission_setup(struct intel_engine_cs *engine)
2669{
2670
2671 engine->buffer = NULL;
2672
2673 tasklet_init(&engine->execlists.tasklet,
2674 execlists_submission_tasklet, (unsigned long)engine);
2675
2676 logical_ring_default_vfuncs(engine);
2677 logical_ring_default_irqs(engine);
2678
2679 if (engine->class == RENDER_CLASS) {
2680 engine->init_context = gen8_init_rcs_context;
2681 engine->emit_flush = gen8_emit_flush_render;
2682 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
2683 }
2684
2685 return 0;
2686}
2687
2688int intel_execlists_submission_init(struct intel_engine_cs *engine)
2689{
2690 struct intel_engine_execlists * const execlists = &engine->execlists;
2691 struct drm_i915_private *i915 = engine->i915;
2692 struct intel_uncore *uncore = engine->uncore;
2693 u32 base = engine->mmio_base;
2694 int ret;
2695
2696 ret = intel_engine_init_common(engine);
2697 if (ret)
2698 return ret;
2699
2700 intel_engine_init_workarounds(engine);
2701 intel_engine_init_whitelist(engine);
2702
2703 if (intel_init_workaround_bb(engine))
2704		/*
2705		 * We continue even if we fail to initialize the WA batch
2706		 * buffer, because we only expect rare glitches and nothing
2707		 * critical enough to prevent us from using the GPU.
2708		 */
2709 DRM_ERROR("WA batch buffer initialization failed\n");
2710
2711 if (HAS_LOGICAL_RING_ELSQ(i915)) {
2712 execlists->submit_reg = uncore->regs +
2713 i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
2714 execlists->ctrl_reg = uncore->regs +
2715 i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
2716 } else {
2717 execlists->submit_reg = uncore->regs +
2718 i915_mmio_reg_offset(RING_ELSP(base));
2719 }
2720
2721 execlists->preempt_complete_status = ~0u;
2722 if (engine->preempt_context)
2723 execlists->preempt_complete_status =
2724 upper_32_bits(engine->preempt_context->lrc_desc);
2725
2726 execlists->csb_status =
2727 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
2728
2729 execlists->csb_write =
2730 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
2731
2732 if (INTEL_GEN(i915) < 11)
2733 execlists->csb_size = GEN8_CSB_ENTRIES;
2734 else
2735 execlists->csb_size = GEN11_CSB_ENTRIES;
2736
2737 reset_csb_pointers(execlists);
2738
2739 return 0;
2740}
2741
2742static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
2743{
2744 u32 indirect_ctx_offset;
2745
2746 switch (INTEL_GEN(engine->i915)) {
2747 default:
2748 MISSING_CASE(INTEL_GEN(engine->i915));
2749		/* fall through */
2750 case 11:
2751 indirect_ctx_offset =
2752 GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
2753 break;
2754 case 10:
2755 indirect_ctx_offset =
2756 GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
2757 break;
2758 case 9:
2759 indirect_ctx_offset =
2760 GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
2761 break;
2762 case 8:
2763 indirect_ctx_offset =
2764 GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
2765 break;
2766 }
2767
2768 return indirect_ctx_offset;
2769}
2770
2771static void execlists_init_reg_state(u32 *regs,
2772 struct intel_context *ce,
2773 struct intel_engine_cs *engine,
2774 struct intel_ring *ring)
2775{
2776 struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm);
2777 bool rcs = engine->class == RENDER_CLASS;
2778 u32 base = engine->mmio_base;
2779
2780	/*
2781	 * A context is effectively a big batch buffer: a stream of
2782	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
2783	 * values we set here only matter for the very first context restore;
2784	 * on every subsequent save the GPU recreates this image with the
2785	 * then-current register values (including the MI_LOAD_REGISTER_IMM
2786	 * commands we do not initialise here).
2787	 *
2788	 * Must be kept consistent with virtual_update_register_offsets().
2789	 */
2790 regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
2791 MI_LRI_FORCE_POSTED;
2792
2793 CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
2794 _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
2795 _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
2796 if (INTEL_GEN(engine->i915) < 11) {
2797 regs[CTX_CONTEXT_CONTROL + 1] |=
2798 _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
2799 CTX_CTRL_RS_CTX_ENABLE);
2800 }
2801 CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
2802 CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
2803 CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
2804 CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
2805 RING_CTL_SIZE(ring->size) | RING_VALID);
2806 CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
2807 CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
2808 CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
2809 CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
2810 CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
2811 CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
2812 if (rcs) {
2813 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
2814
2815 CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
2816 CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
2817 RING_INDIRECT_CTX_OFFSET(base), 0);
2818 if (wa_ctx->indirect_ctx.size) {
2819 u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
2820
2821 regs[CTX_RCS_INDIRECT_CTX + 1] =
2822 (ggtt_offset + wa_ctx->indirect_ctx.offset) |
2823 (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
2824
2825 regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
2826 intel_lr_indirect_ctx_offset(engine) << 6;
2827 }
2828
2829 CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
2830 if (wa_ctx->per_ctx.size) {
2831 u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
2832
2833 regs[CTX_BB_PER_CTX_PTR + 1] =
2834 (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
2835 }
2836 }
2837
2838 regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
2839
2840 CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
2841
2842 CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
2843 CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
2844 CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
2845 CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
2846 CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
2847 CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
2848 CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
2849 CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
2850
2851 if (i915_vm_is_4lvl(&ppgtt->vm)) {
2852		/*
2853		 * With a 4-level (64b) PPGTT, PDP0_DESCRIPTOR holds the base
2854		 * address of the PML4 and the other PDP descriptors are ignored.
2855		 */
2856 ASSIGN_CTX_PML4(ppgtt, regs);
2857 } else {
2858 ASSIGN_CTX_PDP(ppgtt, regs, 3);
2859 ASSIGN_CTX_PDP(ppgtt, regs, 2);
2860 ASSIGN_CTX_PDP(ppgtt, regs, 1);
2861 ASSIGN_CTX_PDP(ppgtt, regs, 0);
2862 }
2863
2864 if (rcs) {
2865 regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
2866 CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
2867
2868 i915_oa_init_reg_state(engine, ce, regs);
2869 }
2870
2871 regs[CTX_END] = MI_BATCH_BUFFER_END;
2872 if (INTEL_GEN(engine->i915) >= 10)
2873 regs[CTX_END] |= BIT(0);
2874}
2875
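/*
 * populate_lr_context() fills a freshly allocated context object: the
 * engine's default state (if any) is copied in first, and the register
 * state page is then written by execlists_init_reg_state().
 */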
2876static int
2877populate_lr_context(struct intel_context *ce,
2878 struct drm_i915_gem_object *ctx_obj,
2879 struct intel_engine_cs *engine,
2880 struct intel_ring *ring)
2881{
2882 void *vaddr;
2883 u32 *regs;
2884 int ret;
2885
2886 vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
2887 if (IS_ERR(vaddr)) {
2888 ret = PTR_ERR(vaddr);
2889 DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
2890 return ret;
2891 }
2892
2893 if (engine->default_state) {
2894		/*
2895		 * We only want to copy over the template context state,
2896		 * skipping the LRC header pages at the start of the object;
2897		 * the register state below is then (re)initialised by hand.
2898		 */
2899 const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
2900 void *defaults;
2901
2902 defaults = i915_gem_object_pin_map(engine->default_state,
2903 I915_MAP_WB);
2904 if (IS_ERR(defaults)) {
2905 ret = PTR_ERR(defaults);
2906 goto err_unpin_ctx;
2907 }
2908
2909 memcpy(vaddr + start, defaults + start, engine->context_size);
2910 i915_gem_object_unpin_map(engine->default_state);
2911 }
2912
2913	/* The LRC_STATE_PN page of the context object contains the register
2914	 * state that must be set up prior to the first execution. */
2915 regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
2916 execlists_init_reg_state(regs, ce, engine, ring);
2917 if (!engine->default_state)
2918 regs[CTX_CONTEXT_CONTROL + 1] |=
2919 _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
2920 if (ce->gem_context == engine->i915->preempt_context &&
2921 INTEL_GEN(engine->i915) < 11)
2922 regs[CTX_CONTEXT_CONTROL + 1] |=
2923 _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
2924 CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
2925
2926 ret = 0;
2927err_unpin_ctx:
2928 __i915_gem_object_flush_map(ctx_obj,
2929 LRC_HEADER_PAGES * PAGE_SIZE,
2930 engine->context_size);
2931 i915_gem_object_unpin_map(ctx_obj);
2932 return ret;
2933}
2934
2935static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
2936{
2937 if (ctx->timeline)
2938 return i915_timeline_get(ctx->timeline);
2939 else
2940 return i915_timeline_create(ctx->i915, NULL);
2941}
2942
2943static int execlists_context_deferred_alloc(struct intel_context *ce,
2944 struct intel_engine_cs *engine)
2945{
2946 struct drm_i915_gem_object *ctx_obj;
2947 struct i915_vma *vma;
2948 u32 context_size;
2949 struct intel_ring *ring;
2950 struct i915_timeline *timeline;
2951 int ret;
2952
2953 if (ce->state)
2954 return 0;
2955
2956 context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
2957
2958	/*
2959	 * Before the actual start of the context image we insert a few
2960	 * pages for our own use and for sharing with the GuC.
2961	 */
2962 context_size += LRC_HEADER_PAGES * PAGE_SIZE;
2963
2964 ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
2965 if (IS_ERR(ctx_obj))
2966 return PTR_ERR(ctx_obj);
2967
2968 vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL);
2969 if (IS_ERR(vma)) {
2970 ret = PTR_ERR(vma);
2971 goto error_deref_obj;
2972 }
2973
2974 timeline = get_timeline(ce->gem_context);
2975 if (IS_ERR(timeline)) {
2976 ret = PTR_ERR(timeline);
2977 goto error_deref_obj;
2978 }
2979
2980 ring = intel_engine_create_ring(engine,
2981 timeline,
2982 ce->gem_context->ring_size);
2983 i915_timeline_put(timeline);
2984 if (IS_ERR(ring)) {
2985 ret = PTR_ERR(ring);
2986 goto error_deref_obj;
2987 }
2988
2989 ret = populate_lr_context(ce, ctx_obj, engine, ring);
2990 if (ret) {
2991 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
2992 goto error_ring_free;
2993 }
2994
2995 ce->ring = ring;
2996 ce->state = vma;
2997
2998 return 0;
2999
3000error_ring_free:
3001 intel_ring_put(ring);
3002error_deref_obj:
3003 i915_gem_object_put(ctx_obj);
3004 return ret;
3005}
3006
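/*
 * A virtual engine carries at most one request at a time (ve->request);
 * virtual_queue() returns the list_head on which that request's sched.link
 * is parked until one of the siblings claims it.
 */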
3007static struct list_head *virtual_queue(struct virtual_engine *ve)
3008{
3009 return &ve->base.execlists.default_priolist.requests[0];
3010}
3011
3012static void virtual_context_destroy(struct kref *kref)
3013{
3014 struct virtual_engine *ve =
3015 container_of(kref, typeof(*ve), context.ref);
3016 unsigned int n;
3017
3018 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
3019 GEM_BUG_ON(ve->request);
3020 GEM_BUG_ON(ve->context.inflight);
3021
3022 for (n = 0; n < ve->num_siblings; n++) {
3023 struct intel_engine_cs *sibling = ve->siblings[n];
3024 struct rb_node *node = &ve->nodes[sibling->id].rb;
3025
3026 if (RB_EMPTY_NODE(node))
3027 continue;
3028
3029 spin_lock_irq(&sibling->active.lock);
3030
3031		/* Detachment is lazily performed in the execlists tasklet */
3032 if (!RB_EMPTY_NODE(node))
3033 rb_erase_cached(node, &sibling->execlists.virtual);
3034
3035 spin_unlock_irq(&sibling->active.lock);
3036 }
3037 GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
3038
3039 if (ve->context.state)
3040 __execlists_context_fini(&ve->context);
3041
3042 kfree(ve->bonds);
3043 kfree(ve);
3044}
3045
3046static void virtual_engine_initial_hint(struct virtual_engine *ve)
3047{
3048 int swp;
3049
3050	/*
3051	 * Pick a random sibling on starting to help spread the load around.
3052	 *
3053	 * New contexts are typically created with exactly the same order of
3054	 * siblings, and often started in batches. Due to the way we iterate
3055	 * the array of siblings when submitting requests, sibling[0] is
3056	 * prioritised for dequeuing. If we make sure that sibling[0] is
3057	 * fairly randomised across the system, we also help spread the load
3058	 * by making the first engine we inspect different each time.
3059	 *
3060	 * NB: This does not force us to execute on this engine; it will just
3061	 * typically be the first we inspect for submission.
3062	 */
3063 swp = prandom_u32_max(ve->num_siblings);
3064 if (!swp)
3065 return;
3066
3067 swap(ve->siblings[swp], ve->siblings[0]);
3068 virtual_update_register_offsets(ve->context.lrc_reg_state,
3069 ve->siblings[0]);
3070}
3071
3072static int virtual_context_pin(struct intel_context *ce)
3073{
3074 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3075 int err;
3076
3077	/* Note: we must use a real engine class for setting up reg state */
3078 err = __execlists_context_pin(ce, ve->siblings[0]);
3079 if (err)
3080 return err;
3081
3082 virtual_engine_initial_hint(ve);
3083 return 0;
3084}
3085
3086static void virtual_context_enter(struct intel_context *ce)
3087{
3088 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3089 unsigned int n;
3090
3091 for (n = 0; n < ve->num_siblings; n++)
3092 intel_engine_pm_get(ve->siblings[n]);
3093}
3094
3095static void virtual_context_exit(struct intel_context *ce)
3096{
3097 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3098 unsigned int n;
3099
3100 for (n = 0; n < ve->num_siblings; n++)
3101 intel_engine_pm_put(ve->siblings[n]);
3102}
3103
3104static const struct intel_context_ops virtual_context_ops = {
3105 .pin = virtual_context_pin,
3106 .unpin = execlists_context_unpin,
3107
3108 .enter = virtual_context_enter,
3109 .exit = virtual_context_exit,
3110
3111 .destroy = virtual_context_destroy,
3112};
3113
3114static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
3115{
3116 struct i915_request *rq;
3117 intel_engine_mask_t mask;
3118
3119 rq = READ_ONCE(ve->request);
3120 if (!rq)
3121 return 0;
3122
3123	/* The rq is ready for submission; rq->execution_mask tells us where. */
3124 mask = rq->execution_mask;
3125 if (unlikely(!mask)) {
3126		/* Invalid selection: mark the request in error and pick any sibling */
3127 i915_request_skip(rq, -ENODEV);
3128 mask = ve->siblings[0]->mask;
3129 }
3130
3131 GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
3132 ve->base.name,
3133 rq->fence.context, rq->fence.seqno,
3134 mask, ve->base.execlists.queue_priority_hint);
3135
3136 return mask;
3137}
3138
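/*
 * Offer the single pending virtual request to every sibling allowed by its
 * execution mask: (re)insert this engine's ve_node into each sibling's
 * execlists.virtual rbtree, ordered by priority, and kick the sibling's
 * tasklet whenever the new node raises its queue_priority_hint.
 */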
3139static void virtual_submission_tasklet(unsigned long data)
3140{
3141 struct virtual_engine * const ve = (struct virtual_engine *)data;
3142 const int prio = ve->base.execlists.queue_priority_hint;
3143 intel_engine_mask_t mask;
3144 unsigned int n;
3145
3146 rcu_read_lock();
3147 mask = virtual_submission_mask(ve);
3148 rcu_read_unlock();
3149 if (unlikely(!mask))
3150 return;
3151
3152 local_irq_disable();
3153 for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
3154 struct intel_engine_cs *sibling = ve->siblings[n];
3155 struct ve_node * const node = &ve->nodes[sibling->id];
3156 struct rb_node **parent, *rb;
3157 bool first;
3158
3159 if (unlikely(!(mask & sibling->mask))) {
3160 if (!RB_EMPTY_NODE(&node->rb)) {
3161 spin_lock(&sibling->active.lock);
3162 rb_erase_cached(&node->rb,
3163 &sibling->execlists.virtual);
3164 RB_CLEAR_NODE(&node->rb);
3165 spin_unlock(&sibling->active.lock);
3166 }
3167 continue;
3168 }
3169
3170 spin_lock(&sibling->active.lock);
3171
3172 if (!RB_EMPTY_NODE(&node->rb)) {
3173			/*
3174			 * Cheat and avoid rebalancing the tree if we can
3175			 * reuse this node in situ.
3176			 */
3177 first = rb_first_cached(&sibling->execlists.virtual) ==
3178 &node->rb;
3179 if (prio == node->prio || (prio > node->prio && first))
3180 goto submit_engine;
3181
3182 rb_erase_cached(&node->rb, &sibling->execlists.virtual);
3183 }
3184
3185 rb = NULL;
3186 first = true;
3187 parent = &sibling->execlists.virtual.rb_root.rb_node;
3188 while (*parent) {
3189 struct ve_node *other;
3190
3191 rb = *parent;
3192 other = rb_entry(rb, typeof(*other), rb);
3193 if (prio > other->prio) {
3194 parent = &rb->rb_left;
3195 } else {
3196 parent = &rb->rb_right;
3197 first = false;
3198 }
3199 }
3200
3201 rb_link_node(&node->rb, rb, parent);
3202 rb_insert_color_cached(&node->rb,
3203 &sibling->execlists.virtual,
3204 first);
3205
3206submit_engine:
3207 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
3208 node->prio = prio;
3209 if (first && prio > sibling->execlists.queue_priority_hint) {
3210 sibling->execlists.queue_priority_hint = prio;
3211 tasklet_hi_schedule(&sibling->execlists.tasklet);
3212 }
3213
3214 spin_unlock(&sibling->active.lock);
3215 }
3216 local_irq_enable();
3217}
3218
3219static void virtual_submit_request(struct i915_request *rq)
3220{
3221 struct virtual_engine *ve = to_virtual_engine(rq->engine);
3222
3223 GEM_TRACE("%s: rq=%llx:%lld\n",
3224 ve->base.name,
3225 rq->fence.context,
3226 rq->fence.seqno);
3227
3228 GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
3229
3230 GEM_BUG_ON(ve->request);
3231 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
3232
3233 ve->base.execlists.queue_priority_hint = rq_prio(rq);
3234 WRITE_ONCE(ve->request, rq);
3235
3236 list_move_tail(&rq->sched.link, virtual_queue(ve));
3237
3238 tasklet_schedule(&ve->base.execlists.tasklet);
3239}
3240
3241static struct ve_bond *
3242virtual_find_bond(struct virtual_engine *ve,
3243 const struct intel_engine_cs *master)
3244{
3245 int i;
3246
3247 for (i = 0; i < ve->num_bonds; i++) {
3248 if (ve->bonds[i].master == master)
3249 return &ve->bonds[i];
3250 }
3251
3252 return NULL;
3253}
3254
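/*
 * Once the master request 'signal' has been submitted, narrow the bonded
 * request's execution_mask to the siblings recorded for that master,
 * using a cmpxchg loop so concurrent updates are not lost.
 */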
3255static void
3256virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
3257{
3258 struct virtual_engine *ve = to_virtual_engine(rq->engine);
3259 struct ve_bond *bond;
3260
3261 bond = virtual_find_bond(ve, to_request(signal)->engine);
3262 if (bond) {
3263 intel_engine_mask_t old, new, cmp;
3264
3265 cmp = READ_ONCE(rq->execution_mask);
3266 do {
3267 old = cmp;
3268 new = cmp & bond->sibling_mask;
3269 } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
3270 }
3271}
3272
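/*
 * Create a virtual engine that load-balances across 'siblings'. A single
 * sibling degenerates to an ordinary context on that engine; otherwise we
 * allocate a struct virtual_engine wrapping both an intel_engine_cs and an
 * intel_context, and wire up the virtual submission callbacks.
 */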
3273struct intel_context *
3274intel_execlists_create_virtual(struct i915_gem_context *ctx,
3275 struct intel_engine_cs **siblings,
3276 unsigned int count)
3277{
3278 struct virtual_engine *ve;
3279 unsigned int n;
3280 int err;
3281
3282 if (count == 0)
3283 return ERR_PTR(-EINVAL);
3284
3285 if (count == 1)
3286 return intel_context_create(ctx, siblings[0]);
3287
3288 ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
3289 if (!ve)
3290 return ERR_PTR(-ENOMEM);
3291
3292 ve->base.i915 = ctx->i915;
3293 ve->base.id = -1;
3294 ve->base.class = OTHER_CLASS;
3295 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
3296 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
3297 ve->base.flags = I915_ENGINE_IS_VIRTUAL;
3298
3299	/*
3300	 * The decision on whether to submit a request using semaphores
3301	 * depends on the saturated state of the engine. We only compute
3302	 * this during HW submission of the request, and we need that
3303	 * state to apply to every request submitted to this engine. A
3304	 * virtual engine spans more than one physical engine, so we
3305	 * cannot accurately tell in advance whether any one of them is
3306	 * already saturated. To keep the scheduling of the virtual
3307	 * engine consistent across its siblings, we pessimistically
3308	 * treat it as always saturated, trading the potential benefit
3309	 * of semaphores for predictable load balancing; hence
3310	 * saturated = ALL_ENGINES below.
3311	 */
3312 ve->base.saturated = ALL_ENGINES;
3313
3314 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
3315
3316 intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
3317
3318 intel_engine_init_execlists(&ve->base);
3319
3320 ve->base.cops = &virtual_context_ops;
3321 ve->base.request_alloc = execlists_request_alloc;
3322
3323 ve->base.schedule = i915_schedule;
3324 ve->base.submit_request = virtual_submit_request;
3325 ve->base.bond_execute = virtual_bond_execute;
3326
3327 INIT_LIST_HEAD(virtual_queue(ve));
3328 ve->base.execlists.queue_priority_hint = INT_MIN;
3329 tasklet_init(&ve->base.execlists.tasklet,
3330 virtual_submission_tasklet,
3331 (unsigned long)ve);
3332
3333 intel_context_init(&ve->context, ctx, &ve->base);
3334
3335 for (n = 0; n < count; n++) {
3336 struct intel_engine_cs *sibling = siblings[n];
3337
3338 GEM_BUG_ON(!is_power_of_2(sibling->mask));
3339 if (sibling->mask & ve->base.mask) {
3340 DRM_DEBUG("duplicate %s entry in load balancer\n",
3341 sibling->name);
3342 err = -EINVAL;
3343 goto err_put;
3344 }
3345
3346		/*
3347		 * The virtual engine implementation is tightly coupled to
3348		 * the execlists backend -- we push requests directly into a
3349		 * tree inside each physical engine -- so refuse any sibling
3350		 * whose submission tasklet is not the execlists one, as we
3351		 * have no way of layering on top of a different backend.
3352		 */
3353 if (sibling->execlists.tasklet.func !=
3354 execlists_submission_tasklet) {
3355 err = -ENODEV;
3356 goto err_put;
3357 }
3358
3359 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
3360 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
3361
3362 ve->siblings[ve->num_siblings++] = sibling;
3363 ve->base.mask |= sibling->mask;
3364
3365		/*
3366		 * The emission functions are fixed at request construction
3367		 * time and never change afterwards, so every sibling must be
3368		 * of the same engine class. The first sibling supplies the
3369		 * class, name and emitters for the whole virtual engine;
3370		 * later iterations only verify that the classes match.
3371		 */
3372 if (ve->base.class != OTHER_CLASS) {
3373 if (ve->base.class != sibling->class) {
3374 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
3375 sibling->class, ve->base.class);
3376 err = -EINVAL;
3377 goto err_put;
3378 }
3379 continue;
3380 }
3381
3382 ve->base.class = sibling->class;
3383 ve->base.uabi_class = sibling->uabi_class;
3384 snprintf(ve->base.name, sizeof(ve->base.name),
3385 "v%dx%d", ve->base.class, count);
3386 ve->base.context_size = sibling->context_size;
3387
3388 ve->base.emit_bb_start = sibling->emit_bb_start;
3389 ve->base.emit_flush = sibling->emit_flush;
3390 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
3391 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
3392 ve->base.emit_fini_breadcrumb_dw =
3393 sibling->emit_fini_breadcrumb_dw;
3394 }
3395
3396 return &ve->context;
3397
3398err_put:
3399 intel_context_put(&ve->context);
3400 return ERR_PTR(err);
3401}
3402
3403struct intel_context *
3404intel_execlists_clone_virtual(struct i915_gem_context *ctx,
3405 struct intel_engine_cs *src)
3406{
3407 struct virtual_engine *se = to_virtual_engine(src);
3408 struct intel_context *dst;
3409
3410 dst = intel_execlists_create_virtual(ctx,
3411 se->siblings,
3412 se->num_siblings);
3413 if (IS_ERR(dst))
3414 return dst;
3415
3416 if (se->num_bonds) {
3417 struct virtual_engine *de = to_virtual_engine(dst->engine);
3418
3419 de->bonds = kmemdup(se->bonds,
3420 sizeof(*se->bonds) * se->num_bonds,
3421 GFP_KERNEL);
3422 if (!de->bonds) {
3423 intel_context_put(dst);
3424 return ERR_PTR(-ENOMEM);
3425 }
3426
3427 de->num_bonds = se->num_bonds;
3428 }
3429
3430 return dst;
3431}
3432
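/*
 * Record a bond: whenever 'master' is the signaller, the bonded request on
 * this virtual engine may only execute on 'sibling'. Repeated calls for the
 * same master OR additional siblings into the existing mask.
 */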
3433int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
3434 const struct intel_engine_cs *master,
3435 const struct intel_engine_cs *sibling)
3436{
3437 struct virtual_engine *ve = to_virtual_engine(engine);
3438 struct ve_bond *bond;
3439 int n;
3440
3441	/* Sanity check that the sibling is part of the virtual engine */
3442 for (n = 0; n < ve->num_siblings; n++)
3443 if (sibling == ve->siblings[n])
3444 break;
3445 if (n == ve->num_siblings)
3446 return -EINVAL;
3447
3448 bond = virtual_find_bond(ve, master);
3449 if (bond) {
3450 bond->sibling_mask |= sibling->mask;
3451 return 0;
3452 }
3453
3454 bond = krealloc(ve->bonds,
3455 sizeof(*bond) * (ve->num_bonds + 1),
3456 GFP_KERNEL);
3457 if (!bond)
3458 return -ENOMEM;
3459
3460 bond[ve->num_bonds].master = master;
3461 bond[ve->num_bonds].sibling_mask = sibling->mask;
3462
3463 ve->bonds = bond;
3464 ve->num_bonds++;
3465
3466 return 0;
3467}
3468
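/*
 * Debug printer: dump up to 'max' requests from each of the engine's
 * active list (E), priority queue (Q) and virtual request tree (V).
 */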
3469void intel_execlists_show_requests(struct intel_engine_cs *engine,
3470 struct drm_printer *m,
3471 void (*show_request)(struct drm_printer *m,
3472 struct i915_request *rq,
3473 const char *prefix),
3474 unsigned int max)
3475{
3476 const struct intel_engine_execlists *execlists = &engine->execlists;
3477 struct i915_request *rq, *last;
3478 unsigned long flags;
3479 unsigned int count;
3480 struct rb_node *rb;
3481
3482 spin_lock_irqsave(&engine->active.lock, flags);
3483
3484 last = NULL;
3485 count = 0;
3486 list_for_each_entry(rq, &engine->active.requests, sched.link) {
3487 if (count++ < max - 1)
3488 show_request(m, rq, "\t\tE ");
3489 else
3490 last = rq;
3491 }
3492 if (last) {
3493 if (count > max) {
3494 drm_printf(m,
3495 "\t\t...skipping %d executing requests...\n",
3496 count - max);
3497 }
3498 show_request(m, last, "\t\tE ");
3499 }
3500
3501 last = NULL;
3502 count = 0;
3503 if (execlists->queue_priority_hint != INT_MIN)
3504 drm_printf(m, "\t\tQueue priority hint: %d\n",
3505 execlists->queue_priority_hint);
3506 for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
3507 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
3508 int i;
3509
3510 priolist_for_each_request(rq, p, i) {
3511 if (count++ < max - 1)
3512 show_request(m, rq, "\t\tQ ");
3513 else
3514 last = rq;
3515 }
3516 }
3517 if (last) {
3518 if (count > max) {
3519 drm_printf(m,
3520 "\t\t...skipping %d queued requests...\n",
3521 count - max);
3522 }
3523 show_request(m, last, "\t\tQ ");
3524 }
3525
3526 last = NULL;
3527 count = 0;
3528 for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
3529 struct virtual_engine *ve =
3530 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
3531 struct i915_request *rq = READ_ONCE(ve->request);
3532
3533 if (rq) {
3534 if (count++ < max - 1)
3535 show_request(m, rq, "\t\tV ");
3536 else
3537 last = rq;
3538 }
3539 }
3540 if (last) {
3541 if (count > max) {
3542 drm_printf(m,
3543 "\t\t...skipping %d virtual requests...\n",
3544 count - max);
3545 }
3546 show_request(m, last, "\t\tV ");
3547 }
3548
3549 spin_unlock_irqrestore(&engine->active.lock, flags);
3550}
3551
3552void intel_lr_context_reset(struct intel_engine_cs *engine,
3553 struct intel_context *ce,
3554 u32 head,
3555 bool scrub)
3556{
3557	/*
3558	 * We want a simple context + ring to execute the breadcrumb update.
3559	 * We cannot rely on the context being intact across the GPU hang,
3560	 * so clear it and rebuild just what we need for the breadcrumb.
3561	 * All pending requests for this context will be zapped, and any
3562	 * future request will be after userspace has had the opportunity
3563	 * to recreate its own state.
3564	 */
3565 if (scrub) {
3566 u32 *regs = ce->lrc_reg_state;
3567
3568 if (engine->pinned_default_state) {
3569 memcpy(regs,
3570 engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
3571 engine->context_size - PAGE_SIZE);
3572 }
3573 execlists_init_reg_state(regs, ce, engine, ce->ring);
3574 }
3575
3576	/* Rewind the ring so the context reruns from the given head. */
3577 ce->ring->head = head;
3578 intel_ring_update_space(ce->ring);
3579
3580 __execlists_update_reg_state(ce, engine);
3581}
3582
3583#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3584#include "selftest_lrc.c"
3585#endif
3586