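/*
 * intel_lrc.c - Logical Ring Contexts and Execlists command submission
 *
 * On GEN8+ hardware, requests execute from per-context logical rings whose
 * state lives in a Logical Ring Context (LRC) image, and work is submitted
 * to the GPU through the two-element ExecList Submission Port (ELSP)
 * rather than a single global ring buffer.
 */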

#include <linux/interrupt.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_mocs.h"

#define RING_EXECLIST_QFULL		(1 << 0x2)
#define RING_EXECLIST1_VALID		(1 << 0x3)
#define RING_EXECLIST0_VALID		(1 << 0x4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
#define RING_EXECLIST0_ACTIVE		(1 << 0x12)

#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
	 (GEN8_CTX_STATUS_ACTIVE_IDLE | \
	  GEN8_CTX_STATUS_PREEMPTED | \
	  GEN8_CTX_STATUS_ELEMENT_SWITCH)

#define CTX_LRI_HEADER_0		0x01
#define CTX_CONTEXT_CONTROL		0x02
#define CTX_RING_HEAD			0x04
#define CTX_RING_TAIL			0x06
#define CTX_RING_BUFFER_START		0x08
#define CTX_RING_BUFFER_CONTROL		0x0a
#define CTX_BB_HEAD_U			0x0c
#define CTX_BB_HEAD_L			0x0e
#define CTX_BB_STATE			0x10
#define CTX_SECOND_BB_HEAD_U		0x12
#define CTX_SECOND_BB_HEAD_L		0x14
#define CTX_SECOND_BB_STATE		0x16
#define CTX_BB_PER_CTX_PTR		0x18
#define CTX_RCS_INDIRECT_CTX		0x1a
#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
#define CTX_LRI_HEADER_1		0x21
#define CTX_CTX_TIMESTAMP		0x22
#define CTX_PDP3_UDW			0x24
#define CTX_PDP3_LDW			0x26
#define CTX_PDP2_UDW			0x28
#define CTX_PDP2_LDW			0x2a
#define CTX_PDP1_UDW			0x2c
#define CTX_PDP1_LDW			0x2e
#define CTX_PDP0_UDW			0x30
#define CTX_PDP0_LDW			0x32
#define CTX_LRI_HEADER_2		0x41
#define CTX_R_PWR_CLK_STATE		0x42
#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44

#define CTX_REG(reg_state, pos, reg, val) do { \
	(reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
	(reg_state)[(pos)+1] = (val); \
} while (0)

#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \
	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
} while (0)

#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
} while (0)

#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x26
#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x19

/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* same size as ctx->ring->space */

#define WA_TAIL_DWORDS 2

static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
					    struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
				     struct i915_gem_context *ctx,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring);
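
/**
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
 * @dev_priv: i915 device private
 * @enable_execlists: value of i915.enable_execlists module parameter.
 *
 * Only certain platforms support Execlists (the prerequisites being
 * support for Logical Ring Contexts and Aliasing PPGTT or better).
 *
 * Return: 1 if Execlists is supported and has to be enabled.
 */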
int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists)
{
	/* On platforms with execlist available, vGPU will only
	 * support execlist mode, no ring buffer mode.
	 */
	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && intel_vgpu_active(dev_priv))
		return 1;

	if (INTEL_GEN(dev_priv) >= 9)
		return 1;

	if (enable_execlists == 0)
		return 0;

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) &&
	    USES_PPGTT(dev_priv) &&
	    i915.use_mmio_flip >= 0)
		return 1;

	return 0;
}
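
/*
 * intel_lr_context_descriptor_update() - calculate & cache the descriptor
 *					  for a pinned context
 * @ctx: Context to work on
 * @engine: Engine the descriptor will be used with
 *
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *	bits  0-11:	flags, GEN8_CTX_* (cached in ctx->desc_template)
 *	bits 12-31:	LRCA, GTT address of (the HWSP of) this context
 *	bits 32-52:	ctx ID, a globally unique tag
 *	bits 53-54:	mbz, reserved for use by hardware
 *	bits 55-63:	group ID, currently unused and set to 0
 */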
static void
intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
				   struct intel_engine_cs *engine)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	u64 desc;

	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));

	desc = ctx->desc_template;				/* bits  0-11 */
	desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE;
								/* bits 12-31 */
	desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;		/* bits 32-52 */

	ce->lrc_desc = desc;
}

uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
				     struct intel_engine_cs *engine)
{
	return ctx->engine[engine->id].lrc_desc;
}

static inline void
execlists_context_status_change(struct drm_i915_gem_request *rq,
				unsigned long status)
{
	/*
	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
	 * the compiler should eliminate this function as dead-code.
	 */
	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return;

	atomic_notifier_call_chain(&rq->engine->context_status_notifier,
				   status, rq);
}

static void
execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
{
	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
}

static u64 execlists_update_context(struct drm_i915_gem_request *rq)
{
	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
	struct i915_hw_ppgtt *ppgtt =
		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
	u32 *reg_state = ce->lrc_reg_state;

	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);

	/* True 32b PPGTT with dynamic page allocation: update PDP
	 * registers and point the unallocated PDPs to scratch page.
	 * PML4 is allocated during ppgtt init, so this is not needed
	 * in 48-bit mode.
	 */
	if (ppgtt && !i915_vm_is_48bit(&ppgtt->base))
		execlists_update_context_pdps(ppgtt, reg_state);

	return ce->lrc_desc;
}

static void execlists_submit_ports(struct intel_engine_cs *engine)
{
	struct execlist_port *port = engine->execlist_port;
	u32 __iomem *elsp =
		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
	unsigned int n;

	/* ELSP is written in reverse order: the second (lower-priority)
	 * port is programmed first, then port 0, with each descriptor
	 * written as two dwords, upper half first.
	 */
	for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
		struct drm_i915_gem_request *rq;
		unsigned int count;
		u64 desc;

		rq = port_unpack(&port[n], &count);
		if (rq) {
			GEM_BUG_ON(count > !n);
			if (!count++)
				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
			port_set(&port[n], port_pack(rq, count));
			desc = execlists_update_context(rq);
			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
		} else {
			GEM_BUG_ON(!n);
			desc = 0;
		}

		writel(upper_32_bits(desc), elsp);
		writel(lower_32_bits(desc), elsp);
	}
}

static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
{
	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
		i915_gem_context_force_single_submission(ctx));
}

static bool can_merge_ctx(const struct i915_gem_context *prev,
			  const struct i915_gem_context *next)
{
	if (prev != next)
		return false;

	if (ctx_single_port_submission(prev))
		return false;

	return true;
}

static void port_assign(struct execlist_port *port,
			struct drm_i915_gem_request *rq)
{
	GEM_BUG_ON(rq == port_request(port));

	if (port_isset(port))
		i915_gem_request_put(port_request(port));

	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
}

static void execlists_dequeue(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *last;
	struct execlist_port *port = engine->execlist_port;
	struct rb_node *rb;
	bool submit = false;

	last = port_request(port);
	if (last)
		/* WaIdleLiteRestore:bdw,skl
		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
		 * as we resubmit the request. See gen8_emit_breadcrumb()
		 * for where we prepare the padding after the end of the
		 * request.
		 */
		last->tail = last->wa_tail;

	GEM_BUG_ON(port_isset(&port[1]));
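
	/* Hardware submission is through 2 ports. Conceptually each port
	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
	 * static for a context, and unique to each, so we only execute
	 * requests belonging to a single context from each ring. RING_HEAD
	 * is maintained by the CS in the context image, it marks the place
	 * where it got up to last time, and through RING_TAIL we tell the CS
	 * where we want to execute up to this time.
	 *
	 * In this list the requests are in order of execution. Consecutive
	 * requests from the same context are adjacent in the ringbuffer. We
	 * can combine these requests into a single RING_TAIL update:
	 *
	 *              RING_HEAD...req1...req2
	 *                                    ^- RING_TAIL
	 * since to execute req2 the CS must first execute req1.
	 *
	 * Our goal then is to point each port to the end of a consecutive
	 * sequence of requests as being the most optimal (fewest wake ups
	 * and context switches) submission.
	 */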
	spin_lock_irq(&engine->timeline->lock);
	rb = engine->execlist_first;
	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
	while (rb) {
		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
		struct drm_i915_gem_request *rq, *rn;

		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
			/*
			 * Can we combine this request with the current port?
			 * It has to be the same context/ringbuffer and not
			 * have any exceptions (e.g. GVT saying never to
			 * combine contexts).
			 *
			 * If we can combine the requests, we can execute both
			 * by updating the RING_TAIL to point to the end of the
			 * second request, and so we never need to tell the
			 * hardware about the first.
			 */
			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
				/*
				 * If we are on the second port and cannot
				 * combine this request with the last, then we
				 * are done.
				 */
				if (port != engine->execlist_port) {
					__list_del_many(&p->requests,
							&rq->priotree.link);
					goto done;
				}

				/*
				 * If GVT overrides us we only ever submit
				 * port[0], leaving port[1] empty. Note that we
				 * also have to be careful that we don't queue
				 * the same context (even though a different
				 * request) to the second port.
				 */
				if (ctx_single_port_submission(last->ctx) ||
				    ctx_single_port_submission(rq->ctx)) {
					__list_del_many(&p->requests,
							&rq->priotree.link);
					goto done;
				}

				GEM_BUG_ON(last->ctx == rq->ctx);

				if (submit)
					port_assign(port, last);
				port++;
			}

			INIT_LIST_HEAD(&rq->priotree.link);
			rq->priotree.priority = INT_MAX;

			__i915_gem_request_submit(rq);
			trace_i915_gem_request_in(rq, port_index(port, engine));
			last = rq;
			submit = true;
		}

		rb = rb_next(rb);
		rb_erase(&p->node, &engine->execlist_queue);
		INIT_LIST_HEAD(&p->requests);
		if (p->priority != I915_PRIORITY_NORMAL)
			kmem_cache_free(engine->i915->priorities, p);
	}
done:
	engine->execlist_first = rb;
	if (submit)
		port_assign(port, last);
	spin_unlock_irq(&engine->timeline->lock);

	if (submit)
		execlists_submit_ports(engine);
}

static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
{
	const struct execlist_port *port = engine->execlist_port;

	return port_count(&port[0]) + port_count(&port[1]) < 2;
}
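
/*
 * Check the unread Context Status Buffers and manage the submission of new
 * contexts to the ELSP accordingly.
 */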
static void intel_lrc_irq_handler(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
	struct execlist_port *port = engine->execlist_port;
	struct drm_i915_private *dev_priv = engine->i915;

	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
	 * not be relinquished until the device is idle (see
	 * i915_gem_idle_work_handler()). As a precaution, we make sure
	 * that all ELSP are drained i.e. we have processed the CSB,
	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
	 */
	GEM_BUG_ON(!dev_priv->gt.awake);

	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);

	/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
	 * imposing the cost of a locked atomic transaction when submitting a
	 * new request (outside of the context-switch interrupt).
	 */
	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
		u32 __iomem *csb_mmio =
			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
		u32 __iomem *buf =
			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
		unsigned int head, tail;

		/* The write will be ordered by the uncached read (itself
		 * a memory barrier), so we do not need another in the form
		 * of a locked instruction. The race between the interrupt
		 * handler and the split test/clear is harmless as we order
		 * our clear before the CSB read. If the interrupt arrived
		 * first between the test and the clear, we read the updated
		 * CSB and clear the bit. If the interrupt arrives as we read
		 * the CSB or later (i.e. after we had cleared the bit) the bit
		 * is cleared and we will rearm the tasklet to check for
		 * the CSB again.
		 */
		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
		head = readl(csb_mmio);
		tail = GEN8_CSB_WRITE_PTR(head);
		head = GEN8_CSB_READ_PTR(head);
		while (head != tail) {
			struct drm_i915_gem_request *rq;
			unsigned int status;
			unsigned int count;

			if (++head == GEN8_CSB_ENTRIES)
				head = 0;
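
			/* We are flying near dragons again.
			 *
			 * We hold a reference to the request in execlist_port[]
			 * but no more than that. We are operating in softirq
			 * context and so cannot hold any mutex or sleep. That
			 * prevents us stopping the requests we are processing
			 * in port[] from being retired simultaneously (the
			 * breadcrumb will be complete before we see the
			 * context-switch). As we only hold the reference to the
			 * request, any pointer chasing underneath the request
			 * is subject to a potential use-after-free. Thus we
			 * store all of the bookkeeping within port[] as
			 * required, and avoid using unguarded pointers beneath
			 * request itself. The same applies to the atomic
			 * status notifier.
			 */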
			status = readl(buf + 2 * head);
			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
				continue;

			/* Check the context/desc id for this event matches */
			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
					 port->context_id);

			rq = port_unpack(port, &count);
			GEM_BUG_ON(count == 0);
			if (--count == 0) {
				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
				GEM_BUG_ON(!i915_gem_request_completed(rq));
				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);

				trace_i915_gem_request_out(rq);
				i915_gem_request_put(rq);

				port[0] = port[1];
				memset(&port[1], 0, sizeof(port[1]));
			} else {
				port_set(port, port_pack(rq, count));
			}

			/* After the final element, the hw should be idle */
			GEM_BUG_ON(port_count(port) == 0 &&
				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
		}

		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
		       csb_mmio);
	}

	if (execlists_elsp_ready(engine))
		execlists_dequeue(engine);

	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
}

static bool
insert_request(struct intel_engine_cs *engine,
	       struct i915_priotree *pt,
	       int prio)
{
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;

	if (unlikely(engine->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &engine->execlist_queue.rb_node;
	while (*parent) {
		rb = *parent;
		p = rb_entry(rb, typeof(*p), node);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			list_add_tail(&pt->link, &p->requests);
			return false;
		}
	}

	if (prio == I915_PRIORITY_NORMAL) {
		p = &engine->default_priolist;
	} else {
		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will be still some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			engine->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	rb_link_node(&p->node, rb, parent);
	rb_insert_color(&p->node, &engine->execlist_queue);

	INIT_LIST_HEAD(&p->requests);
	list_add_tail(&pt->link, &p->requests);

	if (first)
		engine->execlist_first = &p->node;

	return first;
}

static void execlists_submit_request(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->timeline->lock, flags);

	if (insert_request(engine,
			   &request->priotree,
			   request->priotree.priority)) {
		if (execlists_elsp_ready(engine))
			tasklet_hi_schedule(&engine->irq_tasklet);
	}

	GEM_BUG_ON(!engine->execlist_first);
	GEM_BUG_ON(list_empty(&request->priotree.link));

	spin_unlock_irqrestore(&engine->timeline->lock, flags);
}

static struct intel_engine_cs *
pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
	struct intel_engine_cs *engine =
		container_of(pt, struct drm_i915_gem_request, priotree)->engine;

	GEM_BUG_ON(!locked);

	if (engine != locked) {
		spin_unlock(&locked->timeline->lock);
		spin_lock(&engine->timeline->lock);
	}

	return engine;
}

static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
{
	struct intel_engine_cs *engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	LIST_HEAD(dfs);

	if (prio <= READ_ONCE(request->priotree.priority))
		return;

	/* Need BKL in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&request->i915->drm.struct_mutex);

	stack.signaler = &request->priotree;
	list_add(&stack.dfs_link, &dfs);
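
	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_priotree *pt, prio) {
	 *	list_for_each_entry(dep, &pt->signalers_list, signal_link)
	 *		update_priorities(dep->signaler, prio)
	 *	insert_request(pt);
	 * }
	 * but that may have unlimited recursion depth and so runs a very real
	 * risk of overflowing the kernel stack. Instead, we build a flat list
	 * of all dependencies starting with the current request, and grow the
	 * list as we process each entry, giving us a breadth-first walk
	 * without recursion.
	 */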
	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
		struct i915_priotree *pt = dep->signaler;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &pt->signalers_list, signal_link) {
			GEM_BUG_ON(p->signaler->priority < pt->priority);
			if (prio > READ_ONCE(p->signaler->priority))
				list_move_tail(&p->dfs_link, &dfs);
		}

		list_safe_reset_next(dep, p, dfs_link);
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (request->priotree.priority == INT_MIN) {
		GEM_BUG_ON(!list_empty(&request->priotree.link));
		request->priotree.priority = prio;
		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;
		__list_del_entry(&stack.dfs_link);
	}

	engine = request->engine;
	spin_lock_irq(&engine->timeline->lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		struct i915_priotree *pt = dep->signaler;

		INIT_LIST_HEAD(&dep->dfs_link);

		engine = pt_lock_engine(pt, engine);

		if (prio <= pt->priority)
			continue;

		pt->priority = prio;
		if (!list_empty(&pt->link)) {
			__list_del_entry(&pt->link);
			insert_request(engine, pt, prio);
		}
	}

	spin_unlock_irq(&engine->timeline->lock);

	/* XXX Do we need to preempt to make room for us and our deps? */
}

static struct intel_ring *
execlists_context_pin(struct intel_engine_cs *engine,
		      struct i915_gem_context *ctx)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	unsigned int flags;
	void *vaddr;
	int ret;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);

	if (likely(ce->pin_count++))
		goto out;
	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */

	if (!ce->state) {
		ret = execlists_context_deferred_alloc(ctx, engine);
		if (ret)
			goto err;
	}
	GEM_BUG_ON(!ce->state);

	flags = PIN_GLOBAL | PIN_HIGH;
	if (ctx->ggtt_offset_bias)
		flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias;

	ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags);
	if (ret)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto unpin_vma;
	}

	ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias);
	if (ret)
		goto unpin_map;

	intel_lr_context_descriptor_update(ctx, engine);

	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
		i915_ggtt_offset(ce->ring->vma);

	ce->state->obj->mm.dirty = true;

	i915_gem_context_get(ctx);
out:
	return ce->ring;

unpin_map:
	i915_gem_object_unpin_map(ce->state->obj);
unpin_vma:
	__i915_vma_unpin(ce->state);
err:
	ce->pin_count = 0;
	return ERR_PTR(ret);
}

static void execlists_context_unpin(struct intel_engine_cs *engine,
				    struct i915_gem_context *ctx)
{
	struct intel_context *ce = &ctx->engine[engine->id];

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	GEM_BUG_ON(ce->pin_count == 0);

	if (--ce->pin_count)
		return;

	intel_ring_unpin(ce->ring);

	i915_gem_object_unpin_map(ce->state->obj);
	i915_vma_unpin(ce->state);

	i915_gem_context_put(ctx);
}

static int execlists_request_alloc(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_context *ce = &request->ctx->engine[engine->id];
	u32 *cs;
	int ret;

	GEM_BUG_ON(!ce->pin_count);

	/* Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += EXECLISTS_REQUEST_SIZE;

	if (i915.enable_guc_submission) {
		/*
		 * Check that the GuC has space for the request before
		 * going any further, as the i915_add_request() call
		 * later on mustn't fail.
		 */
		ret = i915_guc_wq_reserve(request);
		if (ret)
			goto err;
	}

	cs = intel_ring_begin(request, 0);
	if (IS_ERR(cs)) {
		ret = PTR_ERR(cs);
		goto err_unreserve;
	}

	if (!ce->initialised) {
		ret = engine->init_context(request);
		if (ret)
			goto err_unreserve;

		ce->initialised = true;
	}

	/* Note that after this point, we have committed to using
	 * this request as it is being used to both track the
	 * state of engine initialisation and liveness of the
	 * golden renderstate above. Think twice before you try
	 * to cancel/unwind this request now.
	 */

	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
	return 0;

err_unreserve:
	if (i915.enable_guc_submission)
		i915_guc_wq_unreserve(request);
err:
	return ret;
}
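
/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication as this is applied in a
 * WA batch where the values are only initialized once, so we cannot take
 * the register value at the beginning and reuse it further; hence we save
 * its value to memory, upload a constant value with bit 21 set and then
 * restore it back with the saved value. To simplify the WA, a constant
 * value is formed by using the default value of this register. This
 * shouldn't be a problem because we are only modifying bit 21 and when
 * this value is restored, the whole register (including bit 21) gets its
 * saved contents back.
 */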
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
	*batch++ = 0;

	return batch;
}
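
/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts but this field
 * helps us to have multiple batches at different offsets and select them based
 * on a criteria. At the moment this batch always starts at the beginning of
 * the page and at this point we don't have multiple wa_ctx batch buffers.
 *
 * The number of WA applied are not known at the beginning; we use this field
 * to return the number of DWORDS written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them
 * together make a complete batch buffer.
 */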
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_GLOBAL_GTT_IVB |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       i915_ggtt_offset(engine->scratch) +
				       2 * CACHELINE_BYTES);

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}
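
/*
 * This batch is started immediately after the indirect_ctx batch. Since we
 * ensure that indirect_ctx ends on a cacheline, this batch is aligned
 * automatically.
 *
 * The number of DWORDS written are returned using this field.
 *
 * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add
 * padding to align it with cacheline, as padding after MI_BATCH_BUFFER_END
 * is redundant.
 */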
static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*batch++ = MI_BATCH_BUFFER_END;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
	*batch++ = _MASKED_BIT_DISABLE(
			GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
	*batch++ = MI_NOOP;

	/* WaClearSlmSpaceAtContextSwitch:kbl */
	/* Actual scratch location is at 128 bytes offset */
	if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
		batch = gen8_emit_pipe_control(batch,
					       PIPE_CONTROL_FLUSH_L3 |
					       PIPE_CONTROL_GLOBAL_GTT_IVB |
					       PIPE_CONTROL_CS_STALL |
					       PIPE_CONTROL_QW_WRITE,
					       i915_ggtt_offset(engine->scratch)
					       + 2 * CACHELINE_BYTES);
	}

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is setup along with golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled especially for 2x6
		 * devices, however it is safe to load default
		 * configuration of 3x6 device instead of masking off
		 * corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations, to avoid duplicate information
		 * about leading bits.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	*batch++ = MI_BATCH_BUFFER_END;

	return batch;
}

#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)

static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH);
	if (err)
		goto err;

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

static int intel_init_workaround_bb(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
					    &wa_ctx->per_ctx };
	wa_bb_func_t wa_bb_fn[2];
	struct page *page;
	void *batch, *batch_ptr;
	unsigned int i;
	int ret;

	if (WARN_ON(engine->id != RCS || !engine->scratch))
		return -EINVAL;

	switch (INTEL_GEN(engine->i915)) {
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = gen9_init_perctx_bb;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = gen8_init_perctx_bb;
		break;
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		return 0;
	}

	ret = lrc_setup_wa_ctx(engine);
	if (ret) {
		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
		return ret;
	}

	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
	batch = batch_ptr = kmap_atomic(page);

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) {
			ret = -EINVAL;
			break;
		}
		batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}

	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);

	kunmap_atomic(batch);
	if (ret)
		lrc_destroy_wa_ctx(engine);

	return ret;
}

static u8 gtiir[] = {
	[RCS] = 0,
	[BCS] = 0,
	[VCS] = 1,
	[VCS2] = 1,
	[VECS] = 3,
};

static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct execlist_port *port = engine->execlist_port;
	unsigned int n;
	bool submit;
	int ret;

	ret = intel_mocs_init_engine(engine);
	if (ret)
		return ret;

	intel_engine_reset_breadcrumbs(engine);
	intel_engine_init_hangcheck(engine);

	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
	I915_WRITE(RING_MODE_GEN7(engine),
		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
		   engine->status_page.ggtt_offset);
	POSTING_READ(RING_HWS_PGA(engine->mmio_base));

	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);

	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));

	/*
	 * Clear any pending interrupt state.
	 *
	 * We do it twice out of paranoia that some of the IIR are double
	 * buffered, and if we only reset it once there may still be
	 * an interrupt pending.
	 */
	I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
		   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
	I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
		   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);

	/* After a GPU reset, we may have requests to replay */
	submit = false;
	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
		if (!port_isset(&port[n]))
			break;

		DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
				 engine->name, n,
				 port_request(&port[n])->global_seqno);

		/* Discard the current inflight count */
		port_set(&port[n], port_request(&port[n]));
		submit = true;
	}

	if (submit && !i915.enable_guc_submission)
		execlists_submit_ports(engine);

	return 0;
}

static int gen8_init_render_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_common_ring(engine);
	if (ret)
		return ret;

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:bdw,chv
	 */
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	return init_workarounds_ring(engine);
}

static int gen9_init_render_ring(struct intel_engine_cs *engine)
{
	int ret;

	ret = gen8_init_common_ring(engine);
	if (ret)
		return ret;

	return init_workarounds_ring(engine);
}

static void reset_common_ring(struct intel_engine_cs *engine,
			      struct drm_i915_gem_request *request)
{
	struct execlist_port *port = engine->execlist_port;
	struct intel_context *ce;

	/* If the request was innocent, we leave the request in the ELSP
	 * and will try to replay it on restarting. The context image may
	 * have been corrupted by the reset, in which case we may have
	 * to service a new GPU hang, but more likely we can continue on
	 * without impact.
	 *
	 * If the request was guilty, we presume the context is corrupt
	 * and have to at best replace it with the new default context.
	 * So, taking the fast path, we reset the GPU and replay the
	 * request.
	 */
	if (!request || request->fence.error != -EIO)
		return;

	/* We want a simple context + ring to execute the breadcrumb update.
	 * We cannot rely on the context being intact across the GPU hang,
	 * so clear it and rebuild just what we need for the breadcrumb.
	 * All pending requests for this context will be zapped, and any
	 * future request will be after userspace has had the opportunity
	 * to recreate its own state.
	 */
	ce = &request->ctx->engine[engine->id];
	execlists_init_reg_state(ce->lrc_reg_state,
				 request->ctx, engine, ce->ring);

	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
		i915_ggtt_offset(ce->ring->vma);
	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;

	request->ring->head = request->postfix;
	intel_ring_update_space(request->ring);

	/* Catch up with any missed context-switch interrupts */
	if (request->ctx != port_request(port)->ctx) {
		i915_gem_request_put(port_request(port));
		port[0] = port[1];
		memset(&port[1], 0, sizeof(port[1]));
	}

	GEM_BUG_ON(request->ctx != port_request(port)->ctx);

	/* Reset WaIdleLiteRestore:bdw,skl as well */
	request->tail =
		intel_ring_wrap(request->ring,
				request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
	assert_ring_tail_valid(request->ring, request->tail);
}

static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
{
	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
	struct intel_engine_cs *engine = req->engine;
	const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
	u32 *cs;
	int i;

	cs = intel_ring_begin(req, num_lri_cmds * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
	for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);

		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
		*cs++ = upper_32_bits(pd_daddr);
		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
		*cs++ = lower_32_bits(pd_daddr);
	}

	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
			      u64 offset, u32 len,
			      const unsigned int flags)
{
	u32 *cs;
	int ret;

	/* Don't rely in hw updating PDPs, specially in lite-restore.
	 * Ideally, we should set Force PD Restore in ctx descriptor,
	 * but we can't. Force Restore would be a second option, but
	 * it is unsafe in case of lite-restore (because the ctx is
	 * not idle). PML4 is allocated during ppgtt init so this is
	 * not needed in 48-bit.
	 */
	if (req->ctx->ppgtt &&
	    (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) &&
	    !i915_vm_is_48bit(&req->ctx->ppgtt->base) &&
	    !intel_vgpu_active(req->i915)) {
		ret = intel_logical_ring_emit_pdps(req);
		if (ret)
			return ret;

		req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
	}

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* FIXME(BDW): Address space and security selectors. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) |
		(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	I915_WRITE_IMR(engine,
		       ~(engine->irq_enable_mask | engine->irq_keep_mask));
	POSTING_READ_FW(RING_IMR(engine->mmio_base));
}

static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
}

static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(request, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW + 1;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (request->engine->id == VCS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */
	intel_ring_advance(request, cs);

	return 0;
}

static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
				  u32 mode)
{
	struct intel_engine_cs *engine = request->engine;
	u32 scratch_addr =
		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 *cs, flags = 0;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/*
		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
		 * pipe control.
		 */
		if (IS_GEN9(request->i915))
			vf_flush_wa = true;

		/* WaForGAMHang:kbl */
		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
			dc_flush_wa = true;
	}

	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	cs = intel_ring_begin(request, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (vf_flush_wa)
		cs = gen8_emit_pipe_control(cs, 0, 0);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
					    0);

	cs = gen8_emit_pipe_control(cs, flags, scratch_addr);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(request, cs);

	return 0;
}
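
/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */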
static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
{
	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	request->wa_tail = intel_ring_offset(request, cs);
}

static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = request->global_seqno;
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;
	request->tail = intel_ring_offset(request, cs);
	assert_ring_tail_valid(request->ring, request->tail);

	gen8_emit_wa_tail(request, cs);
}

static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;

static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
					u32 *cs)
{
	/* We're using qword write, seqno should be aligned to 8 bytes. */
	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);

	/* w/a for post sync ops following a GPGPU operation we
	 * need a prior CS_STALL, which is emitted by the flush
	 * following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = intel_hws_seqno_address(request->engine);
	*cs++ = 0;
	*cs++ = request->global_seqno;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;
	request->tail = intel_ring_offset(request, cs);
	assert_ring_tail_valid(request->ring, request->tail);

	gen8_emit_wa_tail(request, cs);
}

static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;

static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
{
	int ret;

	ret = intel_ring_workarounds_emit(req);
	if (ret)
		return ret;

	ret = intel_rcs_context_init_mocs(req);
	/*
	 * Failing to program the MOCS is non-fatal. The system will not
	 * run at maximum performance, but all parts should still work.
	 */
	if (ret)
		DRM_ERROR("MOCS failed to program: expect performance issues.\n");

	return i915_gem_render_state_emit(req);
}
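
/**
 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
 * @engine: Engine Command Streamer.
 */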
void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv;

	/*
	 * Tasklet cannot be active at this point due to intel_mark_active/idle
	 * so this is just for documentation.
	 */
	if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->irq_tasklet.state)))
		tasklet_kill(&engine->irq_tasklet);

	dev_priv = engine->i915;

	if (engine->buffer) {
		WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
	}

	if (engine->cleanup)
		engine->cleanup(engine);

	if (engine->status_page.vma) {
		i915_gem_object_unpin_map(engine->status_page.vma->obj);
		engine->status_page.vma = NULL;
	}

	intel_engine_cleanup_common(engine);

	lrc_destroy_wa_ctx(engine);
	engine->i915 = NULL;
	dev_priv->engine[engine->id] = NULL;
	kfree(engine);
}

static void execlists_set_default_submission(struct intel_engine_cs *engine)
{
	engine->submit_request = execlists_submit_request;
	engine->schedule = execlists_schedule;
	engine->irq_tasklet.func = intel_lrc_irq_handler;
}

static void
logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
	/* Default vfuncs which can be overriden by each engine. */
	engine->init_hw = gen8_init_common_ring;
	engine->reset_hw = reset_common_ring;

	engine->context_pin = execlists_context_pin;
	engine->context_unpin = execlists_context_unpin;

	engine->request_alloc = execlists_request_alloc;

	engine->emit_flush = gen8_emit_flush;
	engine->emit_breadcrumb = gen8_emit_breadcrumb;
	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;

	engine->set_default_submission = execlists_set_default_submission;

	engine->irq_enable = gen8_logical_ring_enable_irq;
	engine->irq_disable = gen8_logical_ring_disable_irq;
	engine->emit_bb_start = gen8_emit_bb_start;
}

static inline void
logical_ring_default_irqs(struct intel_engine_cs *engine)
{
	unsigned shift = engine->irq_shift;
	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}

static int
lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
{
	const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
	void *hws;

	/* The HWSP is part of the default context object in LRC mode. */
	hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	engine->status_page.page_addr = hws + hws_offset;
	engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
	engine->status_page.vma = vma;

	return 0;
}

static void
logical_ring_setup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	enum forcewake_domains fw_domains;

	intel_engine_setup_common(engine);

	/* Intentionally left blank. */
	engine->buffer = NULL;

	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
						    RING_ELSP(engine),
						    FW_REG_WRITE);

	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     RING_CONTEXT_STATUS_PTR(engine),
						     FW_REG_READ | FW_REG_WRITE);

	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     RING_CONTEXT_STATUS_BUF_BASE(engine),
						     FW_REG_READ);

	engine->fw_domains = fw_domains;

	tasklet_init(&engine->irq_tasklet,
		     intel_lrc_irq_handler, (unsigned long)engine);

	logical_ring_default_vfuncs(engine);
	logical_ring_default_irqs(engine);
}

static int
logical_ring_init(struct intel_engine_cs *engine)
{
	struct i915_gem_context *dctx = engine->i915->kernel_context;
	int ret;

	ret = intel_engine_init_common(engine);
	if (ret)
		goto error;

	/* And setup the hardware status page. */
	ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
	if (ret) {
		DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
		goto error;
	}

	return 0;

error:
	intel_logical_ring_cleanup(engine);
	return ret;
}

int logical_render_ring_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	logical_ring_setup(engine);

	if (HAS_L3_DPF(dev_priv))
		engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	/* Override some for render ring. */
	if (INTEL_GEN(dev_priv) >= 9)
		engine->init_hw = gen9_init_render_ring;
	else
		engine->init_hw = gen8_init_render_ring;
	engine->init_context = gen8_init_rcs_context;
	engine->emit_flush = gen8_emit_flush_render;
	engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz;

	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
	if (ret)
		return ret;

	ret = intel_init_workaround_bb(engine);
	if (ret) {
		/*
		 * We continue even if we fail to initialize WA batch
		 * because we only expect rare glitches but nothing
		 * critical to prevent us from using GPU
		 */
		DRM_ERROR("WA batch buffer initialization failed: %d\n",
			  ret);
	}

	return logical_ring_init(engine);
}

int logical_xcs_ring_init(struct intel_engine_cs *engine)
{
	logical_ring_setup(engine);

	return logical_ring_init(engine);
}

static u32
make_rpcs(struct drm_i915_private *dev_priv)
{
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (INTEL_GEN(dev_priv) < 9)
		return 0;

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) {
		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) <<
			GEN8_RPCS_S_CNT_SHIFT;
		rpcs |= GEN8_RPCS_ENABLE;
	}

	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
			GEN8_RPCS_SS_CNT_SHIFT;
		rpcs |= GEN8_RPCS_ENABLE;
	}

	if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
		rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
			GEN8_RPCS_EU_MIN_SHIFT;
		rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
			GEN8_RPCS_EU_MAX_SHIFT;
		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
{
	u32 indirect_ctx_offset;

	switch (INTEL_GEN(engine->i915)) {
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		/* fall through */
	case 10:
		indirect_ctx_offset =
			GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
		break;
	case 9:
		indirect_ctx_offset =
			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
		break;
	case 8:
		indirect_ctx_offset =
			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
		break;
	}

	return indirect_ctx_offset;
}

static void execlists_init_reg_state(u32 *regs,
				     struct i915_gem_context *ctx,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
	u32 base = engine->mmio_base;
	bool rcs = engine->id == RCS;
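
	/* A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 */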
	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
				 MI_LRI_FORCE_POSTED;

	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
				   (HAS_RESOURCE_STREAMER(dev_priv) ?
				   CTX_CTRL_RS_CTX_ENABLE : 0)));
	CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
	CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
	CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
	CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
		RING_CTL_SIZE(ring->size) | RING_VALID);
	CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
	CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
	CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
	if (rcs) {
		CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
		CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
		CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
			RING_INDIRECT_CTX_OFFSET(base), 0);

		if (engine->wa_ctx.vma) {
			struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

			regs[CTX_RCS_INDIRECT_CTX + 1] =
				(ggtt_offset + wa_ctx->indirect_ctx.offset) |
				(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);

			regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
				intel_lr_indirect_ctx_offset(engine) << 6;

			regs[CTX_BB_PER_CTX_PTR + 1] =
				(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
		}
	}

	regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;

	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
	/* PDP values will be assigned later if needed */
	CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
	CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
	CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
	CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
	CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
	CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
	CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
	CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);

	if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	}

	if (rcs) {
		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
			make_rpcs(dev_priv));

		i915_oa_init_reg_state(engine, ctx, regs);
	}
}

static int
populate_lr_context(struct i915_gem_context *ctx,
		    struct drm_i915_gem_object *ctx_obj,
		    struct intel_engine_cs *engine,
		    struct intel_ring *ring)
{
	void *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
	if (ret) {
		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
		return ret;
	}

	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
		return ret;
	}
	ctx_obj->mm.dirty = true;

	/* The second page of the context object contains some fields which
	 * must be set up prior to the first execution.
	 */
	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
				 ctx, engine, ring);

	i915_gem_object_unpin_map(ctx_obj);

	return 0;
}

static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
					    struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *ctx_obj;
	struct intel_context *ce = &ctx->engine[engine->id];
	struct i915_vma *vma;
	uint32_t context_size;
	struct intel_ring *ring;
	int ret;

	WARN_ON(ce->state);

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	/* One extra page as the sharing data between driver and GuC */
	context_size += PAGE_SIZE * LRC_PPHWSP_PN;

	ctx_obj = i915_gem_object_create(ctx->i915, context_size);
	if (IS_ERR(ctx_obj)) {
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
		return PTR_ERR(ctx_obj);
	}

	vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto error_deref_obj;
	}

	ring = intel_engine_create_ring(engine, ctx->ring_size);
	if (IS_ERR(ring)) {
		ret = PTR_ERR(ring);
		goto error_deref_obj;
	}

	ret = populate_lr_context(ctx, ctx_obj, engine, ring);
	if (ret) {
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
		goto error_ring_free;
	}

	ce->ring = ring;
	ce->state = vma;
	ce->initialised |= engine->init_context == NULL;

	return 0;

error_ring_free:
	intel_ring_free(ring);
error_deref_obj:
	i915_gem_object_put(ctx_obj);
	return ret;
}

void intel_lr_context_resume(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	enum intel_engine_id id;
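
	/* Because we emit WA_TAIL_DWORDS there may be a disparity
	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
	 * that stored in context. As we only write new commands from
	 * ce->ring->tail onwards, everything before that is junk. If the GPU
	 * starts reading from its RING_HEAD from the context, it may try to
	 * execute that junk and die.
	 *
	 * So to avoid that we reset the context images upon resume. For
	 * simplicity, we just zero everything out.
	 */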
	list_for_each_entry(ctx, &dev_priv->context_list, link) {
		for_each_engine(engine, dev_priv, id) {
			struct intel_context *ce = &ctx->engine[engine->id];
			u32 *reg;

			if (!ce->state)
				continue;

			reg = i915_gem_object_pin_map(ce->state->obj,
						      I915_MAP_WB);
			if (WARN_ON(IS_ERR(reg)))
				continue;

			reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
			reg[CTX_RING_HEAD+1] = 0;
			reg[CTX_RING_TAIL+1] = 0;

			ce->state->obj->mm.dirty = true;
			i915_gem_object_unpin_map(ce->state->obj);

			intel_ring_reset(ce->ring, 0);
		}
	}
}