#include <linux/interrupt.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_mocs.h"

#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)

#define RING_EXECLIST_QFULL (1 << 0x2)
#define RING_EXECLIST1_VALID (1 << 0x3)
#define RING_EXECLIST0_VALID (1 << 0x4)
#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE)
#define RING_EXECLIST1_ACTIVE (1 << 0x11)
#define RING_EXECLIST0_ACTIVE (1 << 0x12)

#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED (1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)

#define GEN8_CTX_STATUS_COMPLETED_MASK \
	(GEN8_CTX_STATUS_ACTIVE_IDLE | \
	 GEN8_CTX_STATUS_PREEMPTED | \
	 GEN8_CTX_STATUS_ELEMENT_SWITCH)

#define CTX_LRI_HEADER_0 0x01
#define CTX_CONTEXT_CONTROL 0x02
#define CTX_RING_HEAD 0x04
#define CTX_RING_TAIL 0x06
#define CTX_RING_BUFFER_START 0x08
#define CTX_RING_BUFFER_CONTROL 0x0a
#define CTX_BB_HEAD_U 0x0c
#define CTX_BB_HEAD_L 0x0e
#define CTX_BB_STATE 0x10
#define CTX_SECOND_BB_HEAD_U 0x12
#define CTX_SECOND_BB_HEAD_L 0x14
#define CTX_SECOND_BB_STATE 0x16
#define CTX_BB_PER_CTX_PTR 0x18
#define CTX_RCS_INDIRECT_CTX 0x1a
#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
#define CTX_LRI_HEADER_1 0x21
#define CTX_CTX_TIMESTAMP 0x22
#define CTX_PDP3_UDW 0x24
#define CTX_PDP3_LDW 0x26
#define CTX_PDP2_UDW 0x28
#define CTX_PDP2_LDW 0x2a
#define CTX_PDP1_UDW 0x2c
#define CTX_PDP1_LDW 0x2e
#define CTX_PDP0_UDW 0x30
#define CTX_PDP0_LDW 0x32
#define CTX_LRI_HEADER_2 0x41
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44

#define GEN8_CTX_VALID (1<<0)
#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
#define GEN8_CTX_FORCE_RESTORE (1<<2)
#define GEN8_CTX_L3LLC_COHERENT (1<<5)
#define GEN8_CTX_PRIVILEGE (1<<8)

#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
	(reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
	(reg_state)[(pos)+1] = (val); \
} while (0)

#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \
	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
} while (0)

#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
} while (0)

enum {
	FAULT_AND_HANG = 0,
	FAULT_AND_HALT,
	FAULT_AND_STREAM,
	FAULT_AND_CONTINUE
};
#define GEN8_CTX_ID_SHIFT 32
#define GEN8_CTX_ID_WIDTH 21
#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26

#define EXECLISTS_REQUEST_SIZE 64

#define WA_TAIL_DWORDS 2

static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
					    struct intel_engine_cs *engine);
static int intel_lr_context_pin(struct i915_gem_context *ctx,
				struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
				     struct i915_gem_context *ctx,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring);

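/*
 * Sanitize the enable_execlists module option. Execlist submission is
 * forced on for vGPU and for Gen9+, an explicit disable is honoured, and
 * otherwise it is enabled when logical ring contexts and PPGTT are
 * available. Returns 1 for execlists, 0 for the legacy ringbuffer path.
 */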
int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists)
{
	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && intel_vgpu_active(dev_priv))
		return 1;

	if (INTEL_GEN(dev_priv) >= 9)
		return 1;

	if (enable_execlists == 0)
		return 0;

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) &&
	    USES_PPGTT(dev_priv) &&
	    i915.use_mmio_flip >= 0)
		return 1;

	return 0;
}

static void
logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	engine->disable_lite_restore_wa =
		(IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
		 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
		(engine->id == VCS || engine->id == VCS2);

	engine->ctx_desc_template = GEN8_CTX_VALID;
	if (IS_GEN8(dev_priv))
		engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
	engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE;

	if (engine->disable_lite_restore_wa)
		engine->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
}

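/*
 * Recalculate the cached context descriptor for this context/engine
 * pair. The descriptor packs the GGTT address of the logical ring
 * context state (past the per-process HWSP page), the per-engine
 * descriptor template bits and the globally unique 21-bit context hw_id
 * that the hardware reports back in context-switch status events.
 */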
static void
intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
				   struct intel_engine_cs *engine)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	u64 desc;

	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));

	desc = ctx->desc_template;
	desc |= engine->ctx_desc_template;
	desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE;
	desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;

	ce->lrc_desc = desc;
}

uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
				     struct intel_engine_cs *engine)
{
	return ctx->engine[engine->id].lrc_desc;
}

static inline void
execlists_context_status_change(struct drm_i915_gem_request *rq,
				unsigned long status)
{
	/* Context status notifications are only consumed by GVT-g. */
	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return;

	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
}

static void
execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
{
	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
}

static u64 execlists_update_context(struct drm_i915_gem_request *rq)
{
	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
	u32 *reg_state = ce->lrc_reg_state;

	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);

	/*
	 * 32-bit PPGTT allocates page directories dynamically, so the PDP
	 * registers in the context image must be refreshed before each
	 * submission; 48-bit PPGTT uses the PML4 programmed at creation.
	 */
	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
		execlists_update_context_pdps(ppgtt, reg_state);

	return ce->lrc_desc;
}

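/*
 * Write both port descriptors to the ExecList Submit Port. ELSP[1] is
 * written first (zero if only one context is queued) and ELSP[0] last;
 * the write of the lower dword of ELSP[0] triggers the submission.
 * SCHEDULE_IN is only notified the first time a request occupies a port
 * (count == 0), i.e. not for lite restores.
 */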
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct execlist_port *port = engine->execlist_port;
	u32 __iomem *elsp =
		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
	u64 desc[2];

	if (!port[0].count)
		execlists_context_status_change(port[0].request,
						INTEL_CONTEXT_SCHEDULE_IN);
	desc[0] = execlists_update_context(port[0].request);
	engine->preempt_wa = port[0].count++;

	if (port[1].request) {
		GEM_BUG_ON(port[1].count);
		execlists_context_status_change(port[1].request,
						INTEL_CONTEXT_SCHEDULE_IN);
		desc[1] = execlists_update_context(port[1].request);
		port[1].count = 1;
	} else {
		desc[1] = 0;
	}
	GEM_BUG_ON(desc[0] == desc[1]);

	/* Both descriptors must always be written, in this order. */
	writel(upper_32_bits(desc[1]), elsp);
	writel(lower_32_bits(desc[1]), elsp);

	writel(upper_32_bits(desc[0]), elsp);
	/* The context is automatically loaded after the following write. */
	writel(lower_32_bits(desc[0]), elsp);
}

static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
{
	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
		ctx->execlists_force_single_submission);
}

static bool can_merge_ctx(const struct i915_gem_context *prev,
			  const struct i915_gem_context *next)
{
	if (prev != next)
		return false;

	if (ctx_single_port_submission(prev))
		return false;

	return true;
}

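/*
 * Pick the next request(s) from the execlist queue and fill the two
 * submission ports. Consecutive requests from the same context are
 * merged onto a single port so that context switches only happen between
 * different contexts; filling stops early when a context that requires
 * single-port submission is encountered.
 */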
static void execlists_dequeue(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *cursor, *last;
	struct execlist_port *port = engine->execlist_port;
	bool submit = false;

	last = port->request;
	if (last)
		/*
		 * When resubmitting a request, point its tail at the
		 * WaIdleLiteRestore padding NOOPs (wa_tail) so the hardware
		 * never sees HEAD == TAIL; see intel_logical_ring_advance().
		 */
		last->tail = last->wa_tail;

	GEM_BUG_ON(port[1].request);

	spin_lock(&engine->execlist_lock);
	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
		/* Can this request share a port with the previous one? */
		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
			/* Both ports hold distinct contexts: we are done. */
			if (port != engine->execlist_port)
				break;

			/*
			 * A context forced to single-port submission must
			 * not share the ELSP with another context.
			 */
			if (ctx_single_port_submission(cursor->ctx))
				break;

			GEM_BUG_ON(last->ctx == cursor->ctx);

			i915_gem_request_assign(&port->request, last);
			port++;
		}
		last = cursor;
		submit = true;
	}
	if (submit) {
		/* Decouple the submitted requests from the queue. */
		engine->execlist_queue.next = &cursor->execlist_link;
		cursor->execlist_link.prev = &engine->execlist_queue;

		i915_gem_request_assign(&port->request, last);
	}
	spin_unlock(&engine->execlist_lock);

	if (submit)
		execlists_submit_ports(engine);
}

static bool execlists_elsp_idle(struct intel_engine_cs *engine)
{
	return !engine->execlist_port[0].request;
}

static bool execlists_elsp_ready(struct intel_engine_cs *engine)
{
	int port;

	port = 1; /* wait for a free slot */
	if (engine->disable_lite_restore_wa || engine->preempt_wa)
		port = 0; /* wait for the GPU to be completely idle */

	return !engine->execlist_port[port].request;
}

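/*
 * Execlist submission tasklet, run from the context-switch interrupt:
 * walk the Context Status Buffer entries between the read and write
 * pointers, retire the request occupying port[0] on a completion event,
 * advance the CSB read pointer, and then try to dequeue more requests
 * onto the ELSP.
 */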
static void intel_lrc_irq_handler(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
	struct execlist_port *port = engine->execlist_port;
	struct drm_i915_private *dev_priv = engine->i915;

	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);

	if (!execlists_elsp_idle(engine)) {
		u32 __iomem *csb_mmio =
			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
		u32 __iomem *buf =
			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
		unsigned int csb, head, tail;

		csb = readl(csb_mmio);
		head = GEN8_CSB_READ_PTR(csb);
		tail = GEN8_CSB_WRITE_PTR(csb);
		if (tail < head)
			tail += GEN8_CSB_ENTRIES;
		while (head < tail) {
			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
			unsigned int status = readl(buf + 2 * idx);

			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
				continue;

			GEM_BUG_ON(port[0].count == 0);
			if (--port[0].count == 0) {
				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
				execlists_context_status_change(port[0].request,
								INTEL_CONTEXT_SCHEDULE_OUT);

				i915_gem_request_put(port[0].request);
				port[0] = port[1];
				memset(&port[1], 0, sizeof(port[1]));

				engine->preempt_wa = false;
			}

			GEM_BUG_ON(port[0].count == 0 &&
				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
		}

		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
				     GEN8_CSB_WRITE_PTR(csb) << 8),
		       csb_mmio);
	}

	if (execlists_elsp_ready(engine))
		execlists_dequeue(engine);

	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
}

static void execlists_submit_request(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	spin_lock_irqsave(&engine->execlist_lock, flags);

	list_add_tail(&request->execlist_link, &engine->execlist_queue);
	if (execlists_elsp_idle(engine))
		tasklet_hi_schedule(&engine->irq_tasklet);

	spin_unlock_irqrestore(&engine->execlist_lock, flags);
}

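/*
 * Per-request setup for execlists: reserve ring space for the breadcrumb
 * that is emitted when the request is finalised, allocate and pin the
 * logical ring context on first use, and (with GuC submission) reserve a
 * slot in the GuC work queue before any commands are written.
 */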
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_context *ce = &request->ctx->engine[engine->id];
	int ret;

	/*
	 * Reserve ring space up front for the breadcrumb emitted at
	 * request completion, so we do not have to wait mid-request.
	 */
	request->reserved_space += EXECLISTS_REQUEST_SIZE;

	if (!ce->state) {
		ret = execlists_context_deferred_alloc(request->ctx, engine);
		if (ret)
			return ret;
	}

	request->ring = ce->ring;

	ret = intel_lr_context_pin(request->ctx, engine);
	if (ret)
		return ret;

	if (i915.enable_guc_submission) {
		/*
		 * With GuC submission, reserve work queue space before any
		 * commands are emitted for this request.
		 */
		ret = i915_guc_wq_reserve(request);
		if (ret)
			goto err_unpin;
	}

	ret = intel_ring_begin(request, 0);
	if (ret)
		goto err_unreserve;

	if (!ce->initialised) {
		ret = engine->init_context(request);
		if (ret)
			goto err_unreserve;

		ce->initialised = true;
	}

	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
	return 0;

err_unreserve:
	if (i915.enable_guc_submission)
		i915_guc_wq_unreserve(request);
err_unpin:
	intel_lr_context_unpin(request->ctx, engine);
	return ret;
}

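/*
 * Advance the ring tail for the request and append the WaIdleLiteRestore
 * padding. The two extra NOOPs (WA_TAIL_DWORDS, reserved by the
 * emit_request functions) guarantee that a lite restore of the same
 * context never observes HEAD == TAIL.
 */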
static int
intel_logical_ring_advance(struct drm_i915_gem_request *request)
{
	struct intel_ring *ring = request->ring;
	struct intel_engine_cs *engine = request->engine;

	intel_ring_advance(ring);
	request->tail = ring->tail;

	/* Padding NOOPs; the caller must have reserved WA_TAIL_DWORDS. */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	request->wa_tail = ring->tail;

	/*
	 * Keep the previous context pinned until the following request is
	 * retired, so the context object is still resident for any residual
	 * writes the hardware makes into it during the context switch that
	 * follows the breadcrumb.
	 */
	request->previous_context = engine->last_context;
	engine->last_context = request->ctx;
	return 0;
}

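/*
 * Pin the logical ring context state and its ring into the GGTT, map the
 * register state page and cache a pointer to it, and refresh the cached
 * context descriptor. Pinning is refcounted per context/engine; with GuC
 * submission the GuC TLB is invalidated so it picks up the new mapping.
 */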
static int intel_lr_context_pin(struct i915_gem_context *ctx,
				struct intel_engine_cs *engine)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	void *vaddr;
	int ret;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);

	if (ce->pin_count++)
		return 0;

	ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
			   PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
	if (ret)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto unpin_vma;
	}

	ret = intel_ring_pin(ce->ring);
	if (ret)
		goto unpin_map;

	intel_lr_context_descriptor_update(ctx, engine);

	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
		i915_ggtt_offset(ce->ring->vma);

	ce->state->obj->dirty = true;

	/* Invalidate GuC TLB. */
	if (i915.enable_guc_submission) {
		struct drm_i915_private *dev_priv = ctx->i915;
		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
	}

	i915_gem_context_get(ctx);
	return 0;

unpin_map:
	i915_gem_object_unpin_map(ce->state->obj);
unpin_vma:
	__i915_vma_unpin(ce->state);
err:
	ce->pin_count = 0;
	return ret;
}

void intel_lr_context_unpin(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine)
{
	struct intel_context *ce = &ctx->engine[engine->id];

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	GEM_BUG_ON(ce->pin_count == 0);

	if (--ce->pin_count)
		return;

	intel_ring_unpin(ce->ring);

	i915_gem_object_unpin_map(ce->state->obj);
	i915_vma_unpin(ce->state);

	i915_gem_context_put(ctx);
}

static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
	int ret, i;
	struct intel_ring *ring = req->ring;
	struct i915_workarounds *w = &req->i915->workarounds;

	if (w->count == 0)
		return 0;

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	ret = intel_ring_begin(req, w->count * 2 + 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
	for (i = 0; i < w->count; i++) {
		intel_ring_emit_reg(ring, w->reg[i].addr);
		intel_ring_emit(ring, w->reg[i].value);
	}
	intel_ring_emit(ring, MI_NOOP);

	intel_ring_advance(ring);

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

#define wa_ctx_emit(batch, index, cmd) \
	do { \
		int __index = (index)++; \
		if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
			return -ENOSPC; \
		} \
		batch[__index] = (cmd); \
	} while (0)

#define wa_ctx_emit_reg(batch, index, reg) \
	wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg))

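/*
 * Emit, into a workaround batch, a flush of coherent L3 cache lines at
 * context switch: save GEN8_L3SQCREG4 to scratch memory, set the
 * flush-coherent-lines bit, issue a DC-flush pipe control, then restore
 * the saved register value. Returns the updated batch index.
 */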
static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
						uint32_t *batch,
						uint32_t index)
{
	struct drm_i915_private *dev_priv = engine->i915;
	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);

	/*
	 * Some SKL/KBL revisions also need the RO performance-disable bit
	 * kept set while this register is rewritten.
	 */
	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) ||
	    IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;

	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
				   MI_SRM_LRM_GLOBAL_GTT));
	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
	wa_ctx_emit(batch, index, 0);

	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
	wa_ctx_emit(batch, index, l3sqc4_flush);

	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
	wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
				   PIPE_CONTROL_DC_FLUSH_ENABLE));
	wa_ctx_emit(batch, index, 0);
	wa_ctx_emit(batch, index, 0);
	wa_ctx_emit(batch, index, 0);
	wa_ctx_emit(batch, index, 0);

	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
				   MI_SRM_LRM_GLOBAL_GTT));
	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
	wa_ctx_emit(batch, index, 0);

	return index;
}

static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
				    uint32_t offset,
				    uint32_t start_alignment)
{
	return wa_ctx->offset = ALIGN(offset, start_alignment);
}

static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
			     uint32_t offset,
			     uint32_t size_alignment)
{
	wa_ctx->size = offset - wa_ctx->offset;

	WARN(wa_ctx->size % size_alignment,
	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
	     wa_ctx->size, size_alignment);
	return 0;
}

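/*
 * Build the Gen8 indirect-context workaround batch, executed by the
 * hardware while restoring a render context: disable MI arbitration,
 * flush coherent L3 lines (Broadwell only), emit a post-sync pipe
 * control to scratch, and pad to a cacheline boundary as required for
 * INDIRECT_CTX programming.
 */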
static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
				    struct i915_wa_ctx_bb *wa_ctx,
				    uint32_t *batch,
				    uint32_t *offset)
{
	uint32_t scratch_addr;
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);

	/* Disable MI arbitration; it is re-enabled in the per-ctx batch. */
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);

	/* Flush coherent L3 cache lines (Broadwell only). */
	if (IS_BROADWELL(engine->i915)) {
		int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index);
		if (rc < 0)
			return rc;
		index = rc;
	}

	/* Pipe control with a post-sync write into scratch. */
	scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;

	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
				   PIPE_CONTROL_GLOBAL_GTT_IVB |
				   PIPE_CONTROL_CS_STALL |
				   PIPE_CONTROL_QW_WRITE));
	wa_ctx_emit(batch, index, scratch_addr);
	wa_ctx_emit(batch, index, 0);
	wa_ctx_emit(batch, index, 0);
	wa_ctx_emit(batch, index, 0);

	/* Pad to end of cacheline. */
	while (index % CACHELINE_DWORDS)
		wa_ctx_emit(batch, index, MI_NOOP);

	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
}

static int gen8_init_perctx_bb(struct intel_engine_cs *engine,
			       struct i915_wa_ctx_bb *wa_ctx,
			       uint32_t *batch,
			       uint32_t *offset)
{
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);

	/* Re-enable MI arbitration after the indirect-context batch. */
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);

	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);

	return wa_ctx_end(wa_ctx, *offset = index, 1);
}

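/*
 * Gen9 version of the indirect-context workaround batch: conditionally
 * disable MI arbitration on early SKL/BXT steppings, flush coherent L3
 * lines, restore COMMON_SLICE_CHICKEN2, add a pipe-control workaround
 * for KBL A0 and, on parts with pooled EU, program MEDIA_POOL_STATE.
 */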
static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
				    struct i915_wa_ctx_bb *wa_ctx,
				    uint32_t *batch,
				    uint32_t *offset)
{
	int ret;
	struct drm_i915_private *dev_priv = engine->i915;
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);

	/* Disable MI arbitration on early SKL/BXT steppings. */
	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_D0) ||
	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);

	/* Flush coherent L3 cache lines at context switch. */
	ret = gen8_emit_flush_coherentl3_wa(engine, batch, index);
	if (ret < 0)
		return ret;
	index = ret;

	/* Restore COMMON_SLICE_CHICKEN2 to its default value. */
	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
	wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2);
	wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(
			    GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE));
	wa_ctx_emit(batch, index, MI_NOOP);

	/* Additional pipe-control workaround for KBL A0. */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
		u32 scratch_addr =
			i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;

		wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
		wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
					   PIPE_CONTROL_GLOBAL_GTT_IVB |
					   PIPE_CONTROL_CS_STALL |
					   PIPE_CONTROL_QW_WRITE));
		wa_ctx_emit(batch, index, scratch_addr);
		wa_ctx_emit(batch, index, 0);
		wa_ctx_emit(batch, index, 0);
		wa_ctx_emit(batch, index, 0);
	}

	/* On parts with pooled EU, enable the pool from the WA batch. */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * Load the default 3x6 EU pool configuration; the hardware
		 * ignores the bits of any disabled subslice.
		 */
		u32 eu_pool_config = 0x00777000;
		wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE);
		wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE);
		wa_ctx_emit(batch, index, eu_pool_config);
		wa_ctx_emit(batch, index, 0);
		wa_ctx_emit(batch, index, 0);
		wa_ctx_emit(batch, index, 0);
	}

	/* Pad to end of cacheline. */
	while (index % CACHELINE_DWORDS)
		wa_ctx_emit(batch, index, MI_NOOP);

	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
}

static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
			       struct i915_wa_ctx_bb *wa_ctx,
			       uint32_t *batch,
			       uint32_t *offset)
{
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);

	/* Disable pixel mask camming on early SKL/BXT steppings. */
	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_B0) ||
	    IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) {
		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
		wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
		wa_ctx_emit(batch, index,
			    _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
		wa_ctx_emit(batch, index, MI_NOOP);
	}

	/* Clear the subslice TDL state-ack bits on early BXT steppings. */
	if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) {
		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4));

		wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK);
		wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));

		wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1);
		wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));

		wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2);
		wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS));

		wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2);
		/* Mask bits are zero: a dummy write that leaves the register unchanged. */
		wa_ctx_emit(batch, index, 0x0);
		wa_ctx_emit(batch, index, MI_NOOP);
	}

	/* Re-enable MI arbitration on early SKL/BXT steppings. */
	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_D0) ||
	    IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);

	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);

	return wa_ctx_end(wa_ctx, *offset = index, 1);
}

static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH);
	if (err)
		goto err;

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma);
}

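/*
 * Allocate the per-engine workaround context page (render engine only)
 * and fill in the indirect-context and per-context workaround batches
 * for the current generation. Failure is treated as non-fatal by the
 * caller.
 */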
static int intel_init_workaround_bb(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	uint32_t *batch;
	uint32_t offset;
	struct page *page;
	int ret;

	WARN_ON(engine->id != RCS);

	/* The WA batch is only defined up to Gen9. */
	if (INTEL_GEN(engine->i915) > 9) {
		DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
			  INTEL_GEN(engine->i915));
		return 0;
	}

	/* Some workarounds require a scratch page. */
	if (!engine->scratch) {
		DRM_ERROR("scratch page not allocated for %s\n", engine->name);
		return -EINVAL;
	}

	ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
	if (ret) {
		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
		return ret;
	}

	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
	batch = kmap_atomic(page);
	offset = 0;

	if (IS_GEN8(engine->i915)) {
		ret = gen8_init_indirectctx_bb(engine,
					       &wa_ctx->indirect_ctx,
					       batch,
					       &offset);
		if (ret)
			goto out;

		ret = gen8_init_perctx_bb(engine,
					  &wa_ctx->per_ctx,
					  batch,
					  &offset);
		if (ret)
			goto out;
	} else if (IS_GEN9(engine->i915)) {
		ret = gen9_init_indirectctx_bb(engine,
					       &wa_ctx->indirect_ctx,
					       batch,
					       &offset);
		if (ret)
			goto out;

		ret = gen9_init_perctx_bb(engine,
					  &wa_ctx->per_ctx,
					  batch,
					  &offset);
		if (ret)
			goto out;
	}

out:
	kunmap_atomic(batch);
	if (ret)
		lrc_destroy_wa_ctx_obj(engine);

	return ret;
}

static void lrc_init_hws(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
		   engine->status_page.ggtt_offset);
	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
}

static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = intel_mocs_init_engine(engine);
	if (ret)
		return ret;

	lrc_init_hws(engine);

	intel_engine_reset_breadcrumbs(engine);

	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);

	I915_WRITE(RING_MODE_GEN7(engine),
		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));

	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);

	intel_engine_init_hangcheck(engine);

	/* After a GPU reset, we may have requests to replay. */
	if (!execlists_elsp_idle(engine))
		execlists_submit_ports(engine);

	return 0;
}

static int gen8_init_render_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_common_ring(engine);
	if (ret)
		return ret;

	/*
	 * The AsyncFlip performance optimisations must be disabled in
	 * order to use MI_WAIT_FOR_EVENT within the command streamer.
	 */
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	return init_workarounds_ring(engine);
}

static int gen9_init_render_ring(struct intel_engine_cs *engine)
{
	int ret;

	ret = gen8_init_common_ring(engine);
	if (ret)
		return ret;

	return init_workarounds_ring(engine);
}

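/*
 * Minimal engine fixup after a GPU reset. The context image of the hung
 * request may be corrupt, so its register state is regenerated and the
 * ring head is pointed at the first instruction after the batch
 * (request->postfix), skipping the guilty payload. The execlist ports
 * and CSB pointers are then realigned with what the hardware will
 * reload, and the request tail is moved back before the
 * WaIdleLiteRestore padding.
 */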
static void reset_common_ring(struct intel_engine_cs *engine,
			      struct drm_i915_gem_request *request)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct execlist_port *port = engine->execlist_port;
	struct intel_context *ce = &request->ctx->engine[engine->id];

	/* Rebuild the register state from scratch. */
	execlists_init_reg_state(ce->lrc_reg_state,
				 request->ctx, engine, ce->ring);

	/* Move RING_HEAD onto the breadcrumb, past the hanging batch. */
	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
		i915_ggtt_offset(ce->ring->vma);
	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;

	request->ring->head = request->postfix;
	request->ring->last_retired_head = -1;
	intel_ring_update_space(request->ring);

	if (i915.enable_guc_submission)
		return;

	/* Catch up with any missed context-switch interrupts. */
	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
	if (request->ctx != port[0].request->ctx) {
		i915_gem_request_put(port[0].request);
		port[0] = port[1];
		memset(&port[1], 0, sizeof(port[1]));
	}

	GEM_BUG_ON(request->ctx != port[0].request->ctx);
	port[0].count = 0;
	port[1].count = 0;

	/* Point the tail back before the WaIdleLiteRestore padding. */
	request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
}

static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
{
	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
	struct intel_ring *ring = req->ring;
	struct intel_engine_cs *engine = req->engine;
	const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
	int i, ret;

	ret = intel_ring_begin(req, num_lri_cmds * 2 + 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds));
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);

		intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i));
		intel_ring_emit(ring, upper_32_bits(pd_daddr));
		intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i));
		intel_ring_emit(ring, lower_32_bits(pd_daddr));
	}

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
			      u64 offset, u32 len,
			      unsigned int dispatch_flags)
{
	struct intel_ring *ring = req->ring;
	bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
	int ret;

	/*
	 * Emit the PDP registers from the ring when the page directories
	 * have changed; the hardware cannot be relied on to reload them,
	 * especially across a lite restore. 48-bit PPGTT uses a fixed PML4
	 * set up at init and does not need this.
	 */
	if (req->ctx->ppgtt &&
	    (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) {
		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
		    !intel_vgpu_active(req->i915)) {
			ret = intel_logical_ring_emit_pdps(req);
			if (ret)
				return ret;
		}

		req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
	}

	ret = intel_ring_begin(req, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 |
			(ppgtt<<8) |
			(dispatch_flags & I915_DISPATCH_RS ?
			 MI_BATCH_RESOURCE_STREAMER : 0));
	intel_ring_emit(ring, lower_32_bits(offset));
	intel_ring_emit(ring, upper_32_bits(offset));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	I915_WRITE_IMR(engine,
		       ~(engine->irq_enable_mask | engine->irq_keep_mask));
	POSTING_READ_FW(RING_IMR(engine->mmio_base));
}

static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
}

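/*
 * Flush for the non-render engines: MI_FLUSH_DW with a post-sync write
 * to the scratch slot in the HWSP; TLBs (and the BSD cache on the video
 * engine) are invalidated when EMIT_INVALIDATE is requested.
 */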
static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode)
{
	struct intel_ring *ring = request->ring;
	u32 cmd;
	int ret;

	ret = intel_ring_begin(request, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW + 1;

	/*
	 * Always request a command barrier so that subsequent commands,
	 * such as breadcrumb interrupts, are strictly ordered with respect
	 * to the contents of the write cache being flushed to memory.
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (request->engine->id == VCS)
			cmd |= MI_INVALIDATE_BSD;
	}

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring,
			I915_GEM_HWS_SCRATCH_ADDR |
			MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0); /* upper address */
	intel_ring_emit(ring, 0); /* value */
	intel_ring_advance(ring);

	return 0;
}

static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
				  u32 mode)
{
	struct intel_ring *ring = request->ring;
	struct intel_engine_cs *engine = request->engine;
	u32 scratch_addr =
		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 flags = 0;
	int ret;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/*
		 * Gen9 needs a null pipe control emitted before one that
		 * carries VF_CACHE_INVALIDATE.
		 */
		if (IS_GEN9(request->i915))
			vf_flush_wa = true;

		/*
		 * Early KBL steppings additionally need a DC flush before
		 * and a CS stall after the main pipe control.
		 */
		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
			dc_flush_wa = true;
	}

	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	ret = intel_ring_begin(request, len);
	if (ret)
		return ret;

	if (vf_flush_wa) {
		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
	}

	if (dc_flush_wa) {
		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
		intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
	}

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);

	if (dc_flush_wa) {
		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
		intel_ring_emit(ring, PIPE_CONTROL_CS_STALL);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
{
	/*
	 * BXT A steppings have a coherency issue whereby the seqno write to
	 * the HWSP occasionally fails to invalidate the CPU cache; work
	 * around it by flushing the corresponding status page cacheline
	 * before the seqno is read.
	 */
	intel_flush_status_page(engine, I915_GEM_HWS_INDEX);
}

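/*
 * Breadcrumb emission: the seqno is written to the per-engine HWSP via a
 * flush (or pipe control on the render variant) followed by
 * MI_USER_INTERRUPT. The ring space requested also reserves
 * WA_TAIL_DWORDS for the padding NOOPs added by
 * intel_logical_ring_advance().
 */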
static int gen8_emit_request(struct drm_i915_gem_request *request)
{
	struct intel_ring *ring = request->ring;
	int ret;

	ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
	if (ret)
		return ret;

	/* Bit 5 of the MI_FLUSH_DW address must be zero. */
	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));

	intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
	intel_ring_emit(ring,
			intel_hws_seqno_address(request->engine) |
			MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, request->fence.seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_emit(ring, MI_NOOP);
	return intel_logical_ring_advance(request);
}

static int gen8_emit_request_render(struct drm_i915_gem_request *request)
{
	struct intel_ring *ring = request->ring;
	int ret;

	ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS);
	if (ret)
		return ret;

	/* A qword write is used, so the seqno must be qword aligned. */
	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);

	/*
	 * Post-sync writes following a GPGPU workload need a preceding CS
	 * stall, hence CS_STALL in the flags below.
	 */
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
	intel_ring_emit(ring,
			(PIPE_CONTROL_GLOBAL_GTT_IVB |
			 PIPE_CONTROL_CS_STALL |
			 PIPE_CONTROL_QW_WRITE));
	intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, i915_gem_request_get_seqno(request));
	/* The qword write clobbers one extra dword of the HWS. */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_emit(ring, MI_NOOP);
	return intel_logical_ring_advance(request);
}

static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
{
	int ret;

	ret = intel_logical_ring_workarounds_emit(req);
	if (ret)
		return ret;

	ret = intel_rcs_context_init_mocs(req);
	/*
	 * Failing to program the MOCS is non-fatal: the rest of the init
	 * still proceeds, only with degraded performance.
	 */
	if (ret)
		DRM_ERROR("MOCS failed to program: expect performance issues.\n");

	return i915_gem_render_state_init(req);
}

/*
 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
 * @engine: Engine Command Streamer.
 */
void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv;

	if (!intel_engine_initialized(engine))
		return;

	/* The tasklet should not be scheduled at this point. */
	if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->irq_tasklet.state)))
		tasklet_kill(&engine->irq_tasklet);

	dev_priv = engine->i915;

	if (engine->buffer) {
		WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
	}

	if (engine->cleanup)
		engine->cleanup(engine);

	intel_engine_cleanup_common(engine);

	if (engine->status_page.vma) {
		i915_gem_object_unpin_map(engine->status_page.vma->obj);
		engine->status_page.vma = NULL;
	}
	intel_lr_context_unpin(dev_priv->kernel_context, engine);

	lrc_destroy_wa_ctx_obj(engine);
	engine->i915 = NULL;
}

void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;

	for_each_engine(engine, dev_priv)
		engine->submit_request = execlists_submit_request;
}

static void
logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
	/* Default vfuncs which can be overridden by each engine. */
	engine->init_hw = gen8_init_common_ring;
	engine->reset_hw = reset_common_ring;
	engine->emit_flush = gen8_emit_flush;
	engine->emit_request = gen8_emit_request;
	engine->submit_request = execlists_submit_request;

	engine->irq_enable = gen8_logical_ring_enable_irq;
	engine->irq_disable = gen8_logical_ring_disable_irq;
	engine->emit_bb_start = gen8_emit_bb_start;
	if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
		engine->irq_seqno_barrier = bxt_a_seqno_barrier;
}

static inline void
logical_ring_default_irqs(struct intel_engine_cs *engine)
{
	unsigned shift = engine->irq_shift;
	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}

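/*
 * In LRC mode the hardware status page lives inside the per-process HWSP
 * page of the kernel context image: map it and record its CPU and GGTT
 * addresses in the engine's status_page.
 */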
static int
lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
{
	const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
	void *hws;

	hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	engine->status_page.page_addr = hws + hws_offset;
	engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
	engine->status_page.vma = vma;

	return 0;
}

static void
logical_ring_setup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	enum forcewake_domains fw_domains;

	intel_engine_setup_common(engine);

	engine->buffer = NULL;

	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
						    RING_ELSP(engine),
						    FW_REG_WRITE);

	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     RING_CONTEXT_STATUS_PTR(engine),
						     FW_REG_READ | FW_REG_WRITE);

	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     RING_CONTEXT_STATUS_BUF_BASE(engine),
						     FW_REG_READ);

	engine->fw_domains = fw_domains;

	tasklet_init(&engine->irq_tasklet,
		     intel_lrc_irq_handler, (unsigned long)engine);

	logical_ring_init_platform_invariants(engine);
	logical_ring_default_vfuncs(engine);
	logical_ring_default_irqs(engine);
}

static int
logical_ring_init(struct intel_engine_cs *engine)
{
	struct i915_gem_context *dctx = engine->i915->kernel_context;
	int ret;

	ret = intel_engine_init_common(engine);
	if (ret)
		goto error;

	ret = execlists_context_deferred_alloc(dctx, engine);
	if (ret)
		goto error;

	/* As this is the default context, always pin it. */
	ret = intel_lr_context_pin(dctx, engine);
	if (ret) {
		DRM_ERROR("Failed to pin context for %s: %d\n",
			  engine->name, ret);
		goto error;
	}

	/* And set up the hardware status page. */
	ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
	if (ret) {
		DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
		goto error;
	}

	return 0;

error:
	intel_logical_ring_cleanup(engine);
	return ret;
}

int logical_render_ring_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	logical_ring_setup(engine);

	if (HAS_L3_DPF(dev_priv))
		engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	/* Override some defaults for the render ring. */
	if (INTEL_GEN(dev_priv) >= 9)
		engine->init_hw = gen9_init_render_ring;
	else
		engine->init_hw = gen8_init_render_ring;
	engine->init_context = gen8_init_rcs_context;
	engine->emit_flush = gen8_emit_flush_render;
	engine->emit_request = gen8_emit_request_render;

	ret = intel_engine_create_scratch(engine, 4096);
	if (ret)
		return ret;

	ret = intel_init_workaround_bb(engine);
	if (ret) {
		/*
		 * Continue even if the WA batch fails to initialize: only
		 * rare glitches are expected, nothing that prevents use of
		 * the GPU.
		 */
		DRM_ERROR("WA batch buffer initialization failed: %d\n",
			  ret);
	}

	ret = logical_ring_init(engine);
	if (ret) {
		lrc_destroy_wa_ctx_obj(engine);
	}

	return ret;
}

int logical_xcs_ring_init(struct intel_engine_cs *engine)
{
	logical_ring_setup(engine);

	return logical_ring_init(engine);
}

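/*
 * Build the GEN8_R_PWR_CLK_STATE value for the context image. Only Gen9+
 * needs this: the slice/subslice counts and EU min/max come from the
 * device's sseu info and are only programmed for resources that support
 * power gating, leaving the rest at hardware defaults.
 */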
static u32
make_rpcs(struct drm_i915_private *dev_priv)
{
	u32 rpcs = 0;

	/*
	 * Prior to Gen9 no explicit RPCS request is needed for full
	 * slice/subslice/EU enablement.
	 */
	if (INTEL_GEN(dev_priv) < 9)
		return 0;

	/*
	 * From Gen9 onwards, render power gating can leave slices,
	 * subslices or EUs partially enabled, so an explicit request for
	 * full enablement has to be made through RPCS.
	 */
	if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) {
		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) <<
			GEN8_RPCS_S_CNT_SHIFT;
		rpcs |= GEN8_RPCS_ENABLE;
	}

	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
			GEN8_RPCS_SS_CNT_SHIFT;
		rpcs |= GEN8_RPCS_ENABLE;
	}

	if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
		rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
			GEN8_RPCS_EU_MIN_SHIFT;
		rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
			GEN8_RPCS_EU_MAX_SHIFT;
		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
{
	u32 indirect_ctx_offset;

	switch (INTEL_GEN(engine->i915)) {
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		/* fall through */
	case 9:
		indirect_ctx_offset =
			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
		break;
	case 8:
		indirect_ctx_offset =
			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
		break;
	}

	return indirect_ctx_offset;
}

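/*
 * Populate the register-state portion of a logical ring context image:
 * the LRI headers and ring registers, the indirect/per-context
 * workaround batch pointers (render engine only), the PDP or PML4
 * entries for the context's PPGTT, and R_PWR_CLK_STATE for the render
 * engine.
 */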
static void execlists_init_reg_state(u32 *reg_state,
				     struct i915_gem_context *ctx,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;

	/*
	 * The register state is laid out as MI_LOAD_REGISTER_IMM headers
	 * followed by (register offset, value) pairs; ASSIGN_CTX_REG keeps
	 * each pair in sync.
	 */
	reg_state[CTX_LRI_HEADER_0] =
		MI_LOAD_REGISTER_IMM(engine->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED;
	ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL,
		       RING_CONTEXT_CONTROL(engine),
		       _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
					  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
					  (HAS_RESOURCE_STREAMER(dev_priv) ?
					   CTX_CTRL_RS_CTX_ENABLE : 0)));
	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START,
		       RING_START(engine->mmio_base), 0);
	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
		       RING_CTL(engine->mmio_base),
		       ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U,
		       RING_BBADDR_UDW(engine->mmio_base), 0);
	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L,
		       RING_BBADDR(engine->mmio_base), 0);
	ASSIGN_CTX_REG(reg_state, CTX_BB_STATE,
		       RING_BBSTATE(engine->mmio_base),
		       RING_BB_PPGTT);
	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U,
		       RING_SBBADDR_UDW(engine->mmio_base), 0);
	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L,
		       RING_SBBADDR(engine->mmio_base), 0);
	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE,
		       RING_SBBSTATE(engine->mmio_base), 0);
	if (engine->id == RCS) {
		ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR,
			       RING_BB_PER_CTX_PTR(engine->mmio_base), 0);
		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX,
			       RING_INDIRECT_CTX(engine->mmio_base), 0);
		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET,
			       RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0);
		if (engine->wa_ctx.vma) {
			struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
			u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

			reg_state[CTX_RCS_INDIRECT_CTX+1] =
				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
				(wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);

			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
				intel_lr_indirect_ctx_offset(engine) << 6;

			reg_state[CTX_BB_PER_CTX_PTR+1] =
				(ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
				0x01;
		}
	}
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
	ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP,
		       RING_CTX_TIMESTAMP(engine->mmio_base), 0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0),
		       0);
	ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0),
		       0);

	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
		/*
		 * With 48-bit PPGTT the PML4 root is allocated once at ppgtt
		 * init, so it can be programmed here and left alone.
		 */
		ASSIGN_CTX_PML4(ppgtt, reg_state);
	} else {
		/*
		 * 32-bit PPGTT allocates page directories dynamically, so
		 * the PDP registers are rewritten at submission time in
		 * execlists_update_context(); seed their current values.
		 */
		execlists_update_context_pdps(ppgtt, reg_state);
	}

	if (engine->id == RCS) {
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
		ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
			       make_rpcs(dev_priv));
	}
}

static int
populate_lr_context(struct i915_gem_context *ctx,
		    struct drm_i915_gem_object *ctx_obj,
		    struct intel_engine_cs *engine,
		    struct intel_ring *ring)
{
	void *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
	if (ret) {
		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
		return ret;
	}

	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
		return ret;
	}
	ctx_obj->dirty = true;

	/*
	 * The state page of the context object contains fields that must
	 * be set up prior to the first execution.
	 */
	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
				 ctx, engine, ring);

	i915_gem_object_unpin_map(ctx_obj);

	return 0;
}

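/*
 * Return the size of the context image for an engine. Only the render
 * engine needs the larger, generation-dependent image; the other engines
 * all use GEN8_LR_CONTEXT_OTHER_SIZE.
 */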
uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
{
	int ret = 0;

	WARN_ON(INTEL_GEN(engine->i915) < 8);

	switch (engine->id) {
	case RCS:
		if (INTEL_GEN(engine->i915) >= 9)
			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
		else
			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
		break;
	case VCS:
	case BCS:
	case VECS:
	case VCS2:
		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
		break;
	}

	return ret;
}

static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
					    struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *ctx_obj;
	struct intel_context *ce = &ctx->engine[engine->id];
	struct i915_vma *vma;
	uint32_t context_size;
	struct intel_ring *ring;
	int ret;

	WARN_ON(ce->state);

	context_size = round_up(intel_lr_context_size(engine), 4096);

	/* Add the per-process HWSP page(s) in front of the register state. */
	context_size += PAGE_SIZE * LRC_PPHWSP_PN;

	ctx_obj = i915_gem_object_create(&ctx->i915->drm, context_size);
	if (IS_ERR(ctx_obj)) {
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
		return PTR_ERR(ctx_obj);
	}

	vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto error_deref_obj;
	}

	ring = intel_engine_create_ring(engine, ctx->ring_size);
	if (IS_ERR(ring)) {
		ret = PTR_ERR(ring);
		goto error_deref_obj;
	}

	ret = populate_lr_context(ctx, ctx_obj, engine, ring);
	if (ret) {
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
		goto error_ring_free;
	}

	ce->ring = ring;
	ce->state = vma;
	ce->initialised = engine->init_context == NULL;

	return 0;

error_ring_free:
	intel_ring_free(ring);
error_deref_obj:
	i915_gem_object_put(ctx_obj);
	return ret;
}

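/*
 * Reset the kernel context images after resume: the hardware ring state
 * is not preserved across suspend, so zero the ring HEAD/TAIL registers
 * in each engine's kernel context and reset the software ring pointers
 * to match.
 */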
void intel_lr_context_resume(struct drm_i915_private *dev_priv)
{
	struct i915_gem_context *ctx = dev_priv->kernel_context;
	struct intel_engine_cs *engine;

	for_each_engine(engine, dev_priv) {
		struct intel_context *ce = &ctx->engine[engine->id];
		void *vaddr;
		uint32_t *reg_state;

		if (!ce->state)
			continue;

		vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
		if (WARN_ON(IS_ERR(vaddr)))
			continue;

		reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;

		reg_state[CTX_RING_HEAD+1] = 0;
		reg_state[CTX_RING_TAIL+1] = 0;

		ce->state->obj->dirty = true;
		i915_gem_object_unpin_map(ce->state->obj);

		ce->ring->head = 0;
		ce->ring->tail = 0;
	}
}