/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>
#include <linux/dma-fence-array.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

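/* Snapshot of the GPU and job state taken when a hang is detected,
 * kept around until userspace fetches it with the GET_HANG_STATE
 * ioctl.
 */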
struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_put_unlocked(state->bo[i]);

	kfree(state);
}

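/* Copies the recorded hang state out to userspace, creating GEM
 * handles for the BOs that were involved in the hung jobs.
 */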
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	if (!vc4->v3d) {
		DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
		return -ENODEV;
	}

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer so the memcpy below doesn't stomp it. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err_delete_handle;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user(u64_to_user_ptr(get_state->bo),
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_delete_handle:
	if (ret) {
		for (i = 0; i < state->bo_count; i++)
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
	}

err_free:
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);

	return ret;
}

static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, k, unref_list_count;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	/* Get the BOs from both the binner and renderer into the hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	k = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			bo = to_vc4_bo(&exec[i]->bo[j]->base);

			/* Retain BOs just in case they were marked purgeable.
			 * This prevents the BO from being purged before
			 * someone had a chance to dump the hang state.
			 */
			WARN_ON(!refcount_read(&bo->usecnt));
			refcount_inc(&bo->usecnt);
			drm_gem_object_get(&exec[i]->bo[j]->base);
			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			/* No need to retain BOs coming from the ->unref_list
			 * because they are naturally unpurgeable.
			 */
			drm_gem_object_get(&bo->base.base);
			kernel_state->bo[k++] = &bo->base.base;
		}
	}

	WARN_ON_ONCE(k != state->bo_count);

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	/* Turn the BOs we retained above into unpurgeable ones so they
	 * can be dumped later without their backing storage being
	 * released: mark them WILLNEED (unless they were already
	 * purged) and drop the temporary usecnt reference taken when
	 * they were collected.
	 */
	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);

		if (bo->madv == __VC4_MADV_NOTSUPP)
			continue;

		mutex_lock(&bo->madv_lock);
		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
			bo->madv = VC4_MADV_WILLNEED;
		refcount_dec(&bo->usecnt);
		mutex_unlock(&bo->madv_lock);
	}

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping and
		 * reacquiring the runtime-PM reference.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

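/* Hangcheck timer callback: checks whether the GPU has made progress
 * since the last check and, if not, schedules a reset from process
 * context.
 */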
static void
vc4_hangcheck_elapsed(struct timer_list *t)
{
	struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer);
	struct drm_device *dev = vc4->dev;
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

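/* Sleeps until vc4->finished_seqno reaches the requested seqno or the
 * timeout expires (~0ull means wait forever).  Returns 0 on success,
 * -ETIME on timeout, or -ERESTARTSYS if an interruptible wait was
 * broken by a signal.
 */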
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 cache, then the texture, uniform and
	 * instruction caches in all of the slices.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

static void
vc4_flush_texture_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC));
}

/* Kicks the first job in the binner queue off to the hardware, if
 * there is one.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Only start the perfmon if it was not already started by a
	 * previous job.
	 */
	if (exec->perfmon && vc4->active_perfmon != exec->perfmon)
		vc4_perfmon_start(vc4, exec->perfmon);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		struct vc4_exec_info *next;

		vc4_move_job_to_render(dev, exec);
		next = vc4_first_bin_job(vc4);

		/* Only chain directly into the next bin job if it uses
		 * the same perfmon as the job we just moved to the
		 * render queue.
		 */
		if (next && next->perfmon == exec->perfmon)
			goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	/* A previous RCL may have written to one of our textures, and
	 * our full cache flush at bin time may have occurred before
	 * that RCL completed.  Flush the texture cache now, but not
	 * the instructions or uniforms (since we don't write those
	 * from an RCL).
	 */
	vc4_flush_texture_caches(dev);

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

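/* Stamps all the BOs used by the job with its seqno and attaches the
 * job's fence to their reservation objects.
 */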
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;

		reservation_object_add_shared_fence(bo->base.base.resv, exec->fence);
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;

		reservation_object_add_excl_fence(bo->base.base.resv, exec->fence);
	}
}

static void
vc4_unlock_bo_reservations(struct drm_device *dev,
			   struct vc4_exec_info *exec,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int i;

	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = &exec->bo[i]->base;

		ww_mutex_unlock(&bo->resv->lock);
	}

	ww_acquire_fini(acquire_ctx);
}

/* Takes the reservation lock on all the BOs referenced by the job
 * (exec->bo[]), so that their reservations can be updated at
 * queue-submit time.
 *
 * BOs on exec->unref_list are entirely private to vc4 and never get
 * dma-buf fences attached, so they are not locked here.
 */
static int
vc4_lock_bo_reservations(struct drm_device *dev,
			 struct vc4_exec_info *exec,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int contended_lock = -1;
	int i, ret;
	struct drm_gem_object *bo;

	ww_acquire_init(acquire_ctx, &reservation_ww_class);

retry:
	if (contended_lock != -1) {
		bo = &exec->bo[contended_lock]->base;
		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
						       acquire_ctx);
		if (ret) {
			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	for (i = 0; i < exec->bo_count; i++) {
		if (i == contended_lock)
			continue;

		bo = &exec->bo[i]->base;

		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
		if (ret) {
			int j;

			for (j = 0; j < i; j++) {
				bo = &exec->bo[j]->base;
				ww_mutex_unlock(&bo->resv->lock);
			}

			if (contended_lock != -1 && contended_lock >= i) {
				bo = &exec->bo[contended_lock]->base;

				ww_mutex_unlock(&bo->resv->lock);
			}

			if (ret == -EDEADLK) {
				contended_lock = i;
				goto retry;
			}

			ww_acquire_done(acquire_ctx);
			return ret;
		}
	}

	ww_acquire_done(acquire_ctx);

	/* Reserve space for our shared (read-only) fence references,
	 * before we commit the CL to the hardware.
	 */
	for (i = 0; i < exec->bo_count; i++) {
		bo = &exec->bo[i]->base;

		ret = reservation_object_reserve_shared(bo->resv, 1);
		if (ret) {
			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
			return ret;
		}
	}

	return 0;
}

/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static int
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
		 struct ww_acquire_ctx *acquire_ctx,
		 struct drm_syncobj *out_sync)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *renderjob;
	uint64_t seqno;
	unsigned long irqflags;
	struct vc4_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return -ENOMEM;
	fence->dev = dev;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;

	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
		       vc4->dma_fence_context, exec->seqno);
	fence->seqno = exec->seqno;
	exec->fence = &fence->base;

	if (out_sync)
		drm_syncobj_replace_fence(out_sync, exec->fence);

	vc4_update_bo_seqnos(exec, seqno);

	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no bin job was executing and if the render job (if any) has the
	 * same perfmon as our job attached to it (or if both jobs don't have
	 * perfmon activated), then kick ours off.  Otherwise, it'll get
	 * started when the previous job's flush/render done interrupt occurs.
	 */
	renderjob = vc4_first_render_job(vc4);
	if (vc4_first_bin_job(vc4) == exec &&
	    (!renderjob || renderjob->perfmon == exec->perfmon)) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return 0;
}

/**
 * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @exec: V3D job being set up
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* A job with no BOs gives the command validator nothing
		 * to reference, so reject it outright.
		 */
		DRM_DEBUG("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = kvmalloc_array(exec->bo_count,
				  sizeof(struct drm_gem_cma_object *),
				  GFP_KERNEL | __GFP_ZERO);
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			break;
		}

		drm_gem_object_get(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

	if (ret)
		goto fail_put_bo;

	for (i = 0; i < exec->bo_count; i++) {
		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
		if (ret)
			goto fail_dec_usecnt;
	}

	kvfree(handles);
	return 0;

fail_dec_usecnt:
	/* Decrease usecnt on acquired objects.
	 * We cannot rely on vc4_complete_exec() to release resources here,
	 * because vc4_complete_exec() has no information about which BO has
	 * had its ->usecnt incremented.
	 * To make things easier we just free everything explicitly and set
	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
	 * step.
	 */
	for (i-- ; i >= 0; i--)
		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));

fail_put_bo:
	/* Release any reference to acquired objects. */
	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
		drm_gem_object_put_unlocked(&exec->bo[i]->base);

fail:
	kvfree(handles);
	kvfree(exec->bo);
	exec->bo = NULL;
	return ret;
}

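/* Copies in the user's binner command list, shader records and
 * uniforms, validates them, and stores the validated copies in a
 * freshly allocated BO that the hardware will execute from.
 */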
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_DEBUG("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate temporary storage for the command lists and shader
	 * records we copy in from userspace.  We don't copy directly
	 * into the BO because the validator needs to read the contents
	 * back, and the BO's vaddr mapping may be uncached.
	 */
	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   u64_to_user_ptr(args->bin_cl),
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   u64_to_user_ptr(args->shader_rec),
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   u64_to_user_ptr(args->uniforms),
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	if (exec->found_tile_binning_mode_config_packet) {
		ret = vc4_v3d_bin_bo_get(vc4, &exec->bin_bo_used);
		if (ret)
			goto fail;
	}

	/* Block on any previous rendering that writes to the BOs this
	 * job reads from (recorded in bin_dep_seqno during validation),
	 * so the data is actually there by the time we read it.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	kvfree(temp);
	return ret;
}

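/* Releases everything held by a job once it has completed (or failed
 * to be submitted): its fence, BO references and usecnts, bin slots,
 * the bin BO and perfmon references, and the power reference.
 */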
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	unsigned i;

	/* If we got force-completed because of GPU reset rather than
	 * through our IRQ handler, signal the fence now.
	 */
	if (exec->fence) {
		dma_fence_signal(exec->fence);
		dma_fence_put(exec->fence);
	}

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++) {
			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);

			vc4_bo_dec_usecnt(bo);
			drm_gem_object_put_unlocked(&exec->bo[i]->base);
		}
		kvfree(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_put_unlocked(&bo->base.base);
	}

	/* Free up the allocation of any bin slots we used. */
	spin_lock_irqsave(&vc4->job_lock, irqflags);
	vc4->bin_alloc_used &= ~exec->bin_slots;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Release the reference on the binner BO if needed. */
	if (exec->bin_bo_used)
		vc4_v3d_bin_bo_put(vc4);

	/* Release the reference we had on the perf monitor. */
	vc4_perfmon_put(exec->perfmon);

	vc4_v3d_pm_put(vc4);

	kfree(exec);
}

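/* Retires jobs on the job_done_list, freeing their resources, and
 * schedules any seqno callbacks whose seqno has now been reached.
 */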
void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

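/* Registers a callback to be run (from a workqueue) once the given
 * seqno has finished; if it already has, the callback is scheduled
 * immediately.
 */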
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

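/* Helper shared by the WAIT_SEQNO and WAIT_BO ioctls: waits for the
 * seqno and, if the wait was interrupted, reports back how much of the
 * timeout is left so userspace can restart the ioctl.
 */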
static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_put_unlocked(gem_obj);
	return ret;
}

/**
 * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_file *vc4file = file_priv->driver_priv;
	struct drm_vc4_submit_cl *args = data;
	struct drm_syncobj *out_sync = NULL;
	struct vc4_exec_info *exec;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence *in_fence;
	int ret = 0;

	if (!vc4->v3d) {
		DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
		return -ENODEV;
	}

	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	if (args->pad2 != 0) {
		DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	ret = vc4_v3d_pm_get(vc4);
	if (ret) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (args->perfmonid) {
		exec->perfmon = vc4_perfmon_find(vc4file,
						 args->perfmonid);
		if (!exec->perfmon) {
			ret = -ENOENT;
			goto fail;
		}
	}

	if (args->in_sync) {
		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
					     0, 0, &in_fence);
		if (ret)
			goto fail;

		/* When the fence (or fence array) is exclusively from our
		 * context we can skip the wait since jobs are executed in
		 * order of their submission through this ioctl and this can
		 * only have fences from a prior job.
		 */
		if (!dma_fence_match_context(in_fence,
					     vc4->dma_fence_context)) {
			ret = dma_fence_wait(in_fence, true);
			if (ret) {
				dma_fence_put(in_fence);
				goto fail;
			}
		}

		dma_fence_put(in_fence);
	}

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->out_sync) {
		out_sync = drm_syncobj_find(file_priv, args->out_sync);
		if (!out_sync) {
			ret = -EINVAL;
			goto fail;
		}

		/* We replace the fence in out_sync in vc4_queue_submit since
		 * the render job could execute immediately after that call.
		 * If it finishes before our ioctl processing resumes the
		 * render job fence could already have been freed.
		 */
	}

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);

	/* The syncobj isn't part of the exec data and we need to free our
	 * reference even if job submission failed.
	 */
	if (out_sync)
		drm_syncobj_put(out_sync);

	if (ret)
		goto fail;

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

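/* Sets up the job lists, hangcheck timer/work, and locks used by the
 * GEM execution path at driver load time.
 */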
void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	vc4->dma_fence_context = dma_fence_context_alloc(1);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);

	INIT_LIST_HEAD(&vc4->purgeable.list);
	mutex_init(&vc4->purgeable.lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->bin_bo) {
		drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
		vc4->bin_bo = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);
}

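/* Implements the GEM_MADVISE ioctl: lets userspace mark a BO as
 * DONTNEED (purgeable when not in use) or WILLNEED, and reports back
 * whether the BO's contents have already been purged.
 */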
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_vc4_gem_madvise *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;
	int ret;

	switch (args->madv) {
	case VC4_MADV_DONTNEED:
	case VC4_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
		return -ENOENT;
	}

	bo = to_vc4_bo(gem_obj);

	/* Only BOs exposed to userspace can be purged. */
	if (bo->madv == __VC4_MADV_NOTSUPP) {
		DRM_DEBUG("madvise not supported on this BO\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	/* Don't allow madvise on imported BOs; their backing storage is
	 * owned by the exporter.
	 */
	if (gem_obj->import_attach) {
		DRM_DEBUG("madvise not supported on imported BOs\n");
		ret = -EINVAL;
		goto out_put_gem;
	}

	mutex_lock(&bo->madv_lock);

	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
	    !refcount_read(&bo->usecnt)) {
		/* The BO is about to be marked purgeable, is not in use,
		 * and is not already purgeable or purged: add it to the
		 * purgeable list.
		 */
		vc4_bo_add_to_purgeable_pool(bo);
	} else if (args->madv == VC4_MADV_WILLNEED &&
		   bo->madv == VC4_MADV_DONTNEED &&
		   !refcount_read(&bo->usecnt)) {
		/* The BO is being marked unpurgeable, is not in use, and
		 * is currently on the purgeable list: remove it from the
		 * list.
		 */
		vc4_bo_remove_from_purgeable_pool(bo);
	}

	/* Report back whether the BO's contents are still there. */
	args->retained = bo->madv != __VC4_MADV_PURGED;

	/* Only update the madv state if the BO wasn't already purged. */
	if (bo->madv != __VC4_MADV_PURGED)
		bo->madv = args->madv;

	mutex_unlock(&bo->madv_lock);

	ret = 0;

out_put_gem:
	drm_gem_object_put_unlocked(gem_obj);

	return ret;
}