#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
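
/*
 * Fences mark an event in the GPU's pipeline and are used for GPU/CPU
 * synchronization.  When a fence is written back, all buffers associated
 * with that fence are expected to be idle on the ring and the relevant
 * GPU caches flushed.
 */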
static struct kmem_cache *amdgpu_fence_slab;
static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
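
/**
 * amdgpu_fence_write - write a fence value
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number to write
 *
 * Writes the fence value to the driver's fence location, if one is set up.
 */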
static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;

	if (drv->cpu_addr)
		*drv->cpu_addr = cpu_to_le32(seq);
}
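
/**
 * amdgpu_fence_read - read a fence value
 *
 * @ring: ring the fence is associated with
 *
 * Reads the current fence value from memory, falling back to the last
 * sequence number seen by the CPU if no CPU address is set up.
 */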
static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	u32 seq = 0;

	if (drv->cpu_addr)
		seq = le32_to_cpu(*drv->cpu_addr);
	else
		seq = lower_32_bits(atomic64_read(&drv->last_seq));

	return seq;
}
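
/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @owner: creator of the fence
 * @fence: resulting amdgpu fence object
 *
 * Allocates a fence, assigns it the next sequence number for the ring and
 * emits the fence command on the ring.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */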
int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
		      struct amdgpu_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;

	*fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	(*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
	(*fence)->ring = ring;
	(*fence)->owner = owner;
	fence_init(&(*fence)->base, &amdgpu_fence_ops,
		   &ring->fence_drv.fence_queue.lock,
		   adev->fence_context + ring->idx,
		   (*fence)->seq);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       (*fence)->seq,
			       AMDGPU_FENCE_FLAG_INT);
	return 0;
}
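
/**
 * amdgpu_fence_schedule_fallback - schedule fallback check
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Start a timer as fallback to our interrupts.
 */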
static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
	mod_timer(&ring->fence_drv.fallback_timer,
		  jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}
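
/**
 * amdgpu_fence_activity - check for fence activity
 *
 * @ring: pointer to struct amdgpu_ring
 *
 * Checks the current fence value and calculates the last signaled fence
 * value.  Returns true if activity occurred on the ring and the fence
 * queue should be woken up.
 */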
static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a chance that the sequence number wraps around to 0.
	 * If the wrap around is not handled correctly we will get stuck.
	 * The hardware only stores the lower 32 bits of the sequence number,
	 * so the upper 32 bits are taken from the last value seen by the CPU
	 * (or from last_emitted if the lower 32 bits wrapped in between).
	 */
	last_seq = atomic64_read(&ring->fence_drv.last_seq);
	do {
		last_emitted = ring->fence_drv.sync_seq[ring->idx];
		seq = amdgpu_fence_read(ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over, we don't want to return without checking
		 * if a fence is signaled, as the seq we just read is
		 * different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times; leave with the
			 * possibility that an older fence seq was reported
			 * as signaled than the current real last seq
			 * written by the hardware.
			 */
			break;
		}
	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);

	if (seq < last_emitted)
		amdgpu_fence_schedule_fallback(ring);

	return wake;
}
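
/**
 * amdgpu_fence_process - process fence activity on a ring
 *
 * @ring: ring the fence is associated with
 *
 * Wakes up all waiters on the fence queue if new fences have signaled.
 */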
void amdgpu_fence_process(struct amdgpu_ring *ring)
{
	if (amdgpu_fence_activity(ring))
		wake_up_all(&ring->fence_drv.fence_queue);
}
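
/**
 * amdgpu_fence_fallback - fallback for missed interrupts
 *
 * @arg: the ring to check, cast to an unsigned long (timer callback data)
 *
 * Checks for fence activity in case an interrupt was missed.
 */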
static void amdgpu_fence_fallback(unsigned long arg)
{
	struct amdgpu_ring *ring = (void *)arg;

	amdgpu_fence_process(ring);
}
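
/**
 * amdgpu_fence_seq_signaled - check if a given sequence number has signaled
 *
 * @ring: ring the fence is associated with
 * @seq: sequence number to check
 *
 * Returns true if the last signaled sequence number is >= @seq, polling
 * fence activity once if the cached value is not yet there.
 */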
static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
{
	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
		return true;

	/* poll new last sequence at least once */
	amdgpu_fence_process(ring);
	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
		return true;

	return false;
}
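
/**
 * amdgpu_fence_ring_wait_seq - wait for a specific sequence number
 *
 * @ring: ring to wait on
 * @seq: sequence number to wait for
 *
 * Waits until the given sequence number has signaled on the ring.
 * Returns 0 once it has signaled, -EINVAL if the sequence number has not
 * been emitted yet, or -EDEADLK if the wait finishes without it signaling.
 */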
static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
{
	bool signaled = false;

	BUG_ON(!ring);
	if (seq > ring->fence_drv.sync_seq[ring->idx])
		return -EINVAL;

	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
		return 0;

	amdgpu_fence_schedule_fallback(ring);
	wait_event(ring->fence_drv.fence_queue, (
		   (signaled = amdgpu_fence_seq_signaled(ring, seq))));

	if (signaled)
		return 0;
	else
		return -EDEADLK;
}
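
/**
 * amdgpu_fence_wait_next - wait for the next fence to signal
 *
 * @ring: ring the fence is associated with
 *
 * Waits for the next fence on the requested ring to signal.
 * Returns 0 on success, -ENOENT if no fence is outstanding.
 */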
int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
{
	uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;

	if (seq >= ring->fence_drv.sync_seq[ring->idx])
		return -ENOENT;

	return amdgpu_fence_ring_wait_seq(ring, seq);
}
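
/**
 * amdgpu_fence_wait_empty - wait for all fences to signal
 *
 * @ring: ring the fence is associated with
 *
 * Waits for all outstanding fences on the requested ring to signal.
 * Returns 0 if the fences have passed, error for all other cases.
 */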
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
	uint64_t seq = ring->fence_drv.sync_seq[ring->idx];

	if (!seq)
		return 0;

	return amdgpu_fence_ring_wait_seq(ring, seq);
}
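
/**
 * amdgpu_fence_count_emitted - get the count of emitted fences
 *
 * @ring: ring the fence is associated with
 *
 * Returns the number of fences emitted but not yet signaled on the ring,
 * clamped to avoid wrap-around artifacts.
 */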
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
{
	uint64_t emitted;

	/* We are not protected by the ring lock when reading the last
	 * sequence, but it's ok to report a slightly wrong fence count here.
	 */
	amdgpu_fence_process(ring);
	emitted = ring->fence_drv.sync_seq[ring->idx]
		- atomic64_read(&ring->fence_drv.last_seq);
	/* clamp to avoid 32-bit wrap-around */
	if (emitted > 0x10000000)
		emitted = 0x10000000;

	return (unsigned)emitted;
}
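
/**
 * amdgpu_fence_need_sync - do we need a semaphore
 *
 * @fence: amdgpu fence object
 * @dst_ring: which ring to check against
 *
 * Returns false if no sync is needed (no fence, same ring, or @dst_ring
 * has already synced past this fence), true otherwise.
 */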
bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	struct amdgpu_fence_driver *fdrv;

	if (!fence)
		return false;

	if (fence->ring == dst_ring)
		return false;

	/* we are protected by the ring mutex */
	fdrv = &dst_ring->fence_drv;
	if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
		return false;

	return true;
}
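
/**
 * amdgpu_fence_note_sync - record a sync point
 *
 * @fence: amdgpu fence object
 * @dst_ring: ring that synced to the fence
 *
 * Updates @dst_ring's per-ring sync sequence numbers to at least those of
 * the fence's ring, so redundant syncs can be skipped later.
 */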
void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
			    struct amdgpu_ring *dst_ring)
{
	struct amdgpu_fence_driver *dst, *src;
	unsigned i;

	if (!fence)
		return;

	if (fence->ring == dst_ring)
		return;

	/* we are protected by the ring mutex */
	src = &fence->ring->fence_drv;
	dst = &dst_ring->fence_drv;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		if (i == dst_ring->idx)
			continue;

		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}
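
/**
 * amdgpu_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @ring: ring to start the fence driver on
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 *
 * Make the fence driver ready for processing.  Not all asics have all
 * rings, so each asic will only start the fence driver on the rings it has.
 * Returns 0 for success.
 */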
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq_src,
				   unsigned irq_type)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t index;

	if (ring != &adev->uvd.ring) {
		ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
		ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
	} else {
		/* put fence directly behind the UVD firmware */
		index = ALIGN(adev->uvd.fw->size, 8);
		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
	}
	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
	amdgpu_irq_get(adev, irq_src, irq_type);

	ring->fence_drv.irq_src = irq_src;
	ring->fence_drv.irq_type = irq_type;
	ring->fence_drv.initialized = true;

	dev_info(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
		 "cpu addr 0x%p\n", ring->idx,
		 ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
	return 0;
}
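
/**
 * amdgpu_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @ring: ring to init the fence driver on
 *
 * Init the fence driver for the requested ring and, when the GPU scheduler
 * is enabled, create the scheduler for the ring.
 * Returns 0 for success, error for failure.
 */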
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
	int i, r;

	ring->fence_drv.cpu_addr = NULL;
	ring->fence_drv.gpu_addr = 0;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		ring->fence_drv.sync_seq[i] = 0;

	atomic64_set(&ring->fence_drv.last_seq, 0);
	ring->fence_drv.initialized = false;

	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
		    (unsigned long)ring);

	init_waitqueue_head(&ring->fence_drv.fence_queue);

	if (amdgpu_enable_scheduler) {
		long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
		if (timeout == 0) {
			/*
			 * A zero lockup timeout means "never time out", so
			 * hand the scheduler an infinite timeout instead.
			 */
			timeout = MAX_SCHEDULE_TIMEOUT;
		}
		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
				   amdgpu_sched_hw_submission,
				   timeout, ring->name);
		if (r) {
			DRM_ERROR("Failed to create scheduler on ring %s.\n",
				  ring->name);
			return r;
		}
	}

	return 0;
}
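
/**
 * amdgpu_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Init the fence slab (on first use) and the fence debugfs file.
 * Returns 0 for success, -ENOMEM on slab allocation failure.
 */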
int amdgpu_fence_driver_init(struct amdgpu_device *adev)
{
	if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
		amdgpu_fence_slab = kmem_cache_create(
			"amdgpu_fence", sizeof(struct amdgpu_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!amdgpu_fence_slab)
			return -ENOMEM;
	}
	if (amdgpu_debugfs_fence_init(adev))
		dev_err(adev->dev, "fence debugfs file creation failed\n");

	return 0;
}
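
/**
 * amdgpu_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Tear down the fence driver for all possible rings.
 */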
void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
	int i, r;

	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
		kmem_cache_destroy(amdgpu_fence_slab);
	mutex_lock(&adev->ring_lock);
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;
		r = amdgpu_fence_wait_empty(ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			amdgpu_fence_driver_force_completion(adev);
		}
		wake_up_all(&ring->fence_drv.fence_queue);
		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
			       ring->fence_drv.irq_type);
		amd_sched_fini(&ring->sched);
		del_timer_sync(&ring->fence_drv.fallback_timer);
		ring->fence_drv.initialized = false;
	}
	mutex_unlock(&adev->ring_lock);
}
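
/**
 * amdgpu_fence_driver_suspend - suspend the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Suspend the fence driver for all possible rings.
 */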
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
{
	int i, r;

	mutex_lock(&adev->ring_lock);
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		/* wait for gpu to finish processing current batch */
		r = amdgpu_fence_wait_empty(ring);
		if (r) {
			/* delay GPU reset to resume */
			amdgpu_fence_driver_force_completion(adev);
		}

		/* disable the interrupt */
		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
			       ring->fence_drv.irq_type);
	}
	mutex_unlock(&adev->ring_lock);
}
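
/**
 * amdgpu_fence_driver_resume - resume the fence driver
 * for all possible rings.
 *
 * @adev: amdgpu device pointer
 *
 * Resume the fence driver for all possible rings.
 * Not all asics have all rings, so each asic will only
 * re-enable the fence interrupt on the rings it has.
 */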
void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
{
	int i;

	mutex_lock(&adev->ring_lock);
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		/* enable the interrupt */
		amdgpu_irq_get(adev, ring->fence_drv.irq_src,
			       ring->fence_drv.irq_type);
	}
	mutex_unlock(&adev->ring_lock);
}
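
/**
 * amdgpu_fence_driver_force_completion - force all fences to complete
 *
 * @adev: amdgpu device pointer
 *
 * Write the last emitted sequence number for each ring so that every
 * outstanding fence is treated as signaled and no waiter blocks forever.
 */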
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
	}
}
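
/*
 * Common fence implementation
 */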
static const char *amdgpu_fence_get_driver_name(struct fence *fence)
{
	return "amdgpu";
}

static const char *amdgpu_fence_get_timeline_name(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	return (const char *)fence->ring->name;
}
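
/**
 * amdgpu_fence_is_signaled - test if a fence has signaled
 *
 * @f: fence to test
 *
 * Tests the fence's sequence number and, if it has not signaled yet,
 * starts fence processing before testing again.
 * Returns true if the fence has signaled, false otherwise.
 */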
static bool amdgpu_fence_is_signaled(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_ring *ring = fence->ring;

	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
		return true;

	amdgpu_fence_process(ring);

	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
		return true;

	return false;
}
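
/**
 * amdgpu_fence_check_signaled - callback from fence_queue
 *
 * Called with the fence_queue lock held, which also serves as this fence's
 * spinlock, so the locked/unlocked helper variants (fence_signal_locked(),
 * __remove_wait_queue()) are used here.
 */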
static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct amdgpu_fence *fence;
	struct amdgpu_device *adev;
	u64 seq;
	int ret;

	fence = container_of(wait, struct amdgpu_fence, fence_wake);
	adev = fence->ring->adev;

	/*
	 * We cannot use amdgpu_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
	 */
	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
	if (seq >= fence->seq) {
		ret = fence_signal_locked(&fence->base);
		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			FENCE_TRACE(&fence->base, "was already signaled\n");

		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
		fence_put(&fence->base);
	} else
		FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}
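
/**
 * amdgpu_fence_enable_signaling - enable signaling on a fence
 *
 * @f: fence
 *
 * Called with the fence_queue lock held; adds a callback to fence_queue
 * that checks if this fence is signaled, and if so it signals the fence
 * and removes itself.
 */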
static bool amdgpu_fence_enable_signaling(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	struct amdgpu_ring *ring = fence->ring;

	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
		return false;

	fence->fence_wake.flags = 0;
	fence->fence_wake.private = NULL;
	fence->fence_wake.func = amdgpu_fence_check_signaled;
	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
	fence_get(f);
	if (!timer_pending(&ring->fence_drv.fallback_timer))
		amdgpu_fence_schedule_fallback(ring);
	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
	return true;
}

static void amdgpu_fence_release(struct fence *f)
{
	struct amdgpu_fence *fence = to_amdgpu_fence(f);
	kmem_cache_free(amdgpu_fence_slab, fence);
}

const struct fence_ops amdgpu_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_fence_get_timeline_name,
	.enable_signaling = amdgpu_fence_enable_signaling,
	.signaled = amdgpu_fence_is_signaled,
	.wait = fence_default_wait,
	.release = amdgpu_fence_release,
};
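
/*
 * Fence debugfs
 */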
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int i, j;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];
		if (!ring || !ring->fence_drv.initialized)
			continue;

		amdgpu_fence_process(ring);

		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   ring->fence_drv.sync_seq[i]);

		for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
			struct amdgpu_ring *other = adev->rings[j];
			if (i != j && other && other->fence_drv.initialized &&
			    ring->fence_drv.sync_seq[j])
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, ring->fence_drv.sync_seq[j]);
		}
	}
	return 0;
}

static struct drm_info_list amdgpu_debugfs_fence_list[] = {
	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
};
#endif

int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
#else
	return 0;
#endif
}