#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

#define AMDGPU_CS_MAX_PRIORITY	32u
#define AMDGPU_CS_NUM_BUCKETS	(AMDGPU_CS_MAX_PRIORITY + 1)

/* This is a simple bucket sort: an entry with priority "i" is added to
 * bucket[i], and the buckets are later spliced together so that higher
 * priority values end up first in the resulting list. Entries with equal
 * priority keep their relative order, i.e. the sort is stable.
 */
struct amdgpu_cs_buckets {
	struct list_head bucket[AMDGPU_CS_NUM_BUCKETS];
};

static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Buffers that appear earlier in the BO list are likely to be used
	 * more often, so add at the tail; this also keeps the ordering of
	 * entries with the same priority stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]);
}

static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

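/* Map the userspace-visible (ip_type, ip_instance, ring) triple from a
 * chunk onto the kernel's ring structure. Returns 0 and fills *out_ring
 * on success, or -EINVAL if the IP type, instance or ring index is out
 * of range.
 */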
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
		       u32 ip_instance, u32 ring,
		       struct amdgpu_ring **out_ring)
{
	/* Right now all IPs have only one instance - multiple rings. */
	if (ip_instance != 0) {
		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
		return -EINVAL;
	}

	switch (ip_type) {
	default:
		DRM_ERROR("unknown ip type: %d\n", ip_type);
		return -EINVAL;
	case AMDGPU_HW_IP_GFX:
		if (ring < adev->gfx.num_gfx_rings) {
			*out_ring = &adev->gfx.gfx_ring[ring];
		} else {
			DRM_ERROR("only %d gfx rings are supported now\n",
				  adev->gfx.num_gfx_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_COMPUTE:
		if (ring < adev->gfx.num_compute_rings) {
			*out_ring = &adev->gfx.compute_ring[ring];
		} else {
			DRM_ERROR("only %d compute rings are supported now\n",
				  adev->gfx.num_compute_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_DMA:
		if (ring < 2) {
			*out_ring = &adev->sdma[ring].ring;
		} else {
			DRM_ERROR("only two SDMA rings are supported\n");
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_UVD:
		*out_ring = &adev->uvd.ring;
		break;
	case AMDGPU_HW_IP_VCE:
		if (ring < 2) {
			*out_ring = &adev->vce.ring[ring];
		} else {
			DRM_ERROR("only two VCE rings are supported\n");
			return -EINVAL;
		}
		break;
	}
	return 0;
}

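/* Allocate a parser and cache the basic submission parameters; the chunk
 * and buffer state is filled in later by amdgpu_cs_parser_init(). Each IB
 * is tagged with the submitting context.
 */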
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
						 struct drm_file *filp,
						 struct amdgpu_ctx *ctx,
						 struct amdgpu_ib *ibs,
						 uint32_t num_ibs)
{
	struct amdgpu_cs_parser *parser;
	int i;

	parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
	if (!parser)
		return NULL;

	parser->adev = adev;
	parser->filp = filp;
	parser->ctx = ctx;
	parser->ibs = ibs;
	parser->num_ibs = num_ibs;
	for (i = 0; i < num_ibs; i++)
		ibs[i].ctx = ctx;

	return parser;
}

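/* Copy the chunk descriptors of a submission from userspace: look up the
 * context and BO list, duplicate every chunk into kernel memory and count
 * the IB chunks so that the IB array can be allocated. The fence chunk, if
 * present, provides the user fence BO and offset.
 */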
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
	union drm_amdgpu_cs *cs = data;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned size;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);

	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto put_bo_list;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				  GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto put_bo_list;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			p->num_ibs++;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) {
				uint32_t handle;
				struct drm_gem_object *gobj;
				struct drm_amdgpu_cs_chunk_fence *fence_data;

				fence_data = (void *)p->chunks[i].kdata;
				handle = fence_data->handle;
				gobj = drm_gem_object_lookup(p->adev->ddev,
							     p->filp, handle);
				if (gobj == NULL) {
					ret = -EINVAL;
					goto free_partial_kdata;
				}

				p->uf.bo = gem_to_amdgpu_bo(gobj);
				p->uf.offset = fence_data->offset;
			} else {
				ret = -EINVAL;
				goto free_partial_kdata;
			}
			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!p->ibs) {
		ret = -ENOMEM;
		goto free_all_kdata;
	}

	kfree(chunk_array);
	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		drm_free_large(p->chunks[i].kdata);
	kfree(p->chunks);
put_bo_list:
	if (p->bo_list)
		amdgpu_bo_list_put(p->bo_list);
	amdgpu_ctx_put(p->ctx);
free_chunk:
	kfree(chunk_array);

	return ret;
}

/* Returns how many bytes TTM can move per IB. */
static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
{
	u64 real_vram_size = adev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&adev->vram_usage);

	/* The threshold is based on how much VRAM is currently in use: it is
	 * half of the VRAM that is still free below the half-way mark, so
	 * with an empty VRAM this works out to a quarter of total VRAM, and
	 * it drops to the 1MB minimum once more than half of VRAM is used.
	 *
	 * Note that this is a threshold, not a hard limit: the check in
	 * amdgpu_cs_list_validate() happens before a buffer is moved, so at
	 * least one buffer of any size can always be moved.
	 */
	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

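/* Validate (move into a GPU accessible domain) every buffer on the list.
 * Once the per-IB move threshold has been exceeded, a buffer is left in
 * its current domain as long as that domain is allowed.
 */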
int amdgpu_cs_list_validate(struct amdgpu_device *adev,
			    struct amdgpu_vm *vm,
			    struct list_head *validated)
{
	struct amdgpu_bo_list_entry *lobj;
	struct amdgpu_bo *bo;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->prefered_domains;
			u32 current_domain =
				amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move
			 * it if we have already moved too many buffers for
			 * this IB.
			 *
			 * Note that this allows moving at least one buffer
			 * of any size, because it doesn't take the current
			 * "bo" into account.
			 */
			if ((lobj->allowed_domains & current_domain) != 0 &&
			    (domain & current_domain) == 0 &&
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			amdgpu_ttm_placement_from_domain(bo, domain);
			initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&adev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS && domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				return r;
			}
		}
		lobj->bo_va = amdgpu_vm_bo_find(vm, bo);
	}
	return 0;
}

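/* Build the list of buffers to validate: sort the BO list by priority,
 * add the buffers needed by the VM, reserve everything and validate both
 * the main list and any duplicates. Takes mmap_sem when userptr BOs are
 * involved.
 */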
static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_cs_buckets buckets;
	struct list_head duplicates;
	bool need_mmap_lock = false;
	int i, r;

	if (p->bo_list) {
		need_mmap_lock = p->bo_list->has_userptr;
		amdgpu_cs_buckets_init(&buckets);
		for (i = 0; i < p->bo_list->num_entries; i++)
			amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head,
					      p->bo_list->array[i].priority);

		amdgpu_cs_buckets_get_list(&buckets, &p->validated);
	}

	p->vm_bos = amdgpu_vm_get_bos(p->adev, &fpriv->vm,
				      &p->validated);

	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
	if (unlikely(r != 0))
		goto error_reserve;

	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);

error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

error_reserve:
	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}

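/* Make the first IB wait for the fences attached to every reserved
 * buffer's reservation object.
 */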
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct reservation_object *resv = e->robj->tbo.resv;
		r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp);

		if (r)
			return r;
	}
	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
	struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ibs[parser->num_ibs-1].fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}
}

static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
{
	unsigned i;

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (!amdgpu_enable_scheduler) {
		if (parser->ibs)
			for (i = 0; i < parser->num_ibs; i++)
				amdgpu_ib_free(parser->adev, &parser->ibs[i]);
		kfree(parser->ibs);
		if (parser->uf.bo)
			drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
	}

	kfree(parser);
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to back off the buffer reservation
 *
 * If error is set, unvalidate the buffers, otherwise just free the memory
 * used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
	amdgpu_cs_parser_fini_early(parser, error, backoff);
	amdgpu_cs_parser_fini_late(parser);
}

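/* Bring the VM page tables up to date for this submission: update the page
 * directory, clear freed mappings, update the mapping of every BO in the
 * list and make the first IB wait for the resulting page table updates.
 */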
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
				   struct amdgpu_vm *vm)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_update_page_directory(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
	if (r)
		return r;

	r = amdgpu_vm_clear_freed(adev, vm);
	if (r)
		return r;

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct fence *f;

			/* skip entries without a BO or without a mapping in this VM */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
			if (r)
				return r;
		}
	}

	return amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);
}

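/* Run the per-ring command stream parser where required and flush the VM
 * updates; without the GPU scheduler the IBs are submitted to the ring
 * directly from here.
 */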
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	int i, r;

	if (parser->num_ibs == 0)
		return 0;

	/* Rings with a CS parser (UVD/VCE) need their IBs checked/patched here. */
	for (i = 0; i < parser->num_ibs; i++) {
		ring = parser->ibs[i].ring;
		if (ring->funcs->parse_cs) {
			r = amdgpu_ring_parse_cs(ring, parser, i);
			if (r)
				return r;
		}
	}

	mutex_lock(&vm->mutex);
	r = amdgpu_bo_vm_update_pte(parser, vm);
	if (r)
		goto out;

	amdgpu_cs_sync_rings(parser);
	if (!amdgpu_enable_scheduler)
		r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
				       parser->filp);

out:
	mutex_unlock(&vm->mutex);
	return r;
}

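/* A -EDEADLK return from submission means the GPU is locked up; trigger a
 * reset and ask userspace to retry with -EAGAIN.
 */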
static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
{
	if (r == -EDEADLK) {
		r = amdgpu_gpu_reset(adev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

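/* Turn the IB chunks into amdgpu_ib structures. Rings that implement
 * parse_cs (UVD/VCE) get the IB contents copied into a kernel IB so they
 * can be patched; all other rings execute the user IB directly via its
 * virtual address. Also attaches the GDS/GWS/OA resources to the first IB
 * and the user fence to the last one.
 */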
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
				       chunk_ib->ip_instance, chunk_ib->ring,
				       &ring);
		if (r)
			return r;

		if (ring->funcs->parse_cs) {
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			uint64_t offset;
			uint8_t *kptr;

			m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
						   &aobj);
			if (!aobj) {
				DRM_ERROR("IB va_start is invalid\n");
				return -EINVAL;
			}

			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
			    (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r)
				return r;

			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
			kptr += chunk_ib->va_start - offset;

			r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);
		} else {
			r = amdgpu_ib_get(ring, vm, 0, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			ib->gpu_addr = chunk_ib->va_start;
		}

		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;
		ib->ctx = parser->ctx;
		j++;
	}

	if (!parser->num_ibs)
		return 0;

	/* add GDS resources to the first IB */
	if (parser->bo_list) {
		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
		struct amdgpu_ib *ib = &parser->ibs[0];

		if (gds) {
			ib->gds_base = amdgpu_bo_gpu_offset(gds);
			ib->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			ib->gws_base = amdgpu_bo_gpu_offset(gws);
			ib->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			ib->oa_base = amdgpu_bo_gpu_offset(oa);
			ib->oa_size = amdgpu_bo_size(oa);
		}
	}

	/* wrap the last IB with the user fence */
	if (parser->uf.bo) {
		struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];

		/* UVD & VCE fw doesn't support user fences */
		if (ib->ring->type == AMDGPU_RING_TYPE_UVD ||
		    ib->ring->type == AMDGPU_RING_TYPE_VCE)
			return -EINVAL;

		ib->user = &parser->uf;
	}

	return 0;
}

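/* Resolve the dependency chunks: look up each (ctx, ring, handle) fence
 * and make the first IB wait for it before the submission runs.
 */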
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_ib *ib;
	int i, j, r;

	if (!p->num_ibs)
		return 0;

	/* Add dependencies to the first IB */
	ib = &p->ibs[0];
	for (i = 0; i < p->nchunks; ++i) {
		struct drm_amdgpu_cs_chunk_dep *deps;
		struct amdgpu_cs_chunk *chunk;
		unsigned num_deps;

		chunk = &p->chunks[i];

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
			continue;

		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
		num_deps = chunk->length_dw * 4 /
			sizeof(struct drm_amdgpu_cs_chunk_dep);

		for (j = 0; j < num_deps; ++j) {
			struct amdgpu_ring *ring;
			struct amdgpu_ctx *ctx;
			struct fence *fence;

			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
					       deps[j].ip_instance,
					       deps[j].ring, &ring);
			if (r)
				return r;

			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
			if (ctx == NULL)
				return -EINVAL;

			fence = amdgpu_ctx_get_fence(ctx, ring,
						     deps[j].handle);
			if (IS_ERR(fence)) {
				r = PTR_ERR(fence);
				amdgpu_ctx_put(ctx);
				return r;

			} else if (fence) {
				r = amdgpu_sync_fence(adev, &ib->sync, fence);
				fence_put(fence);
				amdgpu_ctx_put(ctx);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

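/* Scheduler callback to release the IBs and the user fence BO of a
 * finished job.
 */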
static int amdgpu_cs_free_job(struct amdgpu_job *job)
{
	int i;

	if (job->ibs)
		for (i = 0; i < job->num_ibs; i++)
			amdgpu_ib_free(job->adev, &job->ibs[i]);
	kfree(job->ibs);
	if (job->uf.bo)
		drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
	return 0;
}

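/* Main command submission ioctl: parse the chunks, reserve and validate
 * the buffer list, resolve dependencies, run the IB checks and finally
 * either hand the job to the GPU scheduler or submit the IBs directly.
 */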
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser *parser;
	bool reserved_buffers = false;
	int i, r;

	down_read(&adev->exclusive_lock);
	if (!adev->accel_working) {
		up_read(&adev->exclusive_lock);
		return -EBUSY;
	}

	parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
	if (!parser) {
		up_read(&adev->exclusive_lock);
		return -ENOMEM;
	}
	r = amdgpu_cs_parser_init(parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		kfree(parser);
		up_read(&adev->exclusive_lock);
		r = amdgpu_cs_handle_lockup(adev, r);
		return r;
	}

	r = amdgpu_cs_parser_relocs(parser);
	if (r == -ENOMEM)
		DRM_ERROR("Not enough memory for command submission!\n");
	else if (r && r != -ERESTARTSYS)
		DRM_ERROR("Failed to process the buffer list %d!\n", r);
	else if (!r) {
		reserved_buffers = true;
		r = amdgpu_cs_ib_fill(adev, parser);
	}

	if (!r) {
		r = amdgpu_cs_dependencies(adev, parser);
		if (r)
			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
	}

	if (r)
		goto out;

	for (i = 0; i < parser->num_ibs; i++)
		trace_amdgpu_cs(parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, parser);
	if (r)
		goto out;

	if (amdgpu_enable_scheduler && parser->num_ibs) {
		struct amdgpu_job *job;
		struct amdgpu_ring *ring = parser->ibs->ring;

		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
		if (!job) {
			r = -ENOMEM;
			goto out;
		}
		job->base.sched = &ring->sched;
		job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
		job->adev = parser->adev;
		job->ibs = parser->ibs;
		job->num_ibs = parser->num_ibs;
		job->base.owner = parser->filp;
		mutex_init(&job->job_lock);
		if (job->ibs[job->num_ibs - 1].user) {
			memcpy(&job->uf, &parser->uf,
			       sizeof(struct amdgpu_user_fence));
			job->ibs[job->num_ibs - 1].user = &job->uf;
		}

		job->free_job = amdgpu_cs_free_job;
		mutex_lock(&job->job_lock);
		r = amd_sched_entity_push_job(&job->base);
		if (r) {
			mutex_unlock(&job->job_lock);
			amdgpu_cs_free_job(job);
			kfree(job);
			goto out;
		}
		cs->out.handle =
			amdgpu_ctx_add_fence(parser->ctx, ring,
					     &job->base.s_fence->base);
		parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;

		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &job->base.s_fence->base);

		mutex_unlock(&job->job_lock);
		amdgpu_cs_parser_fini_late(parser);
		up_read(&adev->exclusive_lock);
		return 0;
	}

	cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out:
	amdgpu_cs_parser_fini(parser, r, reserved_buffers);
	up_read(&adev->exclusive_lock);
	r = amdgpu_cs_handle_lockup(adev, r);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct fence *fence;
	long r;

	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
			       wait->in.ring, &ring);
	if (r)
		return r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = fence_wait_timeout(fence, true, timeout);
		fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_find_mapping - find the bo_va mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects of the command submission for the given virtual
 * memory address. Returns the mapping when found, NULL otherwise.
 */
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		       uint64_t addr, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_list_entry *reloc;
	struct amdgpu_bo_va_mapping *mapping;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	list_for_each_entry(reloc, &parser->validated, tv.head) {
		if (!reloc->bo_va)
			continue;

		list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = reloc->bo_va->bo;
			return mapping;
		}

		list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = reloc->bo_va->bo;
			return mapping;
		}
	}

	return NULL;
}