1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#include <linux/dma-buf.h>
23#include <linux/list.h>
24#include <linux/pagemap.h>
25#include <linux/sched/mm.h>
26#include <linux/sched/task.h>
27
28#include "amdgpu_object.h"
29#include "amdgpu_gem.h"
30#include "amdgpu_vm.h"
31#include "amdgpu_amdkfd.h"
32#include "amdgpu_dma_buf.h"
33#include <uapi/linux/kfd_ioctl.h>
34#include "amdgpu_xgmi.h"
35
36
/*
 * Driver-private BO flag (bit 63). In this file it is OR-ed into bo->flags
 * when a BO is allocated with a user address, and tested when the BO's
 * memory-limit reservation is released.
 */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)

/*
 * NOTE(review): presumably the delay, in milliseconds, used when scheduling
 * the userptr restore worker. The use site is not visible in this part of
 * the file -- confirm.
 */
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
43
44
/*
 * File-wide KFD memory accounting.
 *
 * The two max_* limits are computed once in
 * amdgpu_amdkfd_gpuvm_init_mem_limits(). The two *_used counters are
 * adjusted by amdgpu_amdkfd_reserve_mem_limit() and unreserve_mem_limit()
 * while holding mem_limit_lock. The counters are signed so that an
 * unbalanced release can be detected as a negative value.
 */
static struct {
	uint64_t max_system_mem_limit;	/* upper bound for system_mem_used, bytes */
	uint64_t max_ttm_mem_limit;	/* upper bound for ttm_mem_used, bytes */
	int64_t system_mem_used;	/* bytes currently accounted as system memory */
	int64_t ttm_mem_used;		/* bytes currently accounted as TTM memory */
	spinlock_t mem_limit_lock;	/* serializes updates of the counters */
} kfd_mem_limit;
52
53
/*
 * Parameters handed (as an opaque pointer) to amdgpu_amdkfd_validate(),
 * which forwards them to amdgpu_amdkfd_bo_validate().
 */
struct amdgpu_vm_parser {
	uint32_t domain;	/* placement domain to validate the BO into */
	bool wait;		/* whether to wait on the BO after validation */
};
58
/* Human-readable names for memory domains, indexed by domain bit position. */
static const char * const domain_bit_to_string[] = {
	"CPU",
	"GTT",
	"VRAM",
	"GDS",
	"GWS",
	"OA"
};

/*
 * Name of the lowest set bit in @domain. ffs() returns 0 for a zero
 * argument, so @domain must have a bit set within the six table entries or
 * the lookup indexes outside the table. @domain is expanded unparenthesized.
 */
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]

/* Forward declaration; installed as a delayed-work handler in init_kfd_vm(). */
static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
71
72
/*
 * Convert the opaque KFD device handle into the amdgpu device pointer.
 * This is a plain pointer cast; no validation is performed.
 */
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev;
}
77
78static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
79 struct kgd_mem *mem)
80{
81 struct kfd_bo_va_list *entry;
82
83 list_for_each_entry(entry, &mem->bo_va_list, bo_list)
84 if (entry->bo_va->base.vm == avm)
85 return false;
86
87 return true;
88}
89
90
91
92
93
94void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
95{
96 struct sysinfo si;
97 uint64_t mem;
98
99 si_meminfo(&si);
100 mem = si.freeram - si.freehigh;
101 mem *= si.mem_unit;
102
103 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
104 kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
105 kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
106 pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
107 (kfd_mem_limit.max_system_mem_limit >> 20),
108 (kfd_mem_limit.max_ttm_mem_limit >> 20));
109}
110
111
112
113
114
115
116
117
118
119
120
/*
 * Estimate of the page-table size for @mem_size bytes of memory: mem_size
 * divided by 2^14. Used by amdgpu_amdkfd_reserve_mem_limit() to keep part of
 * VRAM out of the KFD VRAM budget.
 */
#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
122
123static size_t amdgpu_amdkfd_acc_size(uint64_t size)
124{
125 size >>= PAGE_SHIFT;
126 size *= sizeof(dma_addr_t) + sizeof(void *);
127
128 return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) +
129 __roundup_pow_of_two(sizeof(struct ttm_tt)) +
130 PAGE_ALIGN(size);
131}
132
133static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
134 uint64_t size, u32 domain, bool sg)
135{
136 uint64_t reserved_for_pt =
137 ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
138 size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
139 int ret = 0;
140
141 acc_size = amdgpu_amdkfd_acc_size(size);
142
143 vram_needed = 0;
144 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
145
146 system_mem_needed = acc_size + size;
147 ttm_mem_needed = acc_size + size;
148 } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
149
150 system_mem_needed = acc_size + size;
151 ttm_mem_needed = acc_size;
152 } else {
153
154 system_mem_needed = acc_size;
155 ttm_mem_needed = acc_size;
156 if (domain == AMDGPU_GEM_DOMAIN_VRAM)
157 vram_needed = size;
158 }
159
160 spin_lock(&kfd_mem_limit.mem_limit_lock);
161
162 if (kfd_mem_limit.system_mem_used + system_mem_needed >
163 kfd_mem_limit.max_system_mem_limit)
164 pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
165
166 if ((kfd_mem_limit.system_mem_used + system_mem_needed >
167 kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
168 (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
169 kfd_mem_limit.max_ttm_mem_limit) ||
170 (adev->kfd.vram_used + vram_needed >
171 adev->gmc.real_vram_size - reserved_for_pt)) {
172 ret = -ENOMEM;
173 } else {
174 kfd_mem_limit.system_mem_used += system_mem_needed;
175 kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
176 adev->kfd.vram_used += vram_needed;
177 }
178
179 spin_unlock(&kfd_mem_limit.mem_limit_lock);
180 return ret;
181}
182
183static void unreserve_mem_limit(struct amdgpu_device *adev,
184 uint64_t size, u32 domain, bool sg)
185{
186 size_t acc_size;
187
188 acc_size = amdgpu_amdkfd_acc_size(size);
189
190 spin_lock(&kfd_mem_limit.mem_limit_lock);
191 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
192 kfd_mem_limit.system_mem_used -= (acc_size + size);
193 kfd_mem_limit.ttm_mem_used -= (acc_size + size);
194 } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
195 kfd_mem_limit.system_mem_used -= (acc_size + size);
196 kfd_mem_limit.ttm_mem_used -= acc_size;
197 } else {
198 kfd_mem_limit.system_mem_used -= acc_size;
199 kfd_mem_limit.ttm_mem_used -= acc_size;
200 if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
201 adev->kfd.vram_used -= size;
202 WARN_ONCE(adev->kfd.vram_used < 0,
203 "kfd VRAM memory accounting unbalanced");
204 }
205 }
206 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
207 "kfd system memory accounting unbalanced");
208 WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
209 "kfd TTM memory accounting unbalanced");
210
211 spin_unlock(&kfd_mem_limit.mem_limit_lock);
212}
213
214void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
215{
216 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
217 u32 domain = bo->preferred_domains;
218 bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
219
220 if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
221 domain = AMDGPU_GEM_DOMAIN_CPU;
222 sg = false;
223 }
224
225 unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
226}
227
228
229
230
231
232
233
234
235
236
237
238static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
239 struct amdgpu_amdkfd_fence *ef)
240{
241 struct dma_resv *resv = bo->tbo.base.resv;
242 struct dma_resv_list *old, *new;
243 unsigned int i, j, k;
244
245 if (!ef)
246 return -EINVAL;
247
248 old = dma_resv_get_list(resv);
249 if (!old)
250 return 0;
251
252 new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
253 if (!new)
254 return -ENOMEM;
255
256
257
258
259 for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
260 struct dma_fence *f;
261
262 f = rcu_dereference_protected(old->shared[i],
263 dma_resv_held(resv));
264
265 if (f->context == ef->base.context)
266 RCU_INIT_POINTER(new->shared[--j], f);
267 else
268 RCU_INIT_POINTER(new->shared[k++], f);
269 }
270 new->shared_max = old->shared_max;
271 new->shared_count = k;
272
273
274 write_seqcount_begin(&resv->seq);
275 RCU_INIT_POINTER(resv->fence, new);
276 write_seqcount_end(&resv->seq);
277
278
279 for (i = j, k = 0; i < old->shared_count; ++i) {
280 struct dma_fence *f;
281
282 f = rcu_dereference_protected(new->shared[i],
283 dma_resv_held(resv));
284 dma_fence_put(f);
285 }
286 kfree_rcu(old, rcu);
287
288 return 0;
289}
290
291int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
292{
293 struct amdgpu_bo *root = bo;
294 struct amdgpu_vm_bo_base *vm_bo;
295 struct amdgpu_vm *vm;
296 struct amdkfd_process_info *info;
297 struct amdgpu_amdkfd_fence *ef;
298 int ret;
299
300
301 while (root->parent)
302 root = root->parent;
303
304 vm_bo = root->vm_bo;
305 if (!vm_bo)
306 return 0;
307
308 vm = vm_bo->vm;
309 if (!vm)
310 return 0;
311
312 info = vm->process_info;
313 if (!info || !info->eviction_fence)
314 return 0;
315
316 ef = container_of(dma_fence_get(&info->eviction_fence->base),
317 struct amdgpu_amdkfd_fence, base);
318
319 BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
320 ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
321 dma_resv_unlock(bo->tbo.base.resv);
322
323 dma_fence_put(&ef->base);
324 return ret;
325}
326
327static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
328 bool wait)
329{
330 struct ttm_operation_ctx ctx = { false, false };
331 int ret;
332
333 if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
334 "Called with userptr BO"))
335 return -EINVAL;
336
337 amdgpu_bo_placement_from_domain(bo, domain);
338
339 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
340 if (ret)
341 goto validate_fail;
342 if (wait)
343 amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
344
345validate_fail:
346 return ret;
347}
348
349static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
350{
351 struct amdgpu_vm_parser *p = param;
352
353 return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
354}
355
356
357
358
359
360
361
362
363static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
364{
365 struct amdgpu_bo *pd = vm->root.base.bo;
366 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
367 struct amdgpu_vm_parser param;
368 int ret;
369
370 param.domain = AMDGPU_GEM_DOMAIN_VRAM;
371 param.wait = false;
372
373 ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
374 ¶m);
375 if (ret) {
376 pr_err("failed to validate PT BOs\n");
377 return ret;
378 }
379
380 ret = amdgpu_amdkfd_validate(¶m, pd);
381 if (ret) {
382 pr_err("failed to validate PD\n");
383 return ret;
384 }
385
386 vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
387
388 if (vm->use_cpu_for_update) {
389 ret = amdgpu_bo_kmap(pd, NULL);
390 if (ret) {
391 pr_err("failed to kmap PD, ret=%d\n", ret);
392 return ret;
393 }
394 }
395
396 return 0;
397}
398
399static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
400{
401 struct amdgpu_bo *pd = vm->root.base.bo;
402 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
403 int ret;
404
405 ret = amdgpu_vm_update_pdes(adev, vm, false);
406 if (ret)
407 return ret;
408
409 return amdgpu_sync_fence(sync, vm->last_update);
410}
411
412static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
413{
414 struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
415 bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
416 bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
417 uint32_t mapping_flags;
418 uint64_t pte_flags;
419 bool snoop = false;
420
421 mapping_flags = AMDGPU_VM_PAGE_READABLE;
422 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
423 mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
424 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
425 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
426
427 switch (adev->asic_type) {
428 case CHIP_ARCTURUS:
429 if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
430 if (bo_adev == adev)
431 mapping_flags |= coherent ?
432 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
433 else
434 mapping_flags |= AMDGPU_VM_MTYPE_UC;
435 } else {
436 mapping_flags |= coherent ?
437 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
438 }
439 break;
440 case CHIP_ALDEBARAN:
441 if (coherent && uncached) {
442 if (adev->gmc.xgmi.connected_to_cpu ||
443 !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
444 snoop = true;
445 mapping_flags |= AMDGPU_VM_MTYPE_UC;
446 } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
447 if (bo_adev == adev) {
448 mapping_flags |= coherent ?
449 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
450 if (adev->gmc.xgmi.connected_to_cpu)
451 snoop = true;
452 } else {
453 mapping_flags |= AMDGPU_VM_MTYPE_UC;
454 if (amdgpu_xgmi_same_hive(adev, bo_adev))
455 snoop = true;
456 }
457 } else {
458 snoop = true;
459 mapping_flags |= coherent ?
460 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
461 }
462 break;
463 default:
464 mapping_flags |= coherent ?
465 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
466 }
467
468 pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
469 pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
470
471 return pte_flags;
472}
473
474
475
476
477
478
479
480
481
482
483
484
485
486static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
487 struct amdgpu_vm *vm, bool is_aql,
488 struct kfd_bo_va_list **p_bo_va_entry)
489{
490 int ret;
491 struct kfd_bo_va_list *bo_va_entry;
492 struct amdgpu_bo *bo = mem->bo;
493 uint64_t va = mem->va;
494 struct list_head *list_bo_va = &mem->bo_va_list;
495 unsigned long bo_size = bo->tbo.base.size;
496
497 if (!va) {
498 pr_err("Invalid VA when adding BO to VM\n");
499 return -EINVAL;
500 }
501
502 if (is_aql)
503 va += bo_size;
504
505 bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
506 if (!bo_va_entry)
507 return -ENOMEM;
508
509 pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
510 va + bo_size, vm);
511
512
513 bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
514 if (!bo_va_entry->bo_va) {
515 ret = -EINVAL;
516 pr_err("Failed to add BO object to VM. ret == %d\n",
517 ret);
518 goto err_vmadd;
519 }
520
521 bo_va_entry->va = va;
522 bo_va_entry->pte_flags = get_pte_flags(adev, mem);
523 bo_va_entry->kgd_dev = (void *)adev;
524 list_add(&bo_va_entry->bo_list, list_bo_va);
525
526 if (p_bo_va_entry)
527 *p_bo_va_entry = bo_va_entry;
528
529
530 ret = vm_validate_pt_pd_bos(vm);
531 if (ret) {
532 pr_err("validate_pt_pd_bos() failed\n");
533 goto err_alloc_pts;
534 }
535
536 return 0;
537
538err_alloc_pts:
539 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
540 list_del(&bo_va_entry->bo_list);
541err_vmadd:
542 kfree(bo_va_entry);
543 return ret;
544}
545
546static void remove_bo_from_vm(struct amdgpu_device *adev,
547 struct kfd_bo_va_list *entry, unsigned long size)
548{
549 pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
550 entry->va,
551 entry->va + size, entry);
552 amdgpu_vm_bo_rmv(adev, entry->bo_va);
553 list_del(&entry->bo_list);
554 kfree(entry);
555}
556
557static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
558 struct amdkfd_process_info *process_info,
559 bool userptr)
560{
561 struct ttm_validate_buffer *entry = &mem->validate_list;
562 struct amdgpu_bo *bo = mem->bo;
563
564 INIT_LIST_HEAD(&entry->head);
565 entry->num_shared = 1;
566 entry->bo = &bo->tbo;
567 mutex_lock(&process_info->lock);
568 if (userptr)
569 list_add_tail(&entry->head, &process_info->userptr_valid_list);
570 else
571 list_add_tail(&entry->head, &process_info->kfd_bo_list);
572 mutex_unlock(&process_info->lock);
573}
574
575static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
576 struct amdkfd_process_info *process_info)
577{
578 struct ttm_validate_buffer *bo_list_entry;
579
580 bo_list_entry = &mem->validate_list;
581 mutex_lock(&process_info->lock);
582 list_del(&bo_list_entry->head);
583 mutex_unlock(&process_info->lock);
584}
585
586
587
588
589
590
591
592
593
594
595
596
597
598static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
599{
600 struct amdkfd_process_info *process_info = mem->process_info;
601 struct amdgpu_bo *bo = mem->bo;
602 struct ttm_operation_ctx ctx = { true, false };
603 int ret = 0;
604
605 mutex_lock(&process_info->lock);
606
607 ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
608 if (ret) {
609 pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
610 goto out;
611 }
612
613 ret = amdgpu_mn_register(bo, user_addr);
614 if (ret) {
615 pr_err("%s: Failed to register MMU notifier: %d\n",
616 __func__, ret);
617 goto out;
618 }
619
620 ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
621 if (ret) {
622 pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
623 goto unregister_out;
624 }
625
626 ret = amdgpu_bo_reserve(bo, true);
627 if (ret) {
628 pr_err("%s: Failed to reserve BO\n", __func__);
629 goto release_out;
630 }
631 amdgpu_bo_placement_from_domain(bo, mem->domain);
632 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
633 if (ret)
634 pr_err("%s: failed to validate BO\n", __func__);
635 amdgpu_bo_unreserve(bo);
636
637release_out:
638 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
639unregister_out:
640 if (ret)
641 amdgpu_mn_unregister(bo);
642out:
643 mutex_unlock(&process_info->lock);
644 return ret;
645}
646
647
648
649
650
651
/*
 * State shared between the reserve_bo_and_*() helpers and
 * unreserve_bo_and_vms().
 */
struct bo_vm_reservation_context {
	struct amdgpu_bo_list_entry kfd_bo;	/* list entry for the BO itself */
	unsigned int n_vms;			/* number of entries in vm_pd */
	struct amdgpu_bo_list_entry *vm_pd;	/* kcalloc'ed array, one PD entry per VM */
	struct ww_acquire_ctx ticket;		/* ticket passed to ttm_eu_reserve_buffers() */
	struct list_head list, duplicates;	/* reservation list and duplicates list */
	struct amdgpu_sync *sync;		/* points at the kgd_mem's sync object */
	bool reserved;				/* true once the buffers were reserved */
};

/*
 * Filter used by reserve_bo_and_cond_vms(): an entry is selected when its
 * is_mapped value equals the requested value, or always for BO_VM_ALL.
 */
enum bo_vm_match {
	BO_VM_NOT_MAPPED = 0,	/* compares equal to is_mapped == false */
	BO_VM_MAPPED,		/* compares equal to is_mapped == true */
	BO_VM_ALL,		/* select entries regardless of is_mapped */
};
667
668
669
670
671
672
673
674static int reserve_bo_and_vm(struct kgd_mem *mem,
675 struct amdgpu_vm *vm,
676 struct bo_vm_reservation_context *ctx)
677{
678 struct amdgpu_bo *bo = mem->bo;
679 int ret;
680
681 WARN_ON(!vm);
682
683 ctx->reserved = false;
684 ctx->n_vms = 1;
685 ctx->sync = &mem->sync;
686
687 INIT_LIST_HEAD(&ctx->list);
688 INIT_LIST_HEAD(&ctx->duplicates);
689
690 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
691 if (!ctx->vm_pd)
692 return -ENOMEM;
693
694 ctx->kfd_bo.priority = 0;
695 ctx->kfd_bo.tv.bo = &bo->tbo;
696 ctx->kfd_bo.tv.num_shared = 1;
697 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
698
699 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
700
701 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
702 false, &ctx->duplicates);
703 if (ret) {
704 pr_err("Failed to reserve buffers in ttm.\n");
705 kfree(ctx->vm_pd);
706 ctx->vm_pd = NULL;
707 return ret;
708 }
709
710 ctx->reserved = true;
711 return 0;
712}
713
714
715
716
717
718
719
720
721
722
723
724static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
725 struct amdgpu_vm *vm, enum bo_vm_match map_type,
726 struct bo_vm_reservation_context *ctx)
727{
728 struct amdgpu_bo *bo = mem->bo;
729 struct kfd_bo_va_list *entry;
730 unsigned int i;
731 int ret;
732
733 ctx->reserved = false;
734 ctx->n_vms = 0;
735 ctx->vm_pd = NULL;
736 ctx->sync = &mem->sync;
737
738 INIT_LIST_HEAD(&ctx->list);
739 INIT_LIST_HEAD(&ctx->duplicates);
740
741 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
742 if ((vm && vm != entry->bo_va->base.vm) ||
743 (entry->is_mapped != map_type
744 && map_type != BO_VM_ALL))
745 continue;
746
747 ctx->n_vms++;
748 }
749
750 if (ctx->n_vms != 0) {
751 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
752 GFP_KERNEL);
753 if (!ctx->vm_pd)
754 return -ENOMEM;
755 }
756
757 ctx->kfd_bo.priority = 0;
758 ctx->kfd_bo.tv.bo = &bo->tbo;
759 ctx->kfd_bo.tv.num_shared = 1;
760 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
761
762 i = 0;
763 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
764 if ((vm && vm != entry->bo_va->base.vm) ||
765 (entry->is_mapped != map_type
766 && map_type != BO_VM_ALL))
767 continue;
768
769 amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
770 &ctx->vm_pd[i]);
771 i++;
772 }
773
774 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
775 false, &ctx->duplicates);
776 if (ret) {
777 pr_err("Failed to reserve buffers in ttm.\n");
778 kfree(ctx->vm_pd);
779 ctx->vm_pd = NULL;
780 return ret;
781 }
782
783 ctx->reserved = true;
784 return 0;
785}
786
787
788
789
790
791
792
793
794
795
796
797static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
798 bool wait, bool intr)
799{
800 int ret = 0;
801
802 if (wait)
803 ret = amdgpu_sync_wait(ctx->sync, intr);
804
805 if (ctx->reserved)
806 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
807 kfree(ctx->vm_pd);
808
809 ctx->sync = NULL;
810
811 ctx->reserved = false;
812 ctx->vm_pd = NULL;
813
814 return ret;
815}
816
817static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
818 struct kfd_bo_va_list *entry,
819 struct amdgpu_sync *sync)
820{
821 struct amdgpu_bo_va *bo_va = entry->bo_va;
822 struct amdgpu_vm *vm = bo_va->base.vm;
823
824 amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
825
826 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
827
828 amdgpu_sync_fence(sync, bo_va->last_pt_update);
829
830 return 0;
831}
832
833static int update_gpuvm_pte(struct amdgpu_device *adev,
834 struct kfd_bo_va_list *entry,
835 struct amdgpu_sync *sync)
836{
837 int ret;
838 struct amdgpu_bo_va *bo_va = entry->bo_va;
839
840
841 ret = amdgpu_vm_bo_update(adev, bo_va, false);
842 if (ret) {
843 pr_err("amdgpu_vm_bo_update failed\n");
844 return ret;
845 }
846
847 return amdgpu_sync_fence(sync, bo_va->last_pt_update);
848}
849
850static int map_bo_to_gpuvm(struct amdgpu_device *adev,
851 struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
852 bool no_update_pte)
853{
854 int ret;
855
856
857 ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
858 amdgpu_bo_size(entry->bo_va->base.bo),
859 entry->pte_flags);
860 if (ret) {
861 pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
862 entry->va, ret);
863 return ret;
864 }
865
866 if (no_update_pte)
867 return 0;
868
869 ret = update_gpuvm_pte(adev, entry, sync);
870 if (ret) {
871 pr_err("update_gpuvm_pte() failed\n");
872 goto update_gpuvm_pte_failed;
873 }
874
875 return 0;
876
877update_gpuvm_pte_failed:
878 unmap_bo_from_gpuvm(adev, entry, sync);
879 return ret;
880}
881
882static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
883{
884 struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
885
886 if (!sg)
887 return NULL;
888 if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
889 kfree(sg);
890 return NULL;
891 }
892 sg->sgl->dma_address = addr;
893 sg->sgl->length = size;
894#ifdef CONFIG_NEED_SG_DMA_LENGTH
895 sg->sgl->dma_length = size;
896#endif
897 return sg;
898}
899
900static int process_validate_vms(struct amdkfd_process_info *process_info)
901{
902 struct amdgpu_vm *peer_vm;
903 int ret;
904
905 list_for_each_entry(peer_vm, &process_info->vm_list_head,
906 vm_list_node) {
907 ret = vm_validate_pt_pd_bos(peer_vm);
908 if (ret)
909 return ret;
910 }
911
912 return 0;
913}
914
915static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
916 struct amdgpu_sync *sync)
917{
918 struct amdgpu_vm *peer_vm;
919 int ret;
920
921 list_for_each_entry(peer_vm, &process_info->vm_list_head,
922 vm_list_node) {
923 struct amdgpu_bo *pd = peer_vm->root.base.bo;
924
925 ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
926 AMDGPU_SYNC_NE_OWNER,
927 AMDGPU_FENCE_OWNER_KFD);
928 if (ret)
929 return ret;
930 }
931
932 return 0;
933}
934
935static int process_update_pds(struct amdkfd_process_info *process_info,
936 struct amdgpu_sync *sync)
937{
938 struct amdgpu_vm *peer_vm;
939 int ret;
940
941 list_for_each_entry(peer_vm, &process_info->vm_list_head,
942 vm_list_node) {
943 ret = vm_update_pds(peer_vm, sync);
944 if (ret)
945 return ret;
946 }
947
948 return 0;
949}
950
951static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
952 struct dma_fence **ef)
953{
954 struct amdkfd_process_info *info = NULL;
955 int ret;
956
957 if (!*process_info) {
958 info = kzalloc(sizeof(*info), GFP_KERNEL);
959 if (!info)
960 return -ENOMEM;
961
962 mutex_init(&info->lock);
963 INIT_LIST_HEAD(&info->vm_list_head);
964 INIT_LIST_HEAD(&info->kfd_bo_list);
965 INIT_LIST_HEAD(&info->userptr_valid_list);
966 INIT_LIST_HEAD(&info->userptr_inval_list);
967
968 info->eviction_fence =
969 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
970 current->mm);
971 if (!info->eviction_fence) {
972 pr_err("Failed to create eviction fence\n");
973 ret = -ENOMEM;
974 goto create_evict_fence_fail;
975 }
976
977 info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
978 atomic_set(&info->evicted_bos, 0);
979 INIT_DELAYED_WORK(&info->restore_userptr_work,
980 amdgpu_amdkfd_restore_userptr_worker);
981
982 *process_info = info;
983 *ef = dma_fence_get(&info->eviction_fence->base);
984 }
985
986 vm->process_info = *process_info;
987
988
989 ret = amdgpu_bo_reserve(vm->root.base.bo, true);
990 if (ret)
991 goto reserve_pd_fail;
992 ret = vm_validate_pt_pd_bos(vm);
993 if (ret) {
994 pr_err("validate_pt_pd_bos() failed\n");
995 goto validate_pd_fail;
996 }
997 ret = amdgpu_bo_sync_wait(vm->root.base.bo,
998 AMDGPU_FENCE_OWNER_KFD, false);
999 if (ret)
1000 goto wait_pd_fail;
1001 ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
1002 if (ret)
1003 goto reserve_shared_fail;
1004 amdgpu_bo_fence(vm->root.base.bo,
1005 &vm->process_info->eviction_fence->base, true);
1006 amdgpu_bo_unreserve(vm->root.base.bo);
1007
1008
1009 mutex_lock(&vm->process_info->lock);
1010 list_add_tail(&vm->vm_list_node,
1011 &(vm->process_info->vm_list_head));
1012 vm->process_info->n_vms++;
1013 mutex_unlock(&vm->process_info->lock);
1014
1015 return 0;
1016
1017reserve_shared_fail:
1018wait_pd_fail:
1019validate_pd_fail:
1020 amdgpu_bo_unreserve(vm->root.base.bo);
1021reserve_pd_fail:
1022 vm->process_info = NULL;
1023 if (info) {
1024
1025 dma_fence_put(&info->eviction_fence->base);
1026 dma_fence_put(*ef);
1027 *ef = NULL;
1028 *process_info = NULL;
1029 put_pid(info->pid);
1030create_evict_fence_fail:
1031 mutex_destroy(&info->lock);
1032 kfree(info);
1033 }
1034 return ret;
1035}
1036
1037int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
1038 struct file *filp, u32 pasid,
1039 void **vm, void **process_info,
1040 struct dma_fence **ef)
1041{
1042 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1043 struct drm_file *drm_priv = filp->private_data;
1044 struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
1045 struct amdgpu_vm *avm = &drv_priv->vm;
1046 int ret;
1047
1048
1049 if (avm->process_info)
1050 return -EINVAL;
1051
1052
1053 ret = amdgpu_vm_make_compute(adev, avm, pasid);
1054 if (ret)
1055 return ret;
1056
1057
1058 ret = init_kfd_vm(avm, process_info, ef);
1059 if (ret)
1060 return ret;
1061
1062 *vm = (void *)avm;
1063
1064 return 0;
1065}
1066
1067void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
1068 struct amdgpu_vm *vm)
1069{
1070 struct amdkfd_process_info *process_info = vm->process_info;
1071 struct amdgpu_bo *pd = vm->root.base.bo;
1072
1073 if (!process_info)
1074 return;
1075
1076
1077 amdgpu_bo_reserve(pd, false);
1078 amdgpu_bo_fence(pd, NULL, false);
1079 amdgpu_bo_unreserve(pd);
1080
1081
1082 mutex_lock(&process_info->lock);
1083 process_info->n_vms--;
1084 list_del(&vm->vm_list_node);
1085 mutex_unlock(&process_info->lock);
1086
1087 vm->process_info = NULL;
1088
1089
1090 if (!process_info->n_vms) {
1091 WARN_ON(!list_empty(&process_info->kfd_bo_list));
1092 WARN_ON(!list_empty(&process_info->userptr_valid_list));
1093 WARN_ON(!list_empty(&process_info->userptr_inval_list));
1094
1095 dma_fence_put(&process_info->eviction_fence->base);
1096 cancel_delayed_work_sync(&process_info->restore_userptr_work);
1097 put_pid(process_info->pid);
1098 mutex_destroy(&process_info->lock);
1099 kfree(process_info);
1100 }
1101}
1102
/*
 * Release a process VM previously acquired for KFD by handing it to
 * amdgpu_vm_release_compute(). Warns and does nothing if @kgd or @vm is
 * NULL.
 */
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev;
	struct amdgpu_vm *avm;

	if (WARN_ON(!kgd || !vm))
		return;

	adev = get_amdgpu_device(kgd);
	avm = (struct amdgpu_vm *)vm;

	pr_debug("Releasing process vm %p\n", vm);

	amdgpu_vm_release_compute(adev, avm);
}
1121
1122uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
1123{
1124 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1125 struct amdgpu_bo *pd = avm->root.base.bo;
1126 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
1127
1128 if (adev->asic_type < CHIP_VEGA10)
1129 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
1130 return avm->pd_phys_addr;
1131}
1132
1133int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1134 struct kgd_dev *kgd, uint64_t va, uint64_t size,
1135 void *vm, struct kgd_mem **mem,
1136 uint64_t *offset, uint32_t flags)
1137{
1138 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1139 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1140 enum ttm_bo_type bo_type = ttm_bo_type_device;
1141 struct sg_table *sg = NULL;
1142 uint64_t user_addr = 0;
1143 struct amdgpu_bo *bo;
1144 struct drm_gem_object *gobj;
1145 u32 domain, alloc_domain;
1146 u64 alloc_flags;
1147 int ret;
1148
1149
1150
1151
1152 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
1153 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
1154 alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
1155 alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
1156 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
1157 AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
1158 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
1159 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
1160 alloc_flags = 0;
1161 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
1162 domain = AMDGPU_GEM_DOMAIN_GTT;
1163 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1164 alloc_flags = 0;
1165 if (!offset || !*offset)
1166 return -EINVAL;
1167 user_addr = untagged_addr(*offset);
1168 } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
1169 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1170 domain = AMDGPU_GEM_DOMAIN_GTT;
1171 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1172 bo_type = ttm_bo_type_sg;
1173 alloc_flags = 0;
1174 if (size > UINT_MAX)
1175 return -EINVAL;
1176 sg = create_doorbell_sg(*offset, size);
1177 if (!sg)
1178 return -ENOMEM;
1179 } else {
1180 return -EINVAL;
1181 }
1182
1183 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1184 if (!*mem) {
1185 ret = -ENOMEM;
1186 goto err;
1187 }
1188 INIT_LIST_HEAD(&(*mem)->bo_va_list);
1189 mutex_init(&(*mem)->lock);
1190 (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
1191
1192
1193
1194
1195
1196 if ((*mem)->aql_queue)
1197 size = size >> 1;
1198
1199 (*mem)->alloc_flags = flags;
1200
1201 amdgpu_sync_create(&(*mem)->sync);
1202
1203 ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
1204 if (ret) {
1205 pr_debug("Insufficient memory\n");
1206 goto err_reserve_limit;
1207 }
1208
1209 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
1210 va, size, domain_string(alloc_domain));
1211
1212 ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
1213 bo_type, NULL, &gobj);
1214 if (ret) {
1215 pr_debug("Failed to create BO on domain %s. ret %d\n",
1216 domain_string(alloc_domain), ret);
1217 goto err_bo_create;
1218 }
1219 bo = gem_to_amdgpu_bo(gobj);
1220 if (bo_type == ttm_bo_type_sg) {
1221 bo->tbo.sg = sg;
1222 bo->tbo.ttm->sg = sg;
1223 }
1224 bo->kfd_bo = *mem;
1225 (*mem)->bo = bo;
1226 if (user_addr)
1227 bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
1228
1229 (*mem)->va = va;
1230 (*mem)->domain = domain;
1231 (*mem)->mapped_to_gpu_memory = 0;
1232 (*mem)->process_info = avm->process_info;
1233 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
1234
1235 if (user_addr) {
1236 ret = init_user_pages(*mem, user_addr);
1237 if (ret)
1238 goto allocate_init_user_pages_failed;
1239 }
1240
1241 if (offset)
1242 *offset = amdgpu_bo_mmap_offset(bo);
1243
1244 return 0;
1245
1246allocate_init_user_pages_failed:
1247 remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
1248 amdgpu_bo_unref(&bo);
1249
1250 goto err_reserve_limit;
1251err_bo_create:
1252 unreserve_mem_limit(adev, size, alloc_domain, !!sg);
1253err_reserve_limit:
1254 mutex_destroy(&(*mem)->lock);
1255 kfree(*mem);
1256err:
1257 if (sg) {
1258 sg_free_table(sg);
1259 kfree(sg);
1260 }
1261 return ret;
1262}
1263
1264int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1265 struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
1266{
1267 struct amdkfd_process_info *process_info = mem->process_info;
1268 unsigned long bo_size = mem->bo->tbo.base.size;
1269 struct kfd_bo_va_list *entry, *tmp;
1270 struct bo_vm_reservation_context ctx;
1271 struct ttm_validate_buffer *bo_list_entry;
1272 unsigned int mapped_to_gpu_memory;
1273 int ret;
1274 bool is_imported = false;
1275
1276 mutex_lock(&mem->lock);
1277 mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
1278 is_imported = mem->is_imported;
1279 mutex_unlock(&mem->lock);
1280
1281
1282
1283
1284 if (mapped_to_gpu_memory > 0) {
1285 pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1286 mem->va, bo_size);
1287 return -EBUSY;
1288 }
1289
1290
1291 bo_list_entry = &mem->validate_list;
1292 mutex_lock(&process_info->lock);
1293 list_del(&bo_list_entry->head);
1294 mutex_unlock(&process_info->lock);
1295
1296
1297 amdgpu_mn_unregister(mem->bo);
1298
1299 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
1300 if (unlikely(ret))
1301 return ret;
1302
1303
1304
1305
1306
1307 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1308 process_info->eviction_fence);
1309 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
1310 mem->va + bo_size * (1 + mem->aql_queue));
1311
1312
1313 list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
1314 remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
1315 entry, bo_size);
1316
1317 ret = unreserve_bo_and_vms(&ctx, false, false);
1318
1319
1320 amdgpu_sync_free(&mem->sync);
1321
1322
1323
1324
1325 if (mem->bo->tbo.sg) {
1326 sg_free_table(mem->bo->tbo.sg);
1327 kfree(mem->bo->tbo.sg);
1328 }
1329
1330
1331
1332
1333 if (size) {
1334 if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
1335 (!is_imported))
1336 *size = bo_size;
1337 else
1338 *size = 0;
1339 }
1340
1341
1342 drm_gem_object_put(&mem->bo->tbo.base);
1343 mutex_destroy(&mem->lock);
1344 kfree(mem);
1345
1346 return ret;
1347}
1348
1349int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1350 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
1351{
1352 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1353 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1354 int ret;
1355 struct amdgpu_bo *bo;
1356 uint32_t domain;
1357 struct kfd_bo_va_list *entry;
1358 struct bo_vm_reservation_context ctx;
1359 struct kfd_bo_va_list *bo_va_entry = NULL;
1360 struct kfd_bo_va_list *bo_va_entry_aql = NULL;
1361 unsigned long bo_size;
1362 bool is_invalid_userptr = false;
1363
1364 bo = mem->bo;
1365 if (!bo) {
1366 pr_err("Invalid BO when mapping memory to GPU\n");
1367 return -EINVAL;
1368 }
1369
1370
1371
1372
1373
1374 mutex_lock(&mem->process_info->lock);
1375
1376
1377
1378
1379
1380 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1381 mmap_write_lock(current->mm);
1382 is_invalid_userptr = atomic_read(&mem->invalid);
1383 mmap_write_unlock(current->mm);
1384 }
1385
1386 mutex_lock(&mem->lock);
1387
1388 domain = mem->domain;
1389 bo_size = bo->tbo.base.size;
1390
1391 pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
1392 mem->va,
1393 mem->va + bo_size * (1 + mem->aql_queue),
1394 vm, domain_string(domain));
1395
1396 ret = reserve_bo_and_vm(mem, vm, &ctx);
1397 if (unlikely(ret))
1398 goto out;
1399
1400
1401
1402
1403
1404
1405 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
1406 bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
1407 is_invalid_userptr = true;
1408
1409 if (check_if_add_bo_to_vm(avm, mem)) {
1410 ret = add_bo_to_vm(adev, mem, avm, false,
1411 &bo_va_entry);
1412 if (ret)
1413 goto add_bo_to_vm_failed;
1414 if (mem->aql_queue) {
1415 ret = add_bo_to_vm(adev, mem, avm,
1416 true, &bo_va_entry_aql);
1417 if (ret)
1418 goto add_bo_to_vm_failed_aql;
1419 }
1420 } else {
1421 ret = vm_validate_pt_pd_bos(avm);
1422 if (unlikely(ret))
1423 goto add_bo_to_vm_failed;
1424 }
1425
1426 if (mem->mapped_to_gpu_memory == 0 &&
1427 !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1428
1429
1430
1431
1432 ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
1433 if (ret) {
1434 pr_debug("Validate failed\n");
1435 goto map_bo_to_gpuvm_failed;
1436 }
1437 }
1438
1439 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
1440 if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
1441 pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
1442 entry->va, entry->va + bo_size,
1443 entry);
1444
1445 ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
1446 is_invalid_userptr);
1447 if (ret) {
1448 pr_err("Failed to map bo to gpuvm\n");
1449 goto map_bo_to_gpuvm_failed;
1450 }
1451
1452 ret = vm_update_pds(vm, ctx.sync);
1453 if (ret) {
1454 pr_err("Failed to update page directories\n");
1455 goto map_bo_to_gpuvm_failed;
1456 }
1457
1458 entry->is_mapped = true;
1459 mem->mapped_to_gpu_memory++;
1460 pr_debug("\t INC mapping count %d\n",
1461 mem->mapped_to_gpu_memory);
1462 }
1463 }
1464
1465 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
1466 amdgpu_bo_fence(bo,
1467 &avm->process_info->eviction_fence->base,
1468 true);
1469 ret = unreserve_bo_and_vms(&ctx, false, false);
1470
1471 goto out;
1472
1473map_bo_to_gpuvm_failed:
1474 if (bo_va_entry_aql)
1475 remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
1476add_bo_to_vm_failed_aql:
1477 if (bo_va_entry)
1478 remove_bo_from_vm(adev, bo_va_entry, bo_size);
1479add_bo_to_vm_failed:
1480 unreserve_bo_and_vms(&ctx, false, false);
1481out:
1482 mutex_unlock(&mem->process_info->lock);
1483 mutex_unlock(&mem->lock);
1484 return ret;
1485}
1486
1487int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1488 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
1489{
1490 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1491 struct amdkfd_process_info *process_info =
1492 ((struct amdgpu_vm *)vm)->process_info;
1493 unsigned long bo_size = mem->bo->tbo.base.size;
1494 struct kfd_bo_va_list *entry;
1495 struct bo_vm_reservation_context ctx;
1496 int ret;
1497
1498 mutex_lock(&mem->lock);
1499
1500 ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
1501 if (unlikely(ret))
1502 goto out;
1503
1504 if (ctx.n_vms == 0) {
1505 ret = -EINVAL;
1506 goto unreserve_out;
1507 }
1508
1509 ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
1510 if (unlikely(ret))
1511 goto unreserve_out;
1512
1513 pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
1514 mem->va,
1515 mem->va + bo_size * (1 + mem->aql_queue),
1516 vm);
1517
1518 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
1519 if (entry->bo_va->base.vm == vm && entry->is_mapped) {
1520 pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
1521 entry->va,
1522 entry->va + bo_size,
1523 entry);
1524
1525 ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
1526 if (ret == 0) {
1527 entry->is_mapped = false;
1528 } else {
1529 pr_err("failed to unmap VA 0x%llx\n",
1530 mem->va);
1531 goto unreserve_out;
1532 }
1533
1534 mem->mapped_to_gpu_memory--;
1535 pr_debug("\t DEC mapping count %d\n",
1536 mem->mapped_to_gpu_memory);
1537 }
1538 }
1539
1540
1541
1542
1543 if (mem->mapped_to_gpu_memory == 0 &&
1544 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
1545 !mem->bo->tbo.pin_count)
1546 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1547 process_info->eviction_fence);
1548
1549unreserve_out:
1550 unreserve_bo_and_vms(&ctx, false, false);
1551out:
1552 mutex_unlock(&mem->lock);
1553 return ret;
1554}
1555
1556int amdgpu_amdkfd_gpuvm_sync_memory(
1557 struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
1558{
1559 struct amdgpu_sync sync;
1560 int ret;
1561
1562 amdgpu_sync_create(&sync);
1563
1564 mutex_lock(&mem->lock);
1565 amdgpu_sync_clone(&mem->sync, &sync);
1566 mutex_unlock(&mem->lock);
1567
1568 ret = amdgpu_sync_wait(&sync, intr);
1569 amdgpu_sync_free(&sync);
1570 return ret;
1571}
1572
1573int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
1574 struct kgd_mem *mem, void **kptr, uint64_t *size)
1575{
1576 int ret;
1577 struct amdgpu_bo *bo = mem->bo;
1578
1579 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1580 pr_err("userptr can't be mapped to kernel\n");
1581 return -EINVAL;
1582 }
1583
1584
1585
1586
1587 mutex_lock(&mem->process_info->lock);
1588
1589 ret = amdgpu_bo_reserve(bo, true);
1590 if (ret) {
1591 pr_err("Failed to reserve bo. ret %d\n", ret);
1592 goto bo_reserve_failed;
1593 }
1594
1595 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
1596 if (ret) {
1597 pr_err("Failed to pin bo. ret %d\n", ret);
1598 goto pin_failed;
1599 }
1600
1601 ret = amdgpu_bo_kmap(bo, kptr);
1602 if (ret) {
1603 pr_err("Failed to map bo to kernel. ret %d\n", ret);
1604 goto kmap_failed;
1605 }
1606
1607 amdgpu_amdkfd_remove_eviction_fence(
1608 bo, mem->process_info->eviction_fence);
1609 list_del_init(&mem->validate_list.head);
1610
1611 if (size)
1612 *size = amdgpu_bo_size(bo);
1613
1614 amdgpu_bo_unreserve(bo);
1615
1616 mutex_unlock(&mem->process_info->lock);
1617 return 0;
1618
1619kmap_failed:
1620 amdgpu_bo_unpin(bo);
1621pin_failed:
1622 amdgpu_bo_unreserve(bo);
1623bo_reserve_failed:
1624 mutex_unlock(&mem->process_info->lock);
1625
1626 return ret;
1627}
1628
1629int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
1630 struct kfd_vm_fault_info *mem)
1631{
1632 struct amdgpu_device *adev;
1633
1634 adev = (struct amdgpu_device *)kgd;
1635 if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
1636 *mem = *adev->gmc.vm_fault_info;
1637 mb();
1638 atomic_set(&adev->gmc.vm_fault_info_updated, 0);
1639 }
1640 return 0;
1641}
1642
1643int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
1644 struct dma_buf *dma_buf,
1645 uint64_t va, void *vm,
1646 struct kgd_mem **mem, uint64_t *size,
1647 uint64_t *mmap_offset)
1648{
1649 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
1650 struct drm_gem_object *obj;
1651 struct amdgpu_bo *bo;
1652 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1653
1654 if (dma_buf->ops != &amdgpu_dmabuf_ops)
1655
1656 return -EINVAL;
1657
1658 obj = dma_buf->priv;
1659 if (drm_to_adev(obj->dev) != adev)
1660
1661 return -EINVAL;
1662
1663 bo = gem_to_amdgpu_bo(obj);
1664 if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
1665 AMDGPU_GEM_DOMAIN_GTT)))
1666
1667 return -EINVAL;
1668
1669 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1670 if (!*mem)
1671 return -ENOMEM;
1672
1673 if (size)
1674 *size = amdgpu_bo_size(bo);
1675
1676 if (mmap_offset)
1677 *mmap_offset = amdgpu_bo_mmap_offset(bo);
1678
1679 INIT_LIST_HEAD(&(*mem)->bo_va_list);
1680 mutex_init(&(*mem)->lock);
1681
1682 (*mem)->alloc_flags =
1683 ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1684 KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
1685 | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
1686 | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
1687
1688 drm_gem_object_get(&bo->tbo.base);
1689 (*mem)->bo = bo;
1690 (*mem)->va = va;
1691 (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
1692 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
1693 (*mem)->mapped_to_gpu_memory = 0;
1694 (*mem)->process_info = avm->process_info;
1695 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
1696 amdgpu_sync_create(&(*mem)->sync);
1697 (*mem)->is_imported = true;
1698
1699 return 0;
1700}
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
1714 struct mm_struct *mm)
1715{
1716 struct amdkfd_process_info *process_info = mem->process_info;
1717 int evicted_bos;
1718 int r = 0;
1719
1720 atomic_inc(&mem->invalid);
1721 evicted_bos = atomic_inc_return(&process_info->evicted_bos);
1722 if (evicted_bos == 1) {
1723
1724 r = kgd2kfd_quiesce_mm(mm);
1725 if (r)
1726 pr_err("Failed to quiesce KFD\n");
1727 schedule_delayed_work(&process_info->restore_userptr_work,
1728 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
1729 }
1730
1731 return r;
1732}
1733
1734
1735
1736
1737
1738
1739
1740static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
1741 struct mm_struct *mm)
1742{
1743 struct kgd_mem *mem, *tmp_mem;
1744 struct amdgpu_bo *bo;
1745 struct ttm_operation_ctx ctx = { false, false };
1746 int invalid, ret;
1747
1748
1749
1750
1751 list_for_each_entry_safe(mem, tmp_mem,
1752 &process_info->userptr_valid_list,
1753 validate_list.head) {
1754 if (!atomic_read(&mem->invalid))
1755 continue;
1756
1757 bo = mem->bo;
1758
1759 if (amdgpu_bo_reserve(bo, true))
1760 return -EAGAIN;
1761 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
1762 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1763 amdgpu_bo_unreserve(bo);
1764 if (ret) {
1765 pr_err("%s: Failed to invalidate userptr BO\n",
1766 __func__);
1767 return -EAGAIN;
1768 }
1769
1770 list_move_tail(&mem->validate_list.head,
1771 &process_info->userptr_inval_list);
1772 }
1773
1774 if (list_empty(&process_info->userptr_inval_list))
1775 return 0;
1776
1777
1778 list_for_each_entry(mem, &process_info->userptr_inval_list,
1779 validate_list.head) {
1780 invalid = atomic_read(&mem->invalid);
1781 if (!invalid)
1782
1783
1784
1785 continue;
1786
1787 bo = mem->bo;
1788
1789
1790 ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
1791 if (ret) {
1792 pr_debug("%s: Failed to get user pages: %d\n",
1793 __func__, ret);
1794
1795
1796 return ret;
1797 }
1798
1799
1800
1801
1802
1803 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1804
1805
1806
1807
1808 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
1809 return -EAGAIN;
1810 }
1811
1812 return 0;
1813}
1814
1815
1816
1817
1818
1819
1820
1821static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
1822{
1823 struct amdgpu_bo_list_entry *pd_bo_list_entries;
1824 struct list_head resv_list, duplicates;
1825 struct ww_acquire_ctx ticket;
1826 struct amdgpu_sync sync;
1827
1828 struct amdgpu_vm *peer_vm;
1829 struct kgd_mem *mem, *tmp_mem;
1830 struct amdgpu_bo *bo;
1831 struct ttm_operation_ctx ctx = { false, false };
1832 int i, ret;
1833
1834 pd_bo_list_entries = kcalloc(process_info->n_vms,
1835 sizeof(struct amdgpu_bo_list_entry),
1836 GFP_KERNEL);
1837 if (!pd_bo_list_entries) {
1838 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
1839 ret = -ENOMEM;
1840 goto out_no_mem;
1841 }
1842
1843 INIT_LIST_HEAD(&resv_list);
1844 INIT_LIST_HEAD(&duplicates);
1845
1846
1847 i = 0;
1848 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1849 vm_list_node)
1850 amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
1851 &pd_bo_list_entries[i++]);
1852
1853 list_for_each_entry(mem, &process_info->userptr_inval_list,
1854 validate_list.head) {
1855 list_add_tail(&mem->resv_list.head, &resv_list);
1856 mem->resv_list.bo = mem->validate_list.bo;
1857 mem->resv_list.num_shared = mem->validate_list.num_shared;
1858 }
1859
1860
1861 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
1862 WARN(!list_empty(&duplicates), "Duplicates should be empty");
1863 if (ret)
1864 goto out_free;
1865
1866 amdgpu_sync_create(&sync);
1867
1868 ret = process_validate_vms(process_info);
1869 if (ret)
1870 goto unreserve_out;
1871
1872
1873 list_for_each_entry_safe(mem, tmp_mem,
1874 &process_info->userptr_inval_list,
1875 validate_list.head) {
1876 struct kfd_bo_va_list *bo_va_entry;
1877
1878 bo = mem->bo;
1879
1880
1881 if (bo->tbo.ttm->pages[0]) {
1882 amdgpu_bo_placement_from_domain(bo, mem->domain);
1883 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1884 if (ret) {
1885 pr_err("%s: failed to validate BO\n", __func__);
1886 goto unreserve_out;
1887 }
1888 }
1889
1890 list_move_tail(&mem->validate_list.head,
1891 &process_info->userptr_valid_list);
1892
1893
1894
1895
1896
1897
1898
1899 list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
1900 if (!bo_va_entry->is_mapped)
1901 continue;
1902
1903 ret = update_gpuvm_pte((struct amdgpu_device *)
1904 bo_va_entry->kgd_dev,
1905 bo_va_entry, &sync);
1906 if (ret) {
1907 pr_err("%s: update PTE failed\n", __func__);
1908
1909 atomic_inc(&mem->invalid);
1910 goto unreserve_out;
1911 }
1912 }
1913 }
1914
1915
1916 ret = process_update_pds(process_info, &sync);
1917
1918unreserve_out:
1919 ttm_eu_backoff_reservation(&ticket, &resv_list);
1920 amdgpu_sync_wait(&sync, false);
1921 amdgpu_sync_free(&sync);
1922out_free:
1923 kfree(pd_bo_list_entries);
1924out_no_mem:
1925
1926 return ret;
1927}
1928
1929
1930
1931
1932
1933
1934
1935static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
1936{
1937 struct delayed_work *dwork = to_delayed_work(work);
1938 struct amdkfd_process_info *process_info =
1939 container_of(dwork, struct amdkfd_process_info,
1940 restore_userptr_work);
1941 struct task_struct *usertask;
1942 struct mm_struct *mm;
1943 int evicted_bos;
1944
1945 evicted_bos = atomic_read(&process_info->evicted_bos);
1946 if (!evicted_bos)
1947 return;
1948
1949
1950 usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
1951 if (!usertask)
1952 return;
1953 mm = get_task_mm(usertask);
1954 if (!mm) {
1955 put_task_struct(usertask);
1956 return;
1957 }
1958
1959 mutex_lock(&process_info->lock);
1960
1961 if (update_invalid_user_pages(process_info, mm))
1962 goto unlock_out;
1963
1964
1965
1966
1967 if (!list_empty(&process_info->userptr_inval_list)) {
1968 if (atomic_read(&process_info->evicted_bos) != evicted_bos)
1969 goto unlock_out;
1970
1971 if (validate_invalid_user_pages(process_info))
1972 goto unlock_out;
1973 }
1974
1975
1976
1977
1978
1979 if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
1980 evicted_bos)
1981 goto unlock_out;
1982 evicted_bos = 0;
1983 if (kgd2kfd_resume_mm(mm)) {
1984 pr_err("%s: Failed to resume KFD\n", __func__);
1985
1986
1987
1988 }
1989
1990unlock_out:
1991 mutex_unlock(&process_info->lock);
1992 mmput(mm);
1993 put_task_struct(usertask);
1994
1995
1996 if (evicted_bos)
1997 schedule_delayed_work(&process_info->restore_userptr_work,
1998 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
1999}
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
2020{
2021 struct amdgpu_bo_list_entry *pd_bo_list;
2022 struct amdkfd_process_info *process_info = info;
2023 struct amdgpu_vm *peer_vm;
2024 struct kgd_mem *mem;
2025 struct bo_vm_reservation_context ctx;
2026 struct amdgpu_amdkfd_fence *new_fence;
2027 int ret = 0, i;
2028 struct list_head duplicate_save;
2029 struct amdgpu_sync sync_obj;
2030 unsigned long failed_size = 0;
2031 unsigned long total_size = 0;
2032
2033 INIT_LIST_HEAD(&duplicate_save);
2034 INIT_LIST_HEAD(&ctx.list);
2035 INIT_LIST_HEAD(&ctx.duplicates);
2036
2037 pd_bo_list = kcalloc(process_info->n_vms,
2038 sizeof(struct amdgpu_bo_list_entry),
2039 GFP_KERNEL);
2040 if (!pd_bo_list)
2041 return -ENOMEM;
2042
2043 i = 0;
2044 mutex_lock(&process_info->lock);
2045 list_for_each_entry(peer_vm, &process_info->vm_list_head,
2046 vm_list_node)
2047 amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
2048
2049
2050
2051
2052 list_for_each_entry(mem, &process_info->kfd_bo_list,
2053 validate_list.head) {
2054
2055 list_add_tail(&mem->resv_list.head, &ctx.list);
2056 mem->resv_list.bo = mem->validate_list.bo;
2057 mem->resv_list.num_shared = mem->validate_list.num_shared;
2058 }
2059
2060 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
2061 false, &duplicate_save);
2062 if (ret) {
2063 pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
2064 goto ttm_reserve_fail;
2065 }
2066
2067 amdgpu_sync_create(&sync_obj);
2068
2069
2070 ret = process_validate_vms(process_info);
2071 if (ret)
2072 goto validate_map_fail;
2073
2074 ret = process_sync_pds_resv(process_info, &sync_obj);
2075 if (ret) {
2076 pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
2077 goto validate_map_fail;
2078 }
2079
2080
2081 list_for_each_entry(mem, &process_info->kfd_bo_list,
2082 validate_list.head) {
2083
2084 struct amdgpu_bo *bo = mem->bo;
2085 uint32_t domain = mem->domain;
2086 struct kfd_bo_va_list *bo_va_entry;
2087
2088 total_size += amdgpu_bo_size(bo);
2089
2090 ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
2091 if (ret) {
2092 pr_debug("Memory eviction: Validate BOs failed\n");
2093 failed_size += amdgpu_bo_size(bo);
2094 ret = amdgpu_amdkfd_bo_validate(bo,
2095 AMDGPU_GEM_DOMAIN_GTT, false);
2096 if (ret) {
2097 pr_debug("Memory eviction: Try again\n");
2098 goto validate_map_fail;
2099 }
2100 }
2101 ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
2102 if (ret) {
2103 pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
2104 goto validate_map_fail;
2105 }
2106 list_for_each_entry(bo_va_entry, &mem->bo_va_list,
2107 bo_list) {
2108 ret = update_gpuvm_pte((struct amdgpu_device *)
2109 bo_va_entry->kgd_dev,
2110 bo_va_entry,
2111 &sync_obj);
2112 if (ret) {
2113 pr_debug("Memory eviction: update PTE failed. Try again\n");
2114 goto validate_map_fail;
2115 }
2116 }
2117 }
2118
2119 if (failed_size)
2120 pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
2121
2122
2123 ret = process_update_pds(process_info, &sync_obj);
2124 if (ret) {
2125 pr_debug("Memory eviction: update PDs failed. Try again\n");
2126 goto validate_map_fail;
2127 }
2128
2129
2130 amdgpu_sync_wait(&sync_obj, false);
2131
2132
2133
2134
2135
2136 new_fence = amdgpu_amdkfd_fence_create(
2137 process_info->eviction_fence->base.context,
2138 process_info->eviction_fence->mm);
2139 if (!new_fence) {
2140 pr_err("Failed to create eviction fence\n");
2141 ret = -ENOMEM;
2142 goto validate_map_fail;
2143 }
2144 dma_fence_put(&process_info->eviction_fence->base);
2145 process_info->eviction_fence = new_fence;
2146 *ef = dma_fence_get(&new_fence->base);
2147
2148
2149 list_for_each_entry(mem, &process_info->kfd_bo_list,
2150 validate_list.head)
2151 amdgpu_bo_fence(mem->bo,
2152 &process_info->eviction_fence->base, true);
2153
2154
2155 list_for_each_entry(peer_vm, &process_info->vm_list_head,
2156 vm_list_node) {
2157 struct amdgpu_bo *bo = peer_vm->root.base.bo;
2158
2159 amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
2160 }
2161
2162validate_map_fail:
2163 ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
2164 amdgpu_sync_free(&sync_obj);
2165ttm_reserve_fail:
2166 mutex_unlock(&process_info->lock);
2167 kfree(pd_bo_list);
2168 return ret;
2169}
2170
2171int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
2172{
2173 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2174 struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
2175 int ret;
2176
2177 if (!info || !gws)
2178 return -EINVAL;
2179
2180 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2181 if (!*mem)
2182 return -ENOMEM;
2183
2184 mutex_init(&(*mem)->lock);
2185 INIT_LIST_HEAD(&(*mem)->bo_va_list);
2186 (*mem)->bo = amdgpu_bo_ref(gws_bo);
2187 (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
2188 (*mem)->process_info = process_info;
2189 add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
2190 amdgpu_sync_create(&(*mem)->sync);
2191
2192
2193
2194 mutex_lock(&(*mem)->process_info->lock);
2195 ret = amdgpu_bo_reserve(gws_bo, false);
2196 if (unlikely(ret)) {
2197 pr_err("Reserve gws bo failed %d\n", ret);
2198 goto bo_reservation_failure;
2199 }
2200
2201 ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
2202 if (ret) {
2203 pr_err("GWS BO validate failed %d\n", ret);
2204 goto bo_validation_failure;
2205 }
2206
2207
2208
2209
2210 ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
2211 if (ret)
2212 goto reserve_shared_fail;
2213 amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
2214 amdgpu_bo_unreserve(gws_bo);
2215 mutex_unlock(&(*mem)->process_info->lock);
2216
2217 return ret;
2218
2219reserve_shared_fail:
2220bo_validation_failure:
2221 amdgpu_bo_unreserve(gws_bo);
2222bo_reservation_failure:
2223 mutex_unlock(&(*mem)->process_info->lock);
2224 amdgpu_sync_free(&(*mem)->sync);
2225 remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
2226 amdgpu_bo_unref(&gws_bo);
2227 mutex_destroy(&(*mem)->lock);
2228 kfree(*mem);
2229 *mem = NULL;
2230 return ret;
2231}
2232
2233int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
2234{
2235 int ret;
2236 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2237 struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
2238 struct amdgpu_bo *gws_bo = kgd_mem->bo;
2239
2240
2241
2242
2243 remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
2244
2245 ret = amdgpu_bo_reserve(gws_bo, false);
2246 if (unlikely(ret)) {
2247 pr_err("Reserve gws bo failed %d\n", ret);
2248
2249 return ret;
2250 }
2251 amdgpu_amdkfd_remove_eviction_fence(gws_bo,
2252 process_info->eviction_fence);
2253 amdgpu_bo_unreserve(gws_bo);
2254 amdgpu_sync_free(&kgd_mem->sync);
2255 amdgpu_bo_unref(&gws_bo);
2256 mutex_destroy(&kgd_mem->lock);
2257 kfree(mem);
2258 return 0;
2259}
2260
2261
2262int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
2263 struct tile_config *config)
2264{
2265 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
2266
2267 config->gb_addr_config = adev->gfx.config.gb_addr_config;
2268 config->tile_config_ptr = adev->gfx.config.tile_mode_array;
2269 config->num_tile_configs =
2270 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2271 config->macro_tile_config_ptr =
2272 adev->gfx.config.macrotile_mode_array;
2273 config->num_macro_tile_configs =
2274 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2275
2276
2277 config->num_banks = adev->gfx.config.num_banks;
2278 config->num_ranks = adev->gfx.config.num_ranks;
2279
2280 return 0;
2281}
2282