1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/types.h>
25#include <linux/sched/task.h>
26#include "amdgpu_sync.h"
27#include "amdgpu_object.h"
28#include "amdgpu_vm.h"
29#include "amdgpu_mn.h"
30#include "amdgpu.h"
31#include "amdgpu_xgmi.h"
32#include "kfd_priv.h"
33#include "kfd_svm.h"
34#include "kfd_migrate.h"
35
36#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
37
38
39
40
41#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000
42
43static void svm_range_evict_svm_bo_worker(struct work_struct *work);
44static bool
45svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
46 const struct mmu_notifier_range *range,
47 unsigned long cur_seq);
48
49static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
50 .invalidate = svm_range_cpu_invalidate_pagetables,
51};
52
53
54
55
56
57
58
59
60
61
62static void svm_range_unlink(struct svm_range *prange)
63{
64 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
65 prange, prange->start, prange->last);
66
67 if (prange->svm_bo) {
68 spin_lock(&prange->svm_bo->list_lock);
69 list_del(&prange->svm_bo_list);
70 spin_unlock(&prange->svm_bo->list_lock);
71 }
72
73 list_del(&prange->list);
74 if (prange->it_node.start != 0 && prange->it_node.last != 0)
75 interval_tree_remove(&prange->it_node, &prange->svms->objects);
76}
77
78static void
79svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
80{
81 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
82 prange, prange->start, prange->last);
83
84 mmu_interval_notifier_insert_locked(&prange->notifier, mm,
85 prange->start << PAGE_SHIFT,
86 prange->npages << PAGE_SHIFT,
87 &svm_range_mn_ops);
88}
89
90
91
92
93
94
95
96
97
98static void svm_range_add_to_svms(struct svm_range *prange)
99{
100 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
101 prange, prange->start, prange->last);
102
103 list_add_tail(&prange->list, &prange->svms->list);
104 prange->it_node.start = prange->start;
105 prange->it_node.last = prange->last;
106 interval_tree_insert(&prange->it_node, &prange->svms->objects);
107}
108
109static void svm_range_remove_notifier(struct svm_range *prange)
110{
111 pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
112 prange->svms, prange,
113 prange->notifier.interval_tree.start >> PAGE_SHIFT,
114 prange->notifier.interval_tree.last >> PAGE_SHIFT);
115
116 if (prange->notifier.interval_tree.start != 0 &&
117 prange->notifier.interval_tree.last != 0)
118 mmu_interval_notifier_remove(&prange->notifier);
119}
120
121static bool
122svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
123{
124 return dma_addr && !dma_mapping_error(dev, dma_addr) &&
125 !(dma_addr & SVM_RANGE_VRAM_DOMAIN);
126}
127
128static int
129svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
130 unsigned long offset, unsigned long npages,
131 unsigned long *hmm_pfns, uint32_t gpuidx)
132{
133 enum dma_data_direction dir = DMA_BIDIRECTIONAL;
134 dma_addr_t *addr = prange->dma_addr[gpuidx];
135 struct device *dev = adev->dev;
136 struct page *page;
137 int i, r;
138
139 if (!addr) {
140 addr = kvmalloc_array(prange->npages, sizeof(*addr),
141 GFP_KERNEL | __GFP_ZERO);
142 if (!addr)
143 return -ENOMEM;
144 prange->dma_addr[gpuidx] = addr;
145 }
146
147 addr += offset;
148 for (i = 0; i < npages; i++) {
149 if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
150 dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
151
152 page = hmm_pfn_to_page(hmm_pfns[i]);
153 if (is_zone_device_page(page)) {
154 struct amdgpu_device *bo_adev =
155 amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
156
157 addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
158 bo_adev->vm_manager.vram_base_offset -
159 bo_adev->kfd.dev->pgmap.range.start;
160 addr[i] |= SVM_RANGE_VRAM_DOMAIN;
161 pr_debug("vram address detected: 0x%llx\n", addr[i]);
162 continue;
163 }
164 addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
165 r = dma_mapping_error(dev, addr[i]);
166 if (r) {
167 pr_debug("failed %d dma_map_page\n", r);
168 return r;
169 }
170 pr_debug("dma mapping 0x%llx for page addr 0x%lx\n",
171 addr[i] >> PAGE_SHIFT, page_to_pfn(page));
172 }
173 return 0;
174}
175
176static int
177svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
178 unsigned long offset, unsigned long npages,
179 unsigned long *hmm_pfns)
180{
181 struct kfd_process *p;
182 uint32_t gpuidx;
183 int r;
184
185 p = container_of(prange->svms, struct kfd_process, svms);
186
187 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
188 struct kfd_process_device *pdd;
189 struct amdgpu_device *adev;
190
191 pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
192 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
193 if (!pdd) {
194 pr_debug("failed to find device idx %d\n", gpuidx);
195 return -EINVAL;
196 }
197 adev = (struct amdgpu_device *)pdd->dev->kgd;
198
199 r = svm_range_dma_map_dev(adev, prange, offset, npages,
200 hmm_pfns, gpuidx);
201 if (r)
202 break;
203 }
204
205 return r;
206}
207
208void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
209 unsigned long offset, unsigned long npages)
210{
211 enum dma_data_direction dir = DMA_BIDIRECTIONAL;
212 int i;
213
214 if (!dma_addr)
215 return;
216
217 for (i = offset; i < offset + npages; i++) {
218 if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
219 continue;
220 pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
221 dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
222 dma_addr[i] = 0;
223 }
224}
225
226void svm_range_free_dma_mappings(struct svm_range *prange)
227{
228 struct kfd_process_device *pdd;
229 dma_addr_t *dma_addr;
230 struct device *dev;
231 struct kfd_process *p;
232 uint32_t gpuidx;
233
234 p = container_of(prange->svms, struct kfd_process, svms);
235
236 for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
237 dma_addr = prange->dma_addr[gpuidx];
238 if (!dma_addr)
239 continue;
240
241 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
242 if (!pdd) {
243 pr_debug("failed to find device idx %d\n", gpuidx);
244 continue;
245 }
246 dev = &pdd->dev->pdev->dev;
247 svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
248 kvfree(dma_addr);
249 prange->dma_addr[gpuidx] = NULL;
250 }
251}
252
253static void svm_range_free(struct svm_range *prange)
254{
255 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
256 prange->start, prange->last);
257
258 svm_range_vram_node_free(prange);
259 svm_range_free_dma_mappings(prange);
260 mutex_destroy(&prange->lock);
261 mutex_destroy(&prange->migrate_mutex);
262 kfree(prange);
263}
264
265static void
266svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
267 uint8_t *granularity, uint32_t *flags)
268{
269 *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
270 *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
271 *granularity = 9;
272 *flags =
273 KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
274}
275
276static struct
277svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
278 uint64_t last)
279{
280 uint64_t size = last - start + 1;
281 struct svm_range *prange;
282 struct kfd_process *p;
283
284 prange = kzalloc(sizeof(*prange), GFP_KERNEL);
285 if (!prange)
286 return NULL;
287 prange->npages = size;
288 prange->svms = svms;
289 prange->start = start;
290 prange->last = last;
291 INIT_LIST_HEAD(&prange->list);
292 INIT_LIST_HEAD(&prange->update_list);
293 INIT_LIST_HEAD(&prange->remove_list);
294 INIT_LIST_HEAD(&prange->insert_list);
295 INIT_LIST_HEAD(&prange->svm_bo_list);
296 INIT_LIST_HEAD(&prange->deferred_list);
297 INIT_LIST_HEAD(&prange->child_list);
298 atomic_set(&prange->invalid, 0);
299 prange->validate_timestamp = 0;
300 mutex_init(&prange->migrate_mutex);
301 mutex_init(&prange->lock);
302
303 p = container_of(svms, struct kfd_process, svms);
304 if (p->xnack_enabled)
305 bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
306 MAX_GPU_INSTANCE);
307
308 svm_range_set_default_attributes(&prange->preferred_loc,
309 &prange->prefetch_loc,
310 &prange->granularity, &prange->flags);
311
312 pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
313
314 return prange;
315}
316
317static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
318{
319 if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref))
320 return false;
321
322 return true;
323}
324
325static void svm_range_bo_release(struct kref *kref)
326{
327 struct svm_range_bo *svm_bo;
328
329 svm_bo = container_of(kref, struct svm_range_bo, kref);
330 spin_lock(&svm_bo->list_lock);
331 while (!list_empty(&svm_bo->range_list)) {
332 struct svm_range *prange =
333 list_first_entry(&svm_bo->range_list,
334 struct svm_range, svm_bo_list);
335
336
337
338 list_del_init(&prange->svm_bo_list);
339 spin_unlock(&svm_bo->list_lock);
340
341 pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
342 prange->start, prange->last);
343 mutex_lock(&prange->lock);
344 prange->svm_bo = NULL;
345 mutex_unlock(&prange->lock);
346
347 spin_lock(&svm_bo->list_lock);
348 }
349 spin_unlock(&svm_bo->list_lock);
350 if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
351
352
353
354
355 dma_fence_signal(&svm_bo->eviction_fence->base);
356 cancel_work_sync(&svm_bo->eviction_work);
357 }
358 dma_fence_put(&svm_bo->eviction_fence->base);
359 amdgpu_bo_unref(&svm_bo->bo);
360 kfree(svm_bo);
361}
362
363void svm_range_bo_unref(struct svm_range_bo *svm_bo)
364{
365 if (!svm_bo)
366 return;
367
368 kref_put(&svm_bo->kref, svm_range_bo_release);
369}
370
371static bool
372svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
373{
374 struct amdgpu_device *bo_adev;
375
376 mutex_lock(&prange->lock);
377 if (!prange->svm_bo) {
378 mutex_unlock(&prange->lock);
379 return false;
380 }
381 if (prange->ttm_res) {
382
383 mutex_unlock(&prange->lock);
384 return true;
385 }
386 if (svm_bo_ref_unless_zero(prange->svm_bo)) {
387
388
389
390
391
392 bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
393 if (bo_adev != adev) {
394 mutex_unlock(&prange->lock);
395
396 spin_lock(&prange->svm_bo->list_lock);
397 list_del_init(&prange->svm_bo_list);
398 spin_unlock(&prange->svm_bo->list_lock);
399
400 svm_range_bo_unref(prange->svm_bo);
401 return false;
402 }
403 if (READ_ONCE(prange->svm_bo->evicting)) {
404 struct dma_fence *f;
405 struct svm_range_bo *svm_bo;
406
407
408
409 mutex_unlock(&prange->lock);
410 svm_bo = prange->svm_bo;
411 f = dma_fence_get(&svm_bo->eviction_fence->base);
412 svm_range_bo_unref(prange->svm_bo);
413
414
415
416 dma_fence_wait(f, false);
417 dma_fence_put(f);
418 } else {
419
420
421
422 mutex_unlock(&prange->lock);
423 pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
424 prange->svms, prange->start, prange->last);
425
426 prange->ttm_res = prange->svm_bo->bo->tbo.resource;
427 return true;
428 }
429
430 } else {
431 mutex_unlock(&prange->lock);
432 }
433
434
435
436
437
438
439 while (!list_empty_careful(&prange->svm_bo_list))
440 ;
441
442 return false;
443}
444
445static struct svm_range_bo *svm_range_bo_new(void)
446{
447 struct svm_range_bo *svm_bo;
448
449 svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL);
450 if (!svm_bo)
451 return NULL;
452
453 kref_init(&svm_bo->kref);
454 INIT_LIST_HEAD(&svm_bo->range_list);
455 spin_lock_init(&svm_bo->list_lock);
456
457 return svm_bo;
458}
459
460int
461svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
462 bool clear)
463{
464 struct amdgpu_bo_param bp;
465 struct svm_range_bo *svm_bo;
466 struct amdgpu_bo_user *ubo;
467 struct amdgpu_bo *bo;
468 struct kfd_process *p;
469 struct mm_struct *mm;
470 int r;
471
472 p = container_of(prange->svms, struct kfd_process, svms);
473 pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
474 prange->start, prange->last);
475
476 if (svm_range_validate_svm_bo(adev, prange))
477 return 0;
478
479 svm_bo = svm_range_bo_new();
480 if (!svm_bo) {
481 pr_debug("failed to alloc svm bo\n");
482 return -ENOMEM;
483 }
484 mm = get_task_mm(p->lead_thread);
485 if (!mm) {
486 pr_debug("failed to get mm\n");
487 kfree(svm_bo);
488 return -ESRCH;
489 }
490 svm_bo->svms = prange->svms;
491 svm_bo->eviction_fence =
492 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
493 mm,
494 svm_bo);
495 mmput(mm);
496 INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker);
497 svm_bo->evicting = 0;
498 memset(&bp, 0, sizeof(bp));
499 bp.size = prange->npages * PAGE_SIZE;
500 bp.byte_align = PAGE_SIZE;
501 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
502 bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
503 bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
504 bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
505 bp.type = ttm_bo_type_device;
506 bp.resv = NULL;
507
508 r = amdgpu_bo_create_user(adev, &bp, &ubo);
509 if (r) {
510 pr_debug("failed %d to create bo\n", r);
511 goto create_bo_failed;
512 }
513 bo = &ubo->bo;
514 r = amdgpu_bo_reserve(bo, true);
515 if (r) {
516 pr_debug("failed %d to reserve bo\n", r);
517 goto reserve_bo_failed;
518 }
519
520 r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
521 if (r) {
522 pr_debug("failed %d to reserve bo\n", r);
523 amdgpu_bo_unreserve(bo);
524 goto reserve_bo_failed;
525 }
526 amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true);
527
528 amdgpu_bo_unreserve(bo);
529
530 svm_bo->bo = bo;
531 prange->svm_bo = svm_bo;
532 prange->ttm_res = bo->tbo.resource;
533 prange->offset = 0;
534
535 spin_lock(&svm_bo->list_lock);
536 list_add(&prange->svm_bo_list, &svm_bo->range_list);
537 spin_unlock(&svm_bo->list_lock);
538
539 return 0;
540
541reserve_bo_failed:
542 amdgpu_bo_unref(&bo);
543create_bo_failed:
544 dma_fence_put(&svm_bo->eviction_fence->base);
545 kfree(svm_bo);
546 prange->ttm_res = NULL;
547
548 return r;
549}
550
551void svm_range_vram_node_free(struct svm_range *prange)
552{
553 svm_range_bo_unref(prange->svm_bo);
554 prange->ttm_res = NULL;
555}
556
557struct amdgpu_device *
558svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
559{
560 struct kfd_process_device *pdd;
561 struct kfd_process *p;
562 int32_t gpu_idx;
563
564 p = container_of(prange->svms, struct kfd_process, svms);
565
566 gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id);
567 if (gpu_idx < 0) {
568 pr_debug("failed to get device by id 0x%x\n", gpu_id);
569 return NULL;
570 }
571 pdd = kfd_process_device_from_gpuidx(p, gpu_idx);
572 if (!pdd) {
573 pr_debug("failed to get device by idx 0x%x\n", gpu_idx);
574 return NULL;
575 }
576
577 return (struct amdgpu_device *)pdd->dev->kgd;
578}
579
580struct kfd_process_device *
581svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
582{
583 struct kfd_process *p;
584 int32_t gpu_idx, gpuid;
585 int r;
586
587 p = container_of(prange->svms, struct kfd_process, svms);
588
589 r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx);
590 if (r) {
591 pr_debug("failed to get device id by adev %p\n", adev);
592 return NULL;
593 }
594
595 return kfd_process_device_from_gpuidx(p, gpu_idx);
596}
597
598static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
599{
600 struct ttm_operation_ctx ctx = { false, false };
601
602 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
603
604 return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
605}
606
607static int
608svm_range_check_attr(struct kfd_process *p,
609 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
610{
611 uint32_t i;
612
613 for (i = 0; i < nattr; i++) {
614 uint32_t val = attrs[i].value;
615 int gpuidx = MAX_GPU_INSTANCE;
616
617 switch (attrs[i].type) {
618 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
619 if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
620 val != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
621 gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
622 break;
623 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
624 if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM)
625 gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
626 break;
627 case KFD_IOCTL_SVM_ATTR_ACCESS:
628 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
629 case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
630 gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
631 break;
632 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
633 break;
634 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
635 break;
636 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
637 break;
638 default:
639 pr_debug("unknown attr type 0x%x\n", attrs[i].type);
640 return -EINVAL;
641 }
642
643 if (gpuidx < 0) {
644 pr_debug("no GPU 0x%x found\n", val);
645 return -EINVAL;
646 } else if (gpuidx < MAX_GPU_INSTANCE &&
647 !test_bit(gpuidx, p->svms.bitmap_supported)) {
648 pr_debug("GPU 0x%x not supported\n", val);
649 return -EINVAL;
650 }
651 }
652
653 return 0;
654}
655
656static void
657svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
658 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
659{
660 uint32_t i;
661 int gpuidx;
662
663 for (i = 0; i < nattr; i++) {
664 switch (attrs[i].type) {
665 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
666 prange->preferred_loc = attrs[i].value;
667 break;
668 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
669 prange->prefetch_loc = attrs[i].value;
670 break;
671 case KFD_IOCTL_SVM_ATTR_ACCESS:
672 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
673 case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
674 gpuidx = kfd_process_gpuidx_from_gpuid(p,
675 attrs[i].value);
676 if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
677 bitmap_clear(prange->bitmap_access, gpuidx, 1);
678 bitmap_clear(prange->bitmap_aip, gpuidx, 1);
679 } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
680 bitmap_set(prange->bitmap_access, gpuidx, 1);
681 bitmap_clear(prange->bitmap_aip, gpuidx, 1);
682 } else {
683 bitmap_clear(prange->bitmap_access, gpuidx, 1);
684 bitmap_set(prange->bitmap_aip, gpuidx, 1);
685 }
686 break;
687 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
688 prange->flags |= attrs[i].value;
689 break;
690 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
691 prange->flags &= ~attrs[i].value;
692 break;
693 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
694 prange->granularity = attrs[i].value;
695 break;
696 default:
697 WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
698 }
699 }
700}
701
702
703
704
705
706
707
708
709
710
711static void svm_range_debug_dump(struct svm_range_list *svms)
712{
713 struct interval_tree_node *node;
714 struct svm_range *prange;
715
716 pr_debug("dump svms 0x%p list\n", svms);
717 pr_debug("range\tstart\tpage\tend\t\tlocation\n");
718
719 list_for_each_entry(prange, &svms->list, list) {
720 pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
721 prange, prange->start, prange->npages,
722 prange->start + prange->npages - 1,
723 prange->actual_loc);
724 }
725
726 pr_debug("dump svms 0x%p interval tree\n", svms);
727 pr_debug("range\tstart\tpage\tend\t\tlocation\n");
728 node = interval_tree_iter_first(&svms->objects, 0, ~0ULL);
729 while (node) {
730 prange = container_of(node, struct svm_range, it_node);
731 pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
732 prange, prange->start, prange->npages,
733 prange->start + prange->npages - 1,
734 prange->actual_loc);
735 node = interval_tree_iter_next(node, 0, ~0ULL);
736 }
737}
738
739static bool
740svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new)
741{
742 return (old->prefetch_loc == new->prefetch_loc &&
743 old->flags == new->flags &&
744 old->granularity == new->granularity);
745}
746
747static int
748svm_range_split_array(void *ppnew, void *ppold, size_t size,
749 uint64_t old_start, uint64_t old_n,
750 uint64_t new_start, uint64_t new_n)
751{
752 unsigned char *new, *old, *pold;
753 uint64_t d;
754
755 if (!ppold)
756 return 0;
757 pold = *(unsigned char **)ppold;
758 if (!pold)
759 return 0;
760
761 new = kvmalloc_array(new_n, size, GFP_KERNEL);
762 if (!new)
763 return -ENOMEM;
764
765 d = (new_start - old_start) * size;
766 memcpy(new, pold + d, new_n * size);
767
768 old = kvmalloc_array(old_n, size, GFP_KERNEL);
769 if (!old) {
770 kvfree(new);
771 return -ENOMEM;
772 }
773
774 d = (new_start == old_start) ? new_n * size : 0;
775 memcpy(old, pold + d, old_n * size);
776
777 kvfree(pold);
778 *(void **)ppold = old;
779 *(void **)ppnew = new;
780
781 return 0;
782}
783
784static int
785svm_range_split_pages(struct svm_range *new, struct svm_range *old,
786 uint64_t start, uint64_t last)
787{
788 uint64_t npages = last - start + 1;
789 int i, r;
790
791 for (i = 0; i < MAX_GPU_INSTANCE; i++) {
792 r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
793 sizeof(*old->dma_addr[i]), old->start,
794 npages, new->start, new->npages);
795 if (r)
796 return r;
797 }
798
799 return 0;
800}
801
802static int
803svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
804 uint64_t start, uint64_t last)
805{
806 uint64_t npages = last - start + 1;
807
808 pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n",
809 new->svms, new, new->start, start, last);
810
811 if (new->start == old->start) {
812 new->offset = old->offset;
813 old->offset += new->npages;
814 } else {
815 new->offset = old->offset + npages;
816 }
817
818 new->svm_bo = svm_range_bo_ref(old->svm_bo);
819 new->ttm_res = old->ttm_res;
820
821 spin_lock(&new->svm_bo->list_lock);
822 list_add(&new->svm_bo_list, &new->svm_bo->range_list);
823 spin_unlock(&new->svm_bo->list_lock);
824
825 return 0;
826}
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843static int
844svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
845 uint64_t start, uint64_t last)
846{
847 int r;
848
849 pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n",
850 new->svms, new->start, old->start, old->last, start, last);
851
852 if (new->start < old->start ||
853 new->last > old->last) {
854 WARN_ONCE(1, "invalid new range start or last\n");
855 return -EINVAL;
856 }
857
858 r = svm_range_split_pages(new, old, start, last);
859 if (r)
860 return r;
861
862 if (old->actual_loc && old->ttm_res) {
863 r = svm_range_split_nodes(new, old, start, last);
864 if (r)
865 return r;
866 }
867
868 old->npages = last - start + 1;
869 old->start = start;
870 old->last = last;
871 new->flags = old->flags;
872 new->preferred_loc = old->preferred_loc;
873 new->prefetch_loc = old->prefetch_loc;
874 new->actual_loc = old->actual_loc;
875 new->granularity = old->granularity;
876 bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
877 bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
878
879 return 0;
880}
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902static int
903svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
904 struct svm_range **new)
905{
906 uint64_t old_start = prange->start;
907 uint64_t old_last = prange->last;
908 struct svm_range_list *svms;
909 int r = 0;
910
911 pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms,
912 old_start, old_last, start, last);
913
914 if (old_start != start && old_last != last)
915 return -EINVAL;
916 if (start < old_start || last > old_last)
917 return -EINVAL;
918
919 svms = prange->svms;
920 if (old_start == start)
921 *new = svm_range_new(svms, last + 1, old_last);
922 else
923 *new = svm_range_new(svms, old_start, start - 1);
924 if (!*new)
925 return -ENOMEM;
926
927 r = svm_range_split_adjust(*new, prange, start, last);
928 if (r) {
929 pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
930 r, old_start, old_last, start, last);
931 svm_range_free(*new);
932 *new = NULL;
933 }
934
935 return r;
936}
937
938static int
939svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
940 uint64_t new_last, struct list_head *insert_list)
941{
942 struct svm_range *tail;
943 int r = svm_range_split(prange, prange->start, new_last, &tail);
944
945 if (!r)
946 list_add(&tail->insert_list, insert_list);
947 return r;
948}
949
950static int
951svm_range_split_head(struct svm_range *prange, struct svm_range *new,
952 uint64_t new_start, struct list_head *insert_list)
953{
954 struct svm_range *head;
955 int r = svm_range_split(prange, new_start, prange->last, &head);
956
957 if (!r)
958 list_add(&head->insert_list, insert_list);
959 return r;
960}
961
962static void
963svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
964 struct svm_range *pchild, enum svm_work_list_ops op)
965{
966 pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
967 pchild, pchild->start, pchild->last, prange, op);
968
969 pchild->work_item.mm = mm;
970 pchild->work_item.op = op;
971 list_add_tail(&pchild->child_list, &prange->child_list);
972}
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991int
992svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
993 unsigned long addr, struct svm_range *parent,
994 struct svm_range *prange)
995{
996 struct svm_range *head, *tail;
997 unsigned long start, last, size;
998 int r;
999
1000
1001
1002
1003
1004 size = 1UL << prange->granularity;
1005 start = ALIGN_DOWN(addr, size);
1006 last = ALIGN(addr + 1, size) - 1;
1007
1008 pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
1009 prange->svms, prange->start, prange->last, start, last, size);
1010
1011 if (start > prange->start) {
1012 r = svm_range_split(prange, start, prange->last, &head);
1013 if (r)
1014 return r;
1015 svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
1016 }
1017
1018 if (last < prange->last) {
1019 r = svm_range_split(prange, prange->start, last, &tail);
1020 if (r)
1021 return r;
1022 svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
1023 }
1024
1025
1026 if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
1027 prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
1028 pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
1029 prange, prange->start, prange->last,
1030 SVM_OP_ADD_RANGE_AND_MAP);
1031 }
1032 return 0;
1033}
1034
1035static uint64_t
1036svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
1037 int domain)
1038{
1039 struct amdgpu_device *bo_adev;
1040 uint32_t flags = prange->flags;
1041 uint32_t mapping_flags = 0;
1042 uint64_t pte_flags;
1043 bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
1044 bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
1045
1046 if (domain == SVM_RANGE_VRAM_DOMAIN)
1047 bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
1048
1049 switch (adev->asic_type) {
1050 case CHIP_ARCTURUS:
1051 if (domain == SVM_RANGE_VRAM_DOMAIN) {
1052 if (bo_adev == adev) {
1053 mapping_flags |= coherent ?
1054 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
1055 } else {
1056 mapping_flags |= coherent ?
1057 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1058 if (amdgpu_xgmi_same_hive(adev, bo_adev))
1059 snoop = true;
1060 }
1061 } else {
1062 mapping_flags |= coherent ?
1063 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1064 }
1065 break;
1066 case CHIP_ALDEBARAN:
1067 if (domain == SVM_RANGE_VRAM_DOMAIN) {
1068 if (bo_adev == adev) {
1069 mapping_flags |= coherent ?
1070 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
1071 if (adev->gmc.xgmi.connected_to_cpu)
1072 snoop = true;
1073 } else {
1074 mapping_flags |= coherent ?
1075 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1076 if (amdgpu_xgmi_same_hive(adev, bo_adev))
1077 snoop = true;
1078 }
1079 } else {
1080 mapping_flags |= coherent ?
1081 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1082 }
1083 break;
1084 default:
1085 mapping_flags |= coherent ?
1086 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1087 }
1088
1089 mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
1090
1091 if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
1092 mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
1093 if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
1094 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
1095
1096 pte_flags = AMDGPU_PTE_VALID;
1097 pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
1098 pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
1099
1100 pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
1101 return pte_flags;
1102}
1103
1104static int
1105svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1106 uint64_t start, uint64_t last,
1107 struct dma_fence **fence)
1108{
1109 uint64_t init_pte_value = 0;
1110
1111 pr_debug("[0x%llx 0x%llx]\n", start, last);
1112
1113 return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
1114 start, last, init_pte_value, 0,
1115 NULL, NULL, fence, NULL);
1116}
1117
1118static int
1119svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
1120 unsigned long last)
1121{
1122 DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
1123 struct kfd_process_device *pdd;
1124 struct dma_fence *fence = NULL;
1125 struct amdgpu_device *adev;
1126 struct kfd_process *p;
1127 uint32_t gpuidx;
1128 int r = 0;
1129
1130 bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
1131 MAX_GPU_INSTANCE);
1132 p = container_of(prange->svms, struct kfd_process, svms);
1133
1134 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
1135 pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
1136 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
1137 if (!pdd) {
1138 pr_debug("failed to find device idx %d\n", gpuidx);
1139 return -EINVAL;
1140 }
1141 adev = (struct amdgpu_device *)pdd->dev->kgd;
1142
1143 r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
1144 start, last, &fence);
1145 if (r)
1146 break;
1147
1148 if (fence) {
1149 r = dma_fence_wait(fence, false);
1150 dma_fence_put(fence);
1151 fence = NULL;
1152 if (r)
1153 break;
1154 }
1155 amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
1156 p->pasid, TLB_FLUSH_HEAVYWEIGHT);
1157 }
1158
1159 return r;
1160}
1161
1162static int
1163svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1164 struct svm_range *prange, unsigned long offset,
1165 unsigned long npages, bool readonly, dma_addr_t *dma_addr,
1166 struct amdgpu_device *bo_adev, struct dma_fence **fence)
1167{
1168 struct amdgpu_bo_va bo_va;
1169 bool table_freed = false;
1170 uint64_t pte_flags;
1171 unsigned long last_start;
1172 int last_domain;
1173 int r = 0;
1174 int64_t i, j;
1175
1176 last_start = prange->start + offset;
1177
1178 pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
1179 last_start, last_start + npages - 1, readonly);
1180
1181 if (prange->svm_bo && prange->ttm_res)
1182 bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
1183
1184 for (i = offset; i < offset + npages; i++) {
1185 last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
1186 dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
1187
1188
1189
1190
1191 if (i < offset + npages - 1 &&
1192 last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN))
1193 continue;
1194
1195 pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
1196 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
1197
1198 pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
1199 if (readonly)
1200 pte_flags &= ~AMDGPU_PTE_WRITEABLE;
1201
1202 pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
1203 prange->svms, last_start, prange->start + i,
1204 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
1205 pte_flags);
1206
1207 r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
1208 NULL, last_start,
1209 prange->start + i, pte_flags,
1210 last_start - prange->start,
1211 NULL, dma_addr,
1212 &vm->last_update,
1213 &table_freed);
1214
1215 for (j = last_start - prange->start; j <= i; j++)
1216 dma_addr[j] |= last_domain;
1217
1218 if (r) {
1219 pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
1220 goto out;
1221 }
1222 last_start = prange->start + i + 1;
1223 }
1224
1225 r = amdgpu_vm_update_pdes(adev, vm, false);
1226 if (r) {
1227 pr_debug("failed %d to update directories 0x%lx\n", r,
1228 prange->start);
1229 goto out;
1230 }
1231
1232 if (fence)
1233 *fence = dma_fence_get(vm->last_update);
1234
1235 if (table_freed) {
1236 struct kfd_process *p;
1237
1238 p = container_of(prange->svms, struct kfd_process, svms);
1239 amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
1240 p->pasid, TLB_FLUSH_LEGACY);
1241 }
1242out:
1243 return r;
1244}
1245
1246static int
1247svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
1248 unsigned long npages, bool readonly,
1249 unsigned long *bitmap, bool wait)
1250{
1251 struct kfd_process_device *pdd;
1252 struct amdgpu_device *bo_adev;
1253 struct amdgpu_device *adev;
1254 struct kfd_process *p;
1255 struct dma_fence *fence = NULL;
1256 uint32_t gpuidx;
1257 int r = 0;
1258
1259 if (prange->svm_bo && prange->ttm_res)
1260 bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
1261 else
1262 bo_adev = NULL;
1263
1264 p = container_of(prange->svms, struct kfd_process, svms);
1265 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
1266 pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
1267 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
1268 if (!pdd) {
1269 pr_debug("failed to find device idx %d\n", gpuidx);
1270 return -EINVAL;
1271 }
1272 adev = (struct amdgpu_device *)pdd->dev->kgd;
1273
1274 pdd = kfd_bind_process_to_device(pdd->dev, p);
1275 if (IS_ERR(pdd))
1276 return -EINVAL;
1277
1278 if (bo_adev && adev != bo_adev &&
1279 !amdgpu_xgmi_same_hive(adev, bo_adev)) {
1280 pr_debug("cannot map to device idx %d\n", gpuidx);
1281 continue;
1282 }
1283
1284 r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
1285 prange, offset, npages, readonly,
1286 prange->dma_addr[gpuidx],
1287 bo_adev, wait ? &fence : NULL);
1288 if (r)
1289 break;
1290
1291 if (fence) {
1292 r = dma_fence_wait(fence, false);
1293 dma_fence_put(fence);
1294 fence = NULL;
1295 if (r) {
1296 pr_debug("failed %d to dma fence wait\n", r);
1297 break;
1298 }
1299 }
1300 }
1301
1302 return r;
1303}
1304
1305struct svm_validate_context {
1306 struct kfd_process *process;
1307 struct svm_range *prange;
1308 bool intr;
1309 unsigned long bitmap[MAX_GPU_INSTANCE];
1310 struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1];
1311 struct list_head validate_list;
1312 struct ww_acquire_ctx ticket;
1313};
1314
1315static int svm_range_reserve_bos(struct svm_validate_context *ctx)
1316{
1317 struct kfd_process_device *pdd;
1318 struct amdgpu_device *adev;
1319 struct amdgpu_vm *vm;
1320 uint32_t gpuidx;
1321 int r;
1322
1323 INIT_LIST_HEAD(&ctx->validate_list);
1324 for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
1325 pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
1326 if (!pdd) {
1327 pr_debug("failed to find device idx %d\n", gpuidx);
1328 return -EINVAL;
1329 }
1330 adev = (struct amdgpu_device *)pdd->dev->kgd;
1331 vm = drm_priv_to_vm(pdd->drm_priv);
1332
1333 ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
1334 ctx->tv[gpuidx].num_shared = 4;
1335 list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
1336 }
1337 if (ctx->prange->svm_bo && ctx->prange->ttm_res) {
1338 ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo;
1339 ctx->tv[MAX_GPU_INSTANCE].num_shared = 1;
1340 list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list);
1341 }
1342
1343 r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
1344 ctx->intr, NULL);
1345 if (r) {
1346 pr_debug("failed %d to reserve bo\n", r);
1347 return r;
1348 }
1349
1350 for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
1351 pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
1352 if (!pdd) {
1353 pr_debug("failed to find device idx %d\n", gpuidx);
1354 r = -EINVAL;
1355 goto unreserve_out;
1356 }
1357 adev = (struct amdgpu_device *)pdd->dev->kgd;
1358
1359 r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv),
1360 svm_range_bo_validate, NULL);
1361 if (r) {
1362 pr_debug("failed %d validate pt bos\n", r);
1363 goto unreserve_out;
1364 }
1365 }
1366
1367 return 0;
1368
1369unreserve_out:
1370 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
1371 return r;
1372}
1373
1374static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
1375{
1376 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
1377}
1378
1379static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
1380{
1381 struct kfd_process_device *pdd;
1382 struct amdgpu_device *adev;
1383
1384 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
1385 adev = (struct amdgpu_device *)pdd->dev->kgd;
1386
1387 return SVM_ADEV_PGMAP_OWNER(adev);
1388}
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414static int svm_range_validate_and_map(struct mm_struct *mm,
1415 struct svm_range *prange,
1416 int32_t gpuidx, bool intr, bool wait)
1417{
1418 struct svm_validate_context ctx;
1419 unsigned long start, end, addr;
1420 struct kfd_process *p;
1421 void *owner;
1422 int32_t idx;
1423 int r = 0;
1424
1425 ctx.process = container_of(prange->svms, struct kfd_process, svms);
1426 ctx.prange = prange;
1427 ctx.intr = intr;
1428
1429 if (gpuidx < MAX_GPU_INSTANCE) {
1430 bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
1431 bitmap_set(ctx.bitmap, gpuidx, 1);
1432 } else if (ctx.process->xnack_enabled) {
1433 bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
1434
1435
1436
1437
1438
1439 if (prange->actual_loc) {
1440 gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process,
1441 prange->actual_loc);
1442 if (gpuidx < 0) {
1443 WARN_ONCE(1, "failed get device by id 0x%x\n",
1444 prange->actual_loc);
1445 return -EINVAL;
1446 }
1447 if (test_bit(gpuidx, prange->bitmap_access))
1448 bitmap_set(ctx.bitmap, gpuidx, 1);
1449 }
1450 } else {
1451 bitmap_or(ctx.bitmap, prange->bitmap_access,
1452 prange->bitmap_aip, MAX_GPU_INSTANCE);
1453 }
1454
1455 if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
1456 return 0;
1457
1458 if (prange->actual_loc && !prange->ttm_res) {
1459
1460
1461
1462 WARN(1, "VRAM BO missing during validation\n");
1463 return -EINVAL;
1464 }
1465
1466 svm_range_reserve_bos(&ctx);
1467
1468 p = container_of(prange->svms, struct kfd_process, svms);
1469 owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
1470 MAX_GPU_INSTANCE));
1471 for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
1472 if (kfd_svm_page_owner(p, idx) != owner) {
1473 owner = NULL;
1474 break;
1475 }
1476 }
1477
1478 start = prange->start << PAGE_SHIFT;
1479 end = (prange->last + 1) << PAGE_SHIFT;
1480 for (addr = start; addr < end && !r; ) {
1481 struct hmm_range *hmm_range;
1482 struct vm_area_struct *vma;
1483 unsigned long next;
1484 unsigned long offset;
1485 unsigned long npages;
1486 bool readonly;
1487
1488 vma = find_vma(mm, addr);
1489 if (!vma || addr < vma->vm_start) {
1490 r = -EFAULT;
1491 goto unreserve_out;
1492 }
1493 readonly = !(vma->vm_flags & VM_WRITE);
1494
1495 next = min(vma->vm_end, end);
1496 npages = (next - addr) >> PAGE_SHIFT;
1497 r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
1498 addr, npages, &hmm_range,
1499 readonly, true, owner);
1500 if (r) {
1501 pr_debug("failed %d to get svm range pages\n", r);
1502 goto unreserve_out;
1503 }
1504
1505 offset = (addr - start) >> PAGE_SHIFT;
1506 r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
1507 hmm_range->hmm_pfns);
1508 if (r) {
1509 pr_debug("failed %d to dma map range\n", r);
1510 goto unreserve_out;
1511 }
1512
1513 svm_range_lock(prange);
1514 if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
1515 pr_debug("hmm update the range, need validate again\n");
1516 r = -EAGAIN;
1517 goto unlock_out;
1518 }
1519 if (!list_empty(&prange->child_list)) {
1520 pr_debug("range split by unmap in parallel, validate again\n");
1521 r = -EAGAIN;
1522 goto unlock_out;
1523 }
1524
1525 r = svm_range_map_to_gpus(prange, offset, npages, readonly,
1526 ctx.bitmap, wait);
1527
1528unlock_out:
1529 svm_range_unlock(prange);
1530
1531 addr = next;
1532 }
1533
1534 if (addr == end)
1535 prange->validated_once = true;
1536
1537unreserve_out:
1538 svm_range_unreserve_bos(&ctx);
1539
1540 if (!r)
1541 prange->validate_timestamp = ktime_to_us(ktime_get());
1542
1543 return r;
1544}
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555static void
1556svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
1557 struct mm_struct *mm)
1558{
1559retry_flush_work:
1560 flush_work(&svms->deferred_list_work);
1561 mmap_write_lock(mm);
1562
1563 if (list_empty(&svms->deferred_range_list))
1564 return;
1565 mmap_write_unlock(mm);
1566 pr_debug("retry flush\n");
1567 goto retry_flush_work;
1568}
1569
1570static void svm_range_restore_work(struct work_struct *work)
1571{
1572 struct delayed_work *dwork = to_delayed_work(work);
1573 struct amdkfd_process_info *process_info;
1574 struct svm_range_list *svms;
1575 struct svm_range *prange;
1576 struct kfd_process *p;
1577 struct mm_struct *mm;
1578 int evicted_ranges;
1579 int invalid;
1580 int r;
1581
1582 svms = container_of(dwork, struct svm_range_list, restore_work);
1583 evicted_ranges = atomic_read(&svms->evicted_ranges);
1584 if (!evicted_ranges)
1585 return;
1586
1587 pr_debug("restore svm ranges\n");
1588
1589
1590
1591
1592 p = container_of(svms, struct kfd_process, svms);
1593 process_info = p->kgd_process_info;
1594 mm = p->mm;
1595 if (!mm)
1596 return;
1597
1598 mutex_lock(&process_info->lock);
1599 svm_range_list_lock_and_flush_work(svms, mm);
1600 mutex_lock(&svms->lock);
1601
1602 evicted_ranges = atomic_read(&svms->evicted_ranges);
1603
1604 list_for_each_entry(prange, &svms->list, list) {
1605 invalid = atomic_read(&prange->invalid);
1606 if (!invalid)
1607 continue;
1608
1609 pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
1610 prange->svms, prange, prange->start, prange->last,
1611 invalid);
1612
1613
1614
1615
1616 mutex_lock(&prange->migrate_mutex);
1617
1618 r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
1619 false, true);
1620 if (r)
1621 pr_debug("failed %d to map 0x%lx to gpus\n", r,
1622 prange->start);
1623
1624 mutex_unlock(&prange->migrate_mutex);
1625 if (r)
1626 goto out_reschedule;
1627
1628 if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
1629 goto out_reschedule;
1630 }
1631
1632 if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
1633 evicted_ranges)
1634 goto out_reschedule;
1635
1636 evicted_ranges = 0;
1637
1638 r = kgd2kfd_resume_mm(mm);
1639 if (r) {
1640
1641
1642
1643 pr_debug("failed %d to resume KFD\n", r);
1644 }
1645
1646 pr_debug("restore svm ranges successfully\n");
1647
1648out_reschedule:
1649 mutex_unlock(&svms->lock);
1650 mmap_write_unlock(mm);
1651 mutex_unlock(&process_info->lock);
1652
1653
1654 if (evicted_ranges) {
1655 pr_debug("reschedule to restore svm range\n");
1656 schedule_delayed_work(&svms->restore_work,
1657 msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
1658 }
1659}
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672static int
1673svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
1674 unsigned long start, unsigned long last)
1675{
1676 struct svm_range_list *svms = prange->svms;
1677 struct svm_range *pchild;
1678 struct kfd_process *p;
1679 int r = 0;
1680
1681 p = container_of(svms, struct kfd_process, svms);
1682
1683 pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
1684 svms, prange->start, prange->last, start, last);
1685
1686 if (!p->xnack_enabled) {
1687 int evicted_ranges;
1688
1689 list_for_each_entry(pchild, &prange->child_list, child_list) {
1690 mutex_lock_nested(&pchild->lock, 1);
1691 if (pchild->start <= last && pchild->last >= start) {
1692 pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
1693 pchild->start, pchild->last);
1694 atomic_inc(&pchild->invalid);
1695 }
1696 mutex_unlock(&pchild->lock);
1697 }
1698
1699 if (prange->start <= last && prange->last >= start)
1700 atomic_inc(&prange->invalid);
1701
1702 evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
1703 if (evicted_ranges != 1)
1704 return r;
1705
1706 pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
1707 prange->svms, prange->start, prange->last);
1708
1709
1710 r = kgd2kfd_quiesce_mm(mm);
1711 if (r)
1712 pr_debug("failed to quiesce KFD\n");
1713
1714 pr_debug("schedule to restore svm %p ranges\n", svms);
1715 schedule_delayed_work(&svms->restore_work,
1716 msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
1717 } else {
1718 unsigned long s, l;
1719
1720 pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
1721 prange->svms, start, last);
1722 list_for_each_entry(pchild, &prange->child_list, child_list) {
1723 mutex_lock_nested(&pchild->lock, 1);
1724 s = max(start, pchild->start);
1725 l = min(last, pchild->last);
1726 if (l >= s)
1727 svm_range_unmap_from_gpus(pchild, s, l);
1728 mutex_unlock(&pchild->lock);
1729 }
1730 s = max(start, prange->start);
1731 l = min(last, prange->last);
1732 if (l >= s)
1733 svm_range_unmap_from_gpus(prange, s, l);
1734 }
1735
1736 return r;
1737}
1738
1739static struct svm_range *svm_range_clone(struct svm_range *old)
1740{
1741 struct svm_range *new;
1742
1743 new = svm_range_new(old->svms, old->start, old->last);
1744 if (!new)
1745 return NULL;
1746
1747 if (old->svm_bo) {
1748 new->ttm_res = old->ttm_res;
1749 new->offset = old->offset;
1750 new->svm_bo = svm_range_bo_ref(old->svm_bo);
1751 spin_lock(&new->svm_bo->list_lock);
1752 list_add(&new->svm_bo_list, &new->svm_bo->range_list);
1753 spin_unlock(&new->svm_bo->list_lock);
1754 }
1755 new->flags = old->flags;
1756 new->preferred_loc = old->preferred_loc;
1757 new->prefetch_loc = old->prefetch_loc;
1758 new->actual_loc = old->actual_loc;
1759 new->granularity = old->granularity;
1760 bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
1761 bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
1762
1763 return new;
1764}
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794static int
1795svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
1796 unsigned long start, unsigned long last,
1797 struct list_head *update_list,
1798 struct list_head *insert_list,
1799 struct list_head *remove_list,
1800 unsigned long *left)
1801{
1802 struct interval_tree_node *node;
1803 struct svm_range *prange;
1804 struct svm_range *tmp;
1805 int r = 0;
1806
1807 INIT_LIST_HEAD(update_list);
1808 INIT_LIST_HEAD(insert_list);
1809 INIT_LIST_HEAD(remove_list);
1810
1811 node = interval_tree_iter_first(&svms->objects, start, last);
1812 while (node) {
1813 struct interval_tree_node *next;
1814 struct svm_range *old;
1815 unsigned long next_start;
1816
1817 pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
1818 node->last);
1819
1820 old = container_of(node, struct svm_range, it_node);
1821 next = interval_tree_iter_next(node, start, last);
1822 next_start = min(node->last, last) + 1;
1823
1824 if (node->start < start || node->last > last) {
1825
1826 prange = svm_range_clone(old);
1827 if (!prange) {
1828 r = -ENOMEM;
1829 goto out;
1830 }
1831
1832 list_add(&old->remove_list, remove_list);
1833 list_add(&prange->insert_list, insert_list);
1834
1835 if (node->start < start) {
1836 pr_debug("change old range start\n");
1837 r = svm_range_split_head(prange, new, start,
1838 insert_list);
1839 if (r)
1840 goto out;
1841 }
1842 if (node->last > last) {
1843 pr_debug("change old range last\n");
1844 r = svm_range_split_tail(prange, new, last,
1845 insert_list);
1846 if (r)
1847 goto out;
1848 }
1849 } else {
1850
1851
1852
1853 prange = old;
1854 }
1855
1856 if (!svm_range_is_same_attrs(prange, new))
1857 list_add(&prange->update_list, update_list);
1858
1859
1860 if (node->start > start) {
1861 prange = svm_range_new(prange->svms, start,
1862 node->start - 1);
1863 if (!prange) {
1864 r = -ENOMEM;
1865 goto out;
1866 }
1867
1868 list_add(&prange->insert_list, insert_list);
1869 list_add(&prange->update_list, update_list);
1870 }
1871
1872 node = next;
1873 start = next_start;
1874 }
1875
1876 if (left && start <= last)
1877 *left = last - start + 1;
1878
1879out:
1880 if (r)
1881 list_for_each_entry_safe(prange, tmp, insert_list, insert_list)
1882 svm_range_free(prange);
1883
1884 return r;
1885}
1886
1887static void
1888svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
1889 struct svm_range *prange)
1890{
1891 unsigned long start;
1892 unsigned long last;
1893
1894 start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
1895 last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
1896
1897 if (prange->start == start && prange->last == last)
1898 return;
1899
1900 pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
1901 prange->svms, prange, start, last, prange->start,
1902 prange->last);
1903
1904 if (start != 0 && last != 0) {
1905 interval_tree_remove(&prange->it_node, &prange->svms->objects);
1906 svm_range_remove_notifier(prange);
1907 }
1908 prange->it_node.start = prange->start;
1909 prange->it_node.last = prange->last;
1910
1911 interval_tree_insert(&prange->it_node, &prange->svms->objects);
1912 svm_range_add_notifier_locked(mm, prange);
1913}
1914
1915static void
1916svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
1917{
1918 struct mm_struct *mm = prange->work_item.mm;
1919
1920 switch (prange->work_item.op) {
1921 case SVM_OP_NULL:
1922 pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
1923 svms, prange, prange->start, prange->last);
1924 break;
1925 case SVM_OP_UNMAP_RANGE:
1926 pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n",
1927 svms, prange, prange->start, prange->last);
1928 svm_range_unlink(prange);
1929 svm_range_remove_notifier(prange);
1930 svm_range_free(prange);
1931 break;
1932 case SVM_OP_UPDATE_RANGE_NOTIFIER:
1933 pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
1934 svms, prange, prange->start, prange->last);
1935 svm_range_update_notifier_and_interval_tree(mm, prange);
1936 break;
1937 case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
1938 pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
1939 svms, prange, prange->start, prange->last);
1940 svm_range_update_notifier_and_interval_tree(mm, prange);
1941
1942 break;
1943 case SVM_OP_ADD_RANGE:
1944 pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
1945 prange->start, prange->last);
1946 svm_range_add_to_svms(prange);
1947 svm_range_add_notifier_locked(mm, prange);
1948 break;
1949 case SVM_OP_ADD_RANGE_AND_MAP:
1950 pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
1951 prange, prange->start, prange->last);
1952 svm_range_add_to_svms(prange);
1953 svm_range_add_notifier_locked(mm, prange);
1954
1955 break;
1956 default:
1957 WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange,
1958 prange->work_item.op);
1959 }
1960}
1961
1962static void svm_range_drain_retry_fault(struct svm_range_list *svms)
1963{
1964 struct kfd_process_device *pdd;
1965 struct amdgpu_device *adev;
1966 struct kfd_process *p;
1967 uint32_t i;
1968
1969 p = container_of(svms, struct kfd_process, svms);
1970
1971 for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
1972 pdd = p->pdds[i];
1973 if (!pdd)
1974 continue;
1975
1976 pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
1977 adev = (struct amdgpu_device *)pdd->dev->kgd;
1978
1979 amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
1980 pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
1981 }
1982}
1983
1984static void svm_range_deferred_list_work(struct work_struct *work)
1985{
1986 struct svm_range_list *svms;
1987 struct svm_range *prange;
1988 struct mm_struct *mm;
1989
1990 svms = container_of(work, struct svm_range_list, deferred_list_work);
1991 pr_debug("enter svms 0x%p\n", svms);
1992
1993 spin_lock(&svms->deferred_list_lock);
1994 while (!list_empty(&svms->deferred_range_list)) {
1995 prange = list_first_entry(&svms->deferred_range_list,
1996 struct svm_range, deferred_list);
1997 spin_unlock(&svms->deferred_list_lock);
1998 pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
1999 prange->start, prange->last, prange->work_item.op);
2000
2001
2002 if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
2003 svm_range_drain_retry_fault(prange->svms);
2004
2005 mm = prange->work_item.mm;
2006 mmap_write_lock(mm);
2007 mutex_lock(&svms->lock);
2008
2009
2010
2011
2012
2013
2014 spin_lock(&svms->deferred_list_lock);
2015 list_del_init(&prange->deferred_list);
2016 spin_unlock(&svms->deferred_list_lock);
2017
2018 mutex_lock(&prange->migrate_mutex);
2019 while (!list_empty(&prange->child_list)) {
2020 struct svm_range *pchild;
2021
2022 pchild = list_first_entry(&prange->child_list,
2023 struct svm_range, child_list);
2024 pr_debug("child prange 0x%p op %d\n", pchild,
2025 pchild->work_item.op);
2026 list_del_init(&pchild->child_list);
2027 svm_range_handle_list_op(svms, pchild);
2028 }
2029 mutex_unlock(&prange->migrate_mutex);
2030
2031 svm_range_handle_list_op(svms, prange);
2032 mutex_unlock(&svms->lock);
2033 mmap_write_unlock(mm);
2034
2035 spin_lock(&svms->deferred_list_lock);
2036 }
2037 spin_unlock(&svms->deferred_list_lock);
2038
2039 pr_debug("exit svms 0x%p\n", svms);
2040}
2041
2042void
2043svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
2044 struct mm_struct *mm, enum svm_work_list_ops op)
2045{
2046 spin_lock(&svms->deferred_list_lock);
2047
2048 if (!list_empty(&prange->deferred_list)) {
2049 pr_debug("update exist prange 0x%p work op %d\n", prange, op);
2050 WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n");
2051 if (op != SVM_OP_NULL &&
2052 prange->work_item.op != SVM_OP_UNMAP_RANGE)
2053 prange->work_item.op = op;
2054 } else {
2055 prange->work_item.op = op;
2056 prange->work_item.mm = mm;
2057 list_add_tail(&prange->deferred_list,
2058 &prange->svms->deferred_range_list);
2059 pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
2060 prange, prange->start, prange->last, op);
2061 }
2062 spin_unlock(&svms->deferred_list_lock);
2063}
2064
2065void schedule_deferred_list_work(struct svm_range_list *svms)
2066{
2067 spin_lock(&svms->deferred_list_lock);
2068 if (!list_empty(&svms->deferred_range_list))
2069 schedule_work(&svms->deferred_list_work);
2070 spin_unlock(&svms->deferred_list_lock);
2071}
2072
2073static void
2074svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
2075 struct svm_range *prange, unsigned long start,
2076 unsigned long last)
2077{
2078 struct svm_range *head;
2079 struct svm_range *tail;
2080
2081 if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
2082 pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange,
2083 prange->start, prange->last);
2084 return;
2085 }
2086 if (start > prange->last || last < prange->start)
2087 return;
2088
2089 head = tail = prange;
2090 if (start > prange->start)
2091 svm_range_split(prange, prange->start, start - 1, &tail);
2092 if (last < tail->last)
2093 svm_range_split(tail, last + 1, tail->last, &head);
2094
2095 if (head != prange && tail != prange) {
2096 svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
2097 svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
2098 } else if (tail != prange) {
2099 svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
2100 } else if (head != prange) {
2101 svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
2102 } else if (parent != prange) {
2103 prange->work_item.op = SVM_OP_UNMAP_RANGE;
2104 }
2105}
2106
2107static void
2108svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
2109 unsigned long start, unsigned long last)
2110{
2111 struct svm_range_list *svms;
2112 struct svm_range *pchild;
2113 struct kfd_process *p;
2114 unsigned long s, l;
2115 bool unmap_parent;
2116
2117 p = kfd_lookup_process_by_mm(mm);
2118 if (!p)
2119 return;
2120 svms = &p->svms;
2121
2122 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
2123 prange, prange->start, prange->last, start, last);
2124
2125 unmap_parent = start <= prange->start && last >= prange->last;
2126
2127 list_for_each_entry(pchild, &prange->child_list, child_list) {
2128 mutex_lock_nested(&pchild->lock, 1);
2129 s = max(start, pchild->start);
2130 l = min(last, pchild->last);
2131 if (l >= s)
2132 svm_range_unmap_from_gpus(pchild, s, l);
2133 svm_range_unmap_split(mm, prange, pchild, start, last);
2134 mutex_unlock(&pchild->lock);
2135 }
2136 s = max(start, prange->start);
2137 l = min(last, prange->last);
2138 if (l >= s)
2139 svm_range_unmap_from_gpus(prange, s, l);
2140 svm_range_unmap_split(mm, prange, prange, start, last);
2141
2142 if (unmap_parent)
2143 svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
2144 else
2145 svm_range_add_list_work(svms, prange, mm,
2146 SVM_OP_UPDATE_RANGE_NOTIFIER);
2147 schedule_deferred_list_work(svms);
2148
2149 kfd_unref_process(p);
2150}
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169static bool
2170svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
2171 const struct mmu_notifier_range *range,
2172 unsigned long cur_seq)
2173{
2174 struct svm_range *prange;
2175 unsigned long start;
2176 unsigned long last;
2177
2178 if (range->event == MMU_NOTIFY_RELEASE)
2179 return true;
2180
2181 start = mni->interval_tree.start;
2182 last = mni->interval_tree.last;
2183 start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
2184 last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
2185 pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
2186 start, last, range->start >> PAGE_SHIFT,
2187 (range->end - 1) >> PAGE_SHIFT,
2188 mni->interval_tree.start >> PAGE_SHIFT,
2189 mni->interval_tree.last >> PAGE_SHIFT, range->event);
2190
2191 prange = container_of(mni, struct svm_range, notifier);
2192
2193 svm_range_lock(prange);
2194 mmu_interval_set_seq(mni, cur_seq);
2195
2196 switch (range->event) {
2197 case MMU_NOTIFY_UNMAP:
2198 svm_range_unmap_from_cpu(mni->mm, prange, start, last);
2199 break;
2200 default:
2201 svm_range_evict(prange, mni->mm, start, last);
2202 break;
2203 }
2204
2205 svm_range_unlock(prange);
2206
2207 return true;
2208}
2209
2210/**
2211 * svm_range_from_addr - find the svm range containing an address
2212 * @svms: svm range list header
2213 * @addr: address to search the range interval tree for, in pages
2214 * @parent: output, the parent range if the match is a child range
2215 *
2216 * Context: caller must hold svms->lock
2217 *
2218 * Return: the matching svm_range (parent or child), or NULL if not found
2219 */
2220struct svm_range *
2221svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
2222 struct svm_range **parent)
2223{
2224 struct interval_tree_node *node;
2225 struct svm_range *prange;
2226 struct svm_range *pchild;
2227
2228 node = interval_tree_iter_first(&svms->objects, addr, addr);
2229 if (!node)
2230 return NULL;
2231
2232 prange = container_of(node, struct svm_range, it_node);
2233 pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n",
2234 addr, prange->start, prange->last, node->start, node->last);
2235
2236 if (addr >= prange->start && addr <= prange->last) {
2237 if (parent)
2238 *parent = prange;
2239 return prange;
2240 }
2241 list_for_each_entry(pchild, &prange->child_list, child_list)
2242 if (addr >= pchild->start && addr <= pchild->last) {
2243 pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n",
2244 addr, pchild->start, pchild->last);
2245 if (parent)
2246 *parent = prange;
2247 return pchild;
2248 }
2249
2250 return NULL;
2251}
2252
2253/* svm_range_best_restore_location - decide the best fault restore location
2254 * @prange: svm range structure
2255 * @adev: the GPU on which the vm fault happened
2256 * @gpuidx: output, the gpu index of the faulting GPU
2257 *
2258 * Only called when XNACK is enabled, to decide where to restore the range
2259 * mapping after a GPU vm fault. The caller migrates the range to the best
2260 * location if it is not there already, then updates the GPU page tables.
2261 *
2262 * If the faulting GPU is the preferred location, restore there.
2263 * If the faulting GPU is in the range ACCESS bitmap, restore on that GPU.
2264 * If the faulting GPU is in the ACCESS_IN_PLACE bitmap, keep the current
2265 * location when it is system memory or a GPU on the same XGMI hive,
2266 * otherwise restore to system memory.
2267 * Otherwise the faulting GPU has no access and the fault cannot be recovered.
2268 *
2269 * Context: called from the retry fault handler with svms->lock and the
2270 * prange migrate_mutex held
2271 *
2272 * Return: -1 if the faulting GPU has no access, otherwise 0 (CPU) or GPU id
2273 */
2274static int32_t
2275svm_range_best_restore_location(struct svm_range *prange,
2276 struct amdgpu_device *adev,
2277 int32_t *gpuidx)
2278{
2279 struct amdgpu_device *bo_adev;
2280 struct kfd_process *p;
2281 uint32_t gpuid;
2282 int r;
2283
2284 p = container_of(prange->svms, struct kfd_process, svms);
2285
2286 r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx);
2287 if (r < 0) {
2288 pr_debug("failed to get gpuid from kgd\n");
2289 return -1;
2290 }
2291
2292 if (prange->preferred_loc == gpuid)
2293 return prange->preferred_loc;
2294
2295 if (test_bit(*gpuidx, prange->bitmap_access))
2296 return gpuid;
2297
2298 if (test_bit(*gpuidx, prange->bitmap_aip)) {
2299 if (!prange->actual_loc)
2300 return 0;
2301
2302 bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
2303 if (amdgpu_xgmi_same_hive(adev, bo_adev))
2304 return prange->actual_loc;
2305 else
2306 return 0;
2307 }
2308
2309 return -1;
2310}
2311static int
2312svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
2313 unsigned long *start, unsigned long *last)
2314{
2315 struct vm_area_struct *vma;
2316 struct interval_tree_node *node;
2317 unsigned long start_limit, end_limit;
2318
2319 vma = find_vma(p->mm, addr << PAGE_SHIFT);
2320 if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
2321 pr_debug("VMA does not exist in address [0x%llx]\n", addr);
2322 return -EFAULT;
2323 }
2324 start_limit = max(vma->vm_start >> PAGE_SHIFT,
2325 (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
2326 end_limit = min(vma->vm_end >> PAGE_SHIFT,
2327 (unsigned long)ALIGN(addr + 1, 2UL << 8));
2328
2329 node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
2330 if (node) {
2331 end_limit = min(end_limit, node->start);
2332 /* the previous range should be the last one ending before addr */
2333 node = container_of(rb_prev(&node->rb),
2334 struct interval_tree_node, rb);
2335 } else {
2336 /* there is no range after addr, so the new range is only
2337 * bounded by the last range in the tree
2338 */
2339 node = container_of(rb_last(&p->svms.objects.rb_root),
2340 struct interval_tree_node, rb);
2341 }
2342 if (node) {
2343 if (node->last >= addr) {
2344 WARN(1, "Overlap with prev node and page fault addr\n");
2345 return -EFAULT;
2346 }
2347 start_limit = max(start_limit, node->last + 1);
2348 }
2349
2350 *start = start_limit;
2351 *last = end_limit - 1;
2352
2353 pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n",
2354 vma->vm_start >> PAGE_SHIFT, *start,
2355 vma->vm_end >> PAGE_SHIFT, *last);
2356
2357 return 0;
2358}
2359
2360static struct
2361svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
2362 struct kfd_process *p,
2363 struct mm_struct *mm,
2364 int64_t addr)
2365{
2366 struct svm_range *prange = NULL;
2367 unsigned long start, last;
2368 uint32_t gpuid, gpuidx;
2369
2370 if (svm_range_get_range_boundaries(p, addr, &start, &last))
2371 return NULL;
2372
2373 prange = svm_range_new(&p->svms, start, last);
2374 if (!prange) {
2375 pr_debug("Failed to create prange in address [0x%llx]\n", addr);
2376 return NULL;
2377 }
2378 if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
2379 pr_debug("failed to get gpuid from kgd\n");
2380 svm_range_free(prange);
2381 return NULL;
2382 }
2383
2384 svm_range_add_to_svms(prange);
2385 svm_range_add_notifier_locked(mm, prange);
2386
2387 return prange;
2388}
2389
2390/* svm_range_skip_recover - decide if recovery of a retry fault can be skipped
2391 * @prange: svm range structure
2392 *
2393 * Skip recovery if the range still has pending deferred work or child ranges:
2394 * if the pending operation is SVM_OP_UNMAP_RANGE the fault is stale and the
2395 * range is about to be removed; if it is SVM_OP_ADD_RANGE the range has not
2396 * been added to svms yet and the fault will be handled again once it is.
2397 *
2398 * Context: called from svm_range_restore_pages with migrate_mutex held
2399 *
2400 * Return: true to skip recovery of this fault, false otherwise
2401 */
2402static bool svm_range_skip_recover(struct svm_range *prange)
2403{
2404 struct svm_range_list *svms = prange->svms;
2405
2406 spin_lock(&svms->deferred_list_lock);
2407 if (list_empty(&prange->deferred_list) &&
2408 list_empty(&prange->child_list)) {
2409 spin_unlock(&svms->deferred_list_lock);
2410 return false;
2411 }
2412 spin_unlock(&svms->deferred_list_lock);
2413
2414 if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
2415 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
2416 svms, prange, prange->start, prange->last);
2417 return true;
2418 }
2419 if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
2420 prange->work_item.op == SVM_OP_ADD_RANGE) {
2421 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
2422 svms, prange, prange->start, prange->last);
2423 return true;
2424 }
2425 return false;
2426}
2427
2428static void
2429svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
2430 int32_t gpuidx)
2431{
2432 struct kfd_process_device *pdd;
2433
2434 /* fault is on a different page of the same range,
2435 * or the fault was skipped to be recovered later,
2436 * or the fault is on an invalid virtual address
2437 */
2438 if (gpuidx == MAX_GPU_INSTANCE) {
2439 uint32_t gpuid;
2440 int r;
2441
2442 r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx);
2443 if (r < 0)
2444 return;
2445 }
2446
2447 /* fault is recovered, or fault cannot be recovered because
2448 * the GPU has no access to the range
2449 */
2450 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
2451 if (pdd)
2452 WRITE_ONCE(pdd->faults, pdd->faults + 1);
2453}
2454
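/* svm_fault_allowed - check the faulting access against the VMA protection
 *
 * Return true if the VMA covering @addr allows the read or write access that
 * faulted. A missing VMA is reported as allowed here; the concurrent unmap is
 * handled through the MMU notifier instead of being treated as a permission
 * error.
 */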
2455static bool
2456svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
2457{
2458 unsigned long requested = VM_READ;
2459 struct vm_area_struct *vma;
2460
2461 if (write_fault)
2462 requested |= VM_WRITE;
2463
2464 vma = find_vma(mm, addr << PAGE_SHIFT);
2465 if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
2466 pr_debug("address 0x%llx VMA is removed\n", addr);
2467 return true;
2468 }
2469
2470 pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
2471 vma->vm_flags);
2472 return (vma->vm_flags & requested) == requested;
2473}
2474
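/* svm_range_restore_pages - GPU retry fault handler
 *
 * Look up the svm range covering the faulting address, creating an
 * unregistered range from the VMA if none exists, decide the best restore
 * location, migrate the range there if necessary, then validate and map it to
 * the faulting GPU. Takes the mmap read lock (re-taken as the write lock only
 * to create a new range) and svms->lock.
 */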
2475int
2476svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
2477 uint64_t addr, bool write_fault)
2478{
2479 struct mm_struct *mm = NULL;
2480 struct svm_range_list *svms;
2481 struct svm_range *prange;
2482 struct kfd_process *p;
2483 uint64_t timestamp;
2484 int32_t best_loc;
2485 int32_t gpuidx = MAX_GPU_INSTANCE;
2486 bool write_locked = false;
2487 int r = 0;
2488
2489 if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
2490 pr_debug("device does not support SVM\n");
2491 return -EFAULT;
2492 }
2493
2494 p = kfd_lookup_process_by_pasid(pasid);
2495 if (!p) {
2496 pr_debug("kfd process not founded pasid 0x%x\n", pasid);
2497 return -ESRCH;
2498 }
2499 if (!p->xnack_enabled) {
2500 pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
2501 r = -EFAULT;
2502 goto out;
2503 }
2504 svms = &p->svms;
2505
2506 pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
2507
2508 mm = get_task_mm(p->lead_thread);
2509 if (!mm) {
2510 pr_debug("svms 0x%p failed to get mm\n", svms);
2511 r = -ESRCH;
2512 goto out;
2513 }
2514
2515 mmap_read_lock(mm);
2516retry_write_locked:
2517 mutex_lock(&svms->lock);
2518 prange = svm_range_from_addr(svms, addr, NULL);
2519 if (!prange) {
2520 pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
2521 svms, addr);
2522 if (!write_locked) {
2523 /* need the write lock to create a new range with its MMU
2524 * notifier; drop the locks, retake mmap_lock in write mode
2525 * and retry the lookup in case the range appeared meanwhile
2526 */
2527 mutex_unlock(&svms->lock);
2528 mmap_read_unlock(mm);
2529 mmap_write_lock(mm);
2530 write_locked = true;
2531 goto retry_write_locked;
2532 }
2533 prange = svm_range_create_unregistered_range(adev, p, mm, addr);
2534 if (!prange) {
2535 pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
2536 svms, addr);
2537 mmap_write_downgrade(mm);
2538 r = -EFAULT;
2539 goto out_unlock_svms;
2540 }
2541 }
2542 if (write_locked)
2543 mmap_write_downgrade(mm);
2544
2545 mutex_lock(&prange->migrate_mutex);
2546
2547 if (svm_range_skip_recover(prange)) {
2548 amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
2549 goto out_unlock_range;
2550 }
2551
2552 timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
2553
2554 if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
2555 pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
2556 svms, prange->start, prange->last);
2557 goto out_unlock_range;
2558 }
2559
2560 if (!svm_fault_allowed(mm, addr, write_fault)) {
2561 pr_debug("fault addr 0x%llx no %s permission\n", addr,
2562 write_fault ? "write" : "read");
2563 r = -EPERM;
2564 goto out_unlock_range;
2565 }
2566
2567 best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
2568 if (best_loc == -1) {
2569 pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
2570 svms, prange->start, prange->last);
2571 r = -EACCES;
2572 goto out_unlock_range;
2573 }
2574
2575 pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
2576 svms, prange->start, prange->last, best_loc,
2577 prange->actual_loc);
2578
2579 if (prange->actual_loc != best_loc) {
2580 if (best_loc) {
2581 r = svm_migrate_to_vram(prange, best_loc, mm);
2582 if (r) {
2583 pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
2584 r, addr);
2585 /* fall back to system memory if migration
2586 * to VRAM failed
2587 */
2588 if (prange->actual_loc)
2589 r = svm_migrate_vram_to_ram(prange, mm);
2590 else
2591 r = 0;
2592 }
2593 } else {
2594 r = svm_migrate_vram_to_ram(prange, mm);
2595 }
2596 if (r) {
2597 pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
2598 r, svms, prange->start, prange->last);
2599 goto out_unlock_range;
2600 }
2601 }
2602
2603 r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
2604 if (r)
2605 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
2606 r, svms, prange->start, prange->last);
2607
2608out_unlock_range:
2609 mutex_unlock(&prange->migrate_mutex);
2610out_unlock_svms:
2611 mutex_unlock(&svms->lock);
2612 mmap_read_unlock(mm);
2613
2614 svm_range_count_fault(adev, p, gpuidx);
2615
2616 mmput(mm);
2617out:
2618 kfd_unref_process(p);
2619
2620 if (r == -EAGAIN) {
2621 pr_debug("recover vm fault later\n");
2622 amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
2623 r = 0;
2624 }
2625 return r;
2626}
2627
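/* svm_range_list_fini - tear down the per-process SVM state: finish pending
 * deferred work, then unlink, unregister and free every remaining range
 */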
2628void svm_range_list_fini(struct kfd_process *p)
2629{
2630 struct svm_range *prange;
2631 struct svm_range *next;
2632
2633 pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
2634
2635 /* ensure deferred list work is done before the ranges are freed */
2636 flush_work(&p->svms.deferred_list_work);
2637
2638 list_for_each_entry_safe(prange, next, &p->svms.list, list) {
2639 svm_range_unlink(prange);
2640 svm_range_remove_notifier(prange);
2641 svm_range_free(prange);
2642 }
2643
2644 mutex_destroy(&p->svms.lock);
2645
2646 pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
2647}
2648
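/* svm_range_list_init - initialize the per-process SVM state and mark which
 * of the process' GPUs support SVM
 */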
2649int svm_range_list_init(struct kfd_process *p)
2650{
2651 struct svm_range_list *svms = &p->svms;
2652 int i;
2653
2654 svms->objects = RB_ROOT_CACHED;
2655 mutex_init(&svms->lock);
2656 INIT_LIST_HEAD(&svms->list);
2657 atomic_set(&svms->evicted_ranges, 0);
2658 INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
2659 INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
2660 INIT_LIST_HEAD(&svms->deferred_range_list);
2661 spin_lock_init(&svms->deferred_list_lock);
2662
2663 for (i = 0; i < p->n_pdds; i++)
2664 if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev))
2665 bitmap_set(svms->bitmap_supported, i, 1);
2666
2667 return 0;
2668}
2669
2670/**
2671 * svm_range_is_valid - check if the virtual address range is valid
2672 * @mm: current process mm_struct
2673 * @start: range start address, in pages
2674 * @size: range size, in pages
2675 *
2676 * A valid range is fully covered by VMAs that are not device mappings
2677 * (VM_IO, VM_PFNMAP or VM_MIXEDMAP).
2678 *
2679 * Context: process context, caller holds the mmap lock
2680 *
2681 * Return:
2682 * true if the range is valid, false otherwise
2683 */
2684static bool
2685svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size)
2686{
2687 const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
2688 struct vm_area_struct *vma;
2689 unsigned long end;
2690
2691 start <<= PAGE_SHIFT;
2692 end = start + (size << PAGE_SHIFT);
2693
2694 do {
2695 vma = find_vma(mm, start);
2696 if (!vma || start < vma->vm_start ||
2697 (vma->vm_flags & device_vma))
2698 return false;
2699 start = min(end, vma->vm_end);
2700 } while (start < end);
2701
2702 return true;
2703}
2704
2705/**
2706 * svm_range_add - add an svm range and handle overlap
2707 * @p: the range is added to this process' svms
2708 * @start: range start address, in pages
2709 * @size: range size, in pages
2710 * @nattr: number of attributes
2711 * @attrs: array of attributes
2712 * @update_list: output, ranges that need to validate and update GPU mapping
2713 * @insert_list: output, ranges that need to be inserted into svms
2714 * @remove_list: output, ranges that are replaced and must be removed
2715 *
2716 * Check if the virtual address range overlaps already registered ranges,
2717 * split the overlapped ranges and create new ranges for the parts that are
2718 * not covered yet, so that the whole [start, start + size) interval ends up
2719 * on the update and insert lists.
2720 *
2721 * Context: process context, caller must hold svms->lock
2722 *
2723 * Return: 0 on success, otherwise an error code
2724 */
2725static int
2726svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
2727 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
2728 struct list_head *update_list, struct list_head *insert_list,
2729 struct list_head *remove_list)
2730{
2731 uint64_t last = start + size - 1UL;
2732 struct svm_range_list *svms;
2733 struct svm_range new = {0};
2734 struct svm_range *prange;
2735 unsigned long left = 0;
2736 int r = 0;
2737
2738 pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
2739
2740 svm_range_apply_attrs(p, &new, nattr, attrs);
2741
2742 svms = &p->svms;
2743
2744 r = svm_range_handle_overlap(svms, &new, start, last, update_list,
2745 insert_list, remove_list, &left);
2746 if (r)
2747 return r;
2748
2749 if (left) {
2750 prange = svm_range_new(svms, last - left + 1, last);
 if (!prange)
 return -ENOMEM;
2751 list_add(&prange->insert_list, insert_list);
2752 list_add(&prange->update_list, update_list);
2753 }
2754
2755 return 0;
2756}
2757
2758/* svm_range_best_prefetch_location - decide the best prefetch location
2759 * @prange: svm range structure
2760 *
2761 * The requested prefetch location is only kept if every GPU that needs a
2762 * mapping of the range can access that VRAM:
2763 *
2764 * For xnack off, all GPUs in the ACCESS and ACCESS_IN_PLACE bitmaps must
2765 * either be the prefetch GPU itself or be connected to it over XGMI (same
2766 * hive), because a GPU cannot have a coherent mapping of another GPU's
2767 * VRAM over PCIe. If any of them is not, the best prefetch location falls
2768 * back to system memory.
2769 *
2770 * For xnack on, only the ACCESS_IN_PLACE GPUs are checked. Other GPUs will
2771 * take a retry fault and trigger migration on access, so they do not
2772 * prevent prefetching the range into VRAM.
2773 *
2774 * If the requested prefetch location is system memory or undefined, it is
2775 * returned unchanged.
2776 *
2777 * Context: process context, caller holds svms->lock
2778 *
2779 * Return:
2780 * 0 for system memory, KFD_IOCTL_SVM_LOCATION_UNDEFINED if no prefetch
2781 * location was requested, otherwise the GPU id of the best prefetch
2782 * location
2783 */
2784static uint32_t
2785svm_range_best_prefetch_location(struct svm_range *prange)
2786{
2787 DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
2788 uint32_t best_loc = prange->prefetch_loc;
2789 struct kfd_process_device *pdd;
2790 struct amdgpu_device *bo_adev;
2791 struct amdgpu_device *adev;
2792 struct kfd_process *p;
2793 uint32_t gpuidx;
2794
2795 p = container_of(prange->svms, struct kfd_process, svms);
2796
2797 if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
2798 goto out;
2799
2800 bo_adev = svm_range_get_adev_by_id(prange, best_loc);
2801 if (!bo_adev) {
2802 WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc);
2803 best_loc = 0;
2804 goto out;
2805 }
2806
2807 if (p->xnack_enabled)
2808 bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
2809 else
2810 bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
2811 MAX_GPU_INSTANCE);
2812
2813 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
2814 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
2815 if (!pdd) {
2816 pr_debug("failed to get device by idx 0x%x\n", gpuidx);
2817 continue;
2818 }
2819 adev = (struct amdgpu_device *)pdd->dev->kgd;
2820
2821 if (adev == bo_adev)
2822 continue;
2823
2824 if (!amdgpu_xgmi_same_hive(adev, bo_adev)) {
2825 best_loc = 0;
2826 break;
2827 }
2828 }
2829
2830out:
2831 pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n",
2832 p->xnack_enabled, &p->svms, prange->start, prange->last,
2833 best_loc);
2834
2835 return best_loc;
2836}
2837
2838/* svm_range_prefault - fault in the pages of prange once with HMM before it
2839 * is migrated; @owner is passed through to amdgpu_hmm_range_get_pages
2840 */
2841void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
2842 void *owner)
2843{
2844 struct hmm_range *hmm_range;
2845 int r;
2846
2847 if (prange->validated_once)
2848 return;
2849
2850 r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
2851 prange->start << PAGE_SHIFT,
2852 prange->npages, &hmm_range,
2853 false, true, owner);
2854 if (!r) {
2855 amdgpu_hmm_range_get_pages_done(hmm_range);
2856 prange->validated_once = true;
2857 }
2858}
2859
2860/* svm_range_trigger_migration - start page migration if prefetch loc changed
2861 * @mm: the process mm_struct
2862 * @prange: svm range structure
2863 * @migrated: output, true if migration was triggered
2864 *
2865 * The best prefetch location is rechecked with
2866 * svm_range_best_prefetch_location() first:
2867 *
2868 * If the best location is a GPU and the range is not there yet, migrate the
2869 * range from system memory (or another GPU) to that GPU's VRAM.
2870 *
2871 * If the best location is system memory and the range currently lives in
2872 * VRAM, migrate it back to system memory.
2873 *
2874 * If the best location is undefined or the range is already at the best
2875 * location, nothing is migrated.
2876 *
2877 * Context: process context, caller holds mmap read lock, svms->lock and the
2878 * prange migrate_mutex
2879 *
2880 * Return:
2881 * 0 on success or if no migration was needed, otherwise the error code of
2882 * the migration
2883 */
2884static int
2885svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
2886 bool *migrated)
2887{
2888 uint32_t best_loc;
2889 int r = 0;
2890
2891 *migrated = false;
2892 best_loc = svm_range_best_prefetch_location(prange);
2893
2894 if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
2895 best_loc == prange->actual_loc)
2896 return 0;
2897
2898 if (!best_loc) {
2899 r = svm_migrate_vram_to_ram(prange, mm);
2900 *migrated = !r;
2901 return r;
2902 }
2903
2904 r = svm_migrate_to_vram(prange, best_loc, mm);
2905 *migrated = !r;
2906
2907 return r;
2908}
2909
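/* svm_range_schedule_evict_svm_bo - if the eviction fence is not signaled yet,
 * flag the svm_bo as evicting and schedule the eviction worker, which migrates
 * its ranges to system memory and then signals the fence
 */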
2910int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
2911{
2912 if (!fence)
2913 return -EINVAL;
2914
2915 if (dma_fence_is_signaled(&fence->base))
2916 return 0;
2917
2918 if (fence->svm_bo) {
2919 WRITE_ONCE(fence->svm_bo->evicting, 1);
2920 schedule_work(&fence->svm_bo->eviction_work);
2921 }
2922
2923 return 0;
2924}
2925
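/* svm_range_evict_svm_bo_worker - evict an svm_bo from VRAM
 *
 * Migrate every range still backed by this BO to system memory, detach the
 * ranges from the BO, signal the eviction fence and drop what should be the
 * last BO reference.
 */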
2926static void svm_range_evict_svm_bo_worker(struct work_struct *work)
2927{
2928 struct svm_range_bo *svm_bo;
2929 struct kfd_process *p;
2930 struct mm_struct *mm;
2931
2932 svm_bo = container_of(work, struct svm_range_bo, eviction_work);
2933 if (!svm_bo_ref_unless_zero(svm_bo))
2934 return;
2935
2936 /* svm_range_bo_release destroys this worker thread, so the kfd_process
2937 * and mm are valid for the lifetime of this work item
2938 */
2939 p = container_of(svm_bo->svms, struct kfd_process, svms);
2940 mm = p->mm;
2941 if (!mm)
2942 return;
2943
2944 mmap_read_lock(mm);
2945 spin_lock(&svm_bo->list_lock);
2946 while (!list_empty(&svm_bo->range_list)) {
2947 struct svm_range *prange =
2948 list_first_entry(&svm_bo->range_list,
2949 struct svm_range, svm_bo_list);
2950 list_del_init(&prange->svm_bo_list);
2951 spin_unlock(&svm_bo->list_lock);
2952
2953 pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
2954 prange->start, prange->last);
2955
2956 mutex_lock(&prange->migrate_mutex);
2957 svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
2958
2959 mutex_lock(&prange->lock);
2960 prange->svm_bo = NULL;
2961 mutex_unlock(&prange->lock);
2962
2963 mutex_unlock(&prange->migrate_mutex);
2964
2965 spin_lock(&svm_bo->list_lock);
2966 }
2967 spin_unlock(&svm_bo->list_lock);
2968 mmap_read_unlock(mm);
2969
2970 dma_fence_signal(&svm_bo->eviction_fence->base);
2971
2972 /* this should be the last reference to svm_bo, now that all ranges have
2973 * been migrated back to system memory and detached from it */
2974 WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
2975 svm_range_bo_unref(svm_bo);
2976}
2977
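/* svm_range_set_attr - handler for KFD_IOCTL_SVM_OP_SET_ATTR
 *
 * Split existing ranges and create new ones so that the whole interval is
 * covered, apply the attributes, trigger prefetch migrations where requested,
 * and validate and map the updated ranges to the GPUs. When a migration was
 * triggered with XNACK disabled, the mapping update is left to the restore
 * worker.
 */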
2978static int
2979svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
2980 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
2981{
2982 struct amdkfd_process_info *process_info = p->kgd_process_info;
2983 struct mm_struct *mm = current->mm;
2984 struct list_head update_list;
2985 struct list_head insert_list;
2986 struct list_head remove_list;
2987 struct svm_range_list *svms;
2988 struct svm_range *prange;
2989 struct svm_range *next;
2990 int r = 0;
2991
2992 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
2993 p->pasid, &p->svms, start, start + size - 1, size);
2994
2995 r = svm_range_check_attr(p, nattr, attrs);
2996 if (r)
2997 return r;
2998
2999 svms = &p->svms;
3000
3001 mutex_lock(&process_info->lock);
3002
3003 svm_range_list_lock_and_flush_work(svms, mm);
3004
3005 if (!svm_range_is_valid(mm, start, size)) {
3006 pr_debug("invalid range\n");
3007 r = -EFAULT;
3008 mmap_write_unlock(mm);
3009 goto out;
3010 }
3011
3012 mutex_lock(&svms->lock);
3013
3014 /* add new range and split existing ranges as needed */
3015 r = svm_range_add(p, start, size, nattr, attrs, &update_list,
3016 &insert_list, &remove_list);
3017 if (r) {
3018 mutex_unlock(&svms->lock);
3019 mmap_write_unlock(mm);
3020 goto out;
3021 }
3022
3023 list_for_each_entry_safe(prange, next, &insert_list, insert_list) {
3024 svm_range_add_to_svms(prange);
3025 svm_range_add_notifier_locked(mm, prange);
3026 }
3027 list_for_each_entry(prange, &update_list, update_list) {
3028 svm_range_apply_attrs(p, prange, nattr, attrs);
3029
3030 }
3031 list_for_each_entry_safe(prange, next, &remove_list,
3032 remove_list) {
3033 pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
3034 prange->svms, prange, prange->start,
3035 prange->last);
3036 svm_range_unlink(prange);
3037 svm_range_remove_notifier(prange);
3038 svm_range_free(prange);
3039 }
3040
3041 mmap_write_downgrade(mm);
3042
3043 /* trigger migrations, then revalidate and map to GPUs as needed; if this
3044 * fails we may be left with partially completed actions, and there is no
3045 * clean way to roll back to the previous state, but that should be rare
3046 */
3047 list_for_each_entry(prange, &update_list, update_list) {
3048 bool migrated;
3049
3050 mutex_lock(&prange->migrate_mutex);
3051
3052 r = svm_range_trigger_migration(mm, prange, &migrated);
3053 if (r)
3054 goto out_unlock_range;
3055
3056 if (migrated && !p->xnack_enabled) {
3057 pr_debug("restore_work will update mappings of GPUs\n");
3058 mutex_unlock(&prange->migrate_mutex);
3059 continue;
3060 }
3061
3062 r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
3063 true, true);
3064 if (r)
3065 pr_debug("failed %d to map svm range\n", r);
3066
3067out_unlock_range:
3068 mutex_unlock(&prange->migrate_mutex);
3069 if (r)
3070 break;
3071 }
3072
3073 svm_range_debug_dump(svms);
3074
3075 mutex_unlock(&svms->lock);
3076 mmap_read_unlock(mm);
3077out:
3078 mutex_unlock(&process_info->lock);
3079
3080 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
3081 &p->svms, start, start + size - 1, r);
3082
3083 return r;
3084}
3085
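/* svm_range_get_attr - handler for KFD_IOCTL_SVM_OP_GET_ATTR
 *
 * Walk all registered ranges overlapping the interval and combine their
 * attributes: a location is only reported if it is the same for the whole
 * interval, accessibility is the intersection over all ranges, flags are
 * AND/OR combined, and the smallest granularity is returned. Defaults are
 * reported if no range overlaps the interval.
 */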
3086static int
3087svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
3088 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
3089{
3090 DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
3091 DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
3092 bool get_preferred_loc = false;
3093 bool get_prefetch_loc = false;
3094 bool get_granularity = false;
3095 bool get_accessible = false;
3096 bool get_flags = false;
3097 uint64_t last = start + size - 1UL;
3098 struct mm_struct *mm = current->mm;
3099 uint8_t granularity = 0xff;
3100 struct interval_tree_node *node;
3101 struct svm_range_list *svms;
3102 struct svm_range *prange;
3103 uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3104 uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3105 uint32_t flags_and = 0xffffffff;
3106 uint32_t flags_or = 0;
3107 int gpuidx;
3108 uint32_t i;
3109
3110 pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
3111 start + size - 1, nattr);
3112
3113 /* flush pending deferred work to avoid racing with deferred actions from
3114 * previous memory map changes (e.g. munmap). Concurrent memory map changes
3115 * can still race with get_attr because the mmap lock is not held here, but
3116 * that would be a race condition in the application anyway, and undefined
3117 * behaviour is acceptable in that case.
3118 */
3119 flush_work(&p->svms.deferred_list_work);
3120
3121 mmap_read_lock(mm);
3122 if (!svm_range_is_valid(mm, start, size)) {
3123 pr_debug("invalid range\n");
3124 mmap_read_unlock(mm);
3125 return -EINVAL;
3126 }
3127 mmap_read_unlock(mm);
3128
3129 for (i = 0; i < nattr; i++) {
3130 switch (attrs[i].type) {
3131 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
3132 get_preferred_loc = true;
3133 break;
3134 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
3135 get_prefetch_loc = true;
3136 break;
3137 case KFD_IOCTL_SVM_ATTR_ACCESS:
3138 get_accessible = true;
3139 break;
3140 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
3141 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
3142 get_flags = true;
3143 break;
3144 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
3145 get_granularity = true;
3146 break;
3147 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
3148 case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
3149 fallthrough;
3150 default:
3151 pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
3152 return -EINVAL;
3153 }
3154 }
3155
3156 svms = &p->svms;
3157
3158 mutex_lock(&svms->lock);
3159
3160 node = interval_tree_iter_first(&svms->objects, start, last);
3161 if (!node) {
3162 pr_debug("range attrs not found return default values\n");
3163 svm_range_set_default_attributes(&location, &prefetch_loc,
3164 &granularity, &flags_and);
3165 flags_or = flags_and;
3166 if (p->xnack_enabled)
3167 bitmap_copy(bitmap_access, svms->bitmap_supported,
3168 MAX_GPU_INSTANCE);
3169 else
3170 bitmap_zero(bitmap_access, MAX_GPU_INSTANCE);
3171 bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE);
3172 goto fill_values;
3173 }
3174 bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE);
3175 bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE);
3176
3177 while (node) {
3178 struct interval_tree_node *next;
3179
3180 prange = container_of(node, struct svm_range, it_node);
3181 next = interval_tree_iter_next(node, start, last);
3182
3183 if (get_preferred_loc) {
3184 if (prange->preferred_loc ==
3185 KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
3186 (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
3187 location != prange->preferred_loc)) {
3188 location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3189 get_preferred_loc = false;
3190 } else {
3191 location = prange->preferred_loc;
3192 }
3193 }
3194 if (get_prefetch_loc) {
3195 if (prange->prefetch_loc ==
3196 KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
3197 (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
3198 prefetch_loc != prange->prefetch_loc)) {
3199 prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3200 get_prefetch_loc = false;
3201 } else {
3202 prefetch_loc = prange->prefetch_loc;
3203 }
3204 }
3205 if (get_accessible) {
3206 bitmap_and(bitmap_access, bitmap_access,
3207 prange->bitmap_access, MAX_GPU_INSTANCE);
3208 bitmap_and(bitmap_aip, bitmap_aip,
3209 prange->bitmap_aip, MAX_GPU_INSTANCE);
3210 }
3211 if (get_flags) {
3212 flags_and &= prange->flags;
3213 flags_or |= prange->flags;
3214 }
3215
3216 if (get_granularity && prange->granularity < granularity)
3217 granularity = prange->granularity;
3218
3219 node = next;
3220 }
3221fill_values:
3222 mutex_unlock(&svms->lock);
3223
3224 for (i = 0; i < nattr; i++) {
3225 switch (attrs[i].type) {
3226 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
3227 attrs[i].value = location;
3228 break;
3229 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
3230 attrs[i].value = prefetch_loc;
3231 break;
3232 case KFD_IOCTL_SVM_ATTR_ACCESS:
3233 gpuidx = kfd_process_gpuidx_from_gpuid(p,
3234 attrs[i].value);
3235 if (gpuidx < 0) {
3236 pr_debug("invalid gpuid %x\n", attrs[i].value);
3237 return -EINVAL;
3238 }
3239 if (test_bit(gpuidx, bitmap_access))
3240 attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS;
3241 else if (test_bit(gpuidx, bitmap_aip))
3242 attrs[i].type =
3243 KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE;
3244 else
3245 attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS;
3246 break;
3247 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
3248 attrs[i].value = flags_and;
3249 break;
3250 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
3251 attrs[i].value = ~flags_or;
3252 break;
3253 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
3254 attrs[i].value = (uint32_t)granularity;
3255 break;
3256 }
3257 }
3258
3259 return 0;
3260}
3261
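/* svm_ioctl - entry point for the KFD SVM ioctl; converts start and size from
 * bytes to pages and dispatches to the set/get attribute handlers
 */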
3262int
3263svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
3264 uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
3265{
3266 int r;
3267
3268 start >>= PAGE_SHIFT;
3269 size >>= PAGE_SHIFT;
3270
3271 switch (op) {
3272 case KFD_IOCTL_SVM_OP_SET_ATTR:
3273 r = svm_range_set_attr(p, start, size, nattrs, attrs);
3274 break;
3275 case KFD_IOCTL_SVM_OP_GET_ATTR:
3276 r = svm_range_get_attr(p, start, size, nattrs, attrs);
3277 break;
3278 default:
3279 r = -EINVAL;
3280 break;
3281 }
3282
3283 return r;
3284}
3285