1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/types.h>
25#include <linux/sched/task.h>
26#include <linux/dynamic_debug.h>
27#include <drm/ttm/ttm_tt.h>
28#include <drm/drm_exec.h>
29
30#include "amdgpu_sync.h"
31#include "amdgpu_object.h"
32#include "amdgpu_vm.h"
33#include "amdgpu_hmm.h"
34#include "amdgpu.h"
35#include "amdgpu_xgmi.h"
36#include "kfd_priv.h"
37#include "kfd_svm.h"
38#include "kfd_migrate.h"
39#include "kfd_smi_events.h"
40
/*
 * Replace any previously defined dev_fmt so that device-level messages
 * emitted from this file are prefixed with "kfd_svm: <function name>: ".
 */
#ifdef dev_fmt
#undef dev_fmt
#endif
#define dev_fmt(fmt) "kfd_svm: %s: " fmt, __func__

/*
 * Range restore delay; the _MS suffix indicates the unit is milliseconds.
 * NOTE(review): the consumer of this constant is outside this chunk.
 */
#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1

/*
 * 2 ms expressed in nanoseconds. Presumably the window during which a retry
 * fault is still considered pending - TODO confirm against the user of this
 * macro, which is not visible here.
 */
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	(2UL * NSEC_PER_MSEC)
/*
 * dynamic_svm_range_dump(svms): with CONFIG_DYNAMIC_DEBUG, route the call to
 * svm_range_debug_dump() through the dynamic-debug call helper under the
 * "svm_range_dump" name. Otherwise expand to dead code that still references
 * svm_range_debug_dump() and svms, so both remain type-checked and "used".
 */
#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
#define dynamic_svm_range_dump(svms) \
	_dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
#else
#define dynamic_svm_range_dump(svms) \
	do { if (0) svm_range_debug_dump(svms); } while (0)
#endif
59
60
61
62
63
/*
 * File-scope page-count limit for svm ranges (zero-initialized static).
 * NOTE(review): where it is set and how it is applied is outside this chunk.
 */
static uint64_t max_svm_range_pages;
65
/*
 * List element wrapping one kfd_criu_svm_range_priv_data record.
 * NOTE(review): presumably used for CRIU checkpoint/restore of svm ranges;
 * the code that builds and consumes this list is outside this chunk.
 */
struct criu_svm_metadata {
	struct list_head list;	/* linkage into the owning list */
	struct kfd_criu_svm_range_priv_data data;	/* per-range private data */
};
70
/*
 * Forward declarations for functions defined later in the file but
 * referenced earlier (work handler, MMU notifier callback, VM range check).
 */
static void svm_range_evict_svm_bo_worker(struct work_struct *work);
static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
				    const struct mmu_notifier_range *range,
				    unsigned long cur_seq);
static int
svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
		   uint64_t *bo_s, uint64_t *bo_l);
/*
 * MMU interval notifier ops registered for every svm range by
 * svm_range_add_notifier_locked(); only the invalidate callback is set.
 */
static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
	.invalidate = svm_range_cpu_invalidate_pagetables,
};
82
83
84
85
86
87
88
89
90
91
92static void svm_range_unlink(struct svm_range *prange)
93{
94 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
95 prange, prange->start, prange->last);
96
97 if (prange->svm_bo) {
98 spin_lock(&prange->svm_bo->list_lock);
99 list_del(&prange->svm_bo_list);
100 spin_unlock(&prange->svm_bo->list_lock);
101 }
102
103 list_del(&prange->list);
104 if (prange->it_node.start != 0 && prange->it_node.last != 0)
105 interval_tree_remove(&prange->it_node, &prange->svms->objects);
106}
107
108static void
109svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
110{
111 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
112 prange, prange->start, prange->last);
113
114 mmu_interval_notifier_insert_locked(&prange->notifier, mm,
115 prange->start << PAGE_SHIFT,
116 prange->npages << PAGE_SHIFT,
117 &svm_range_mn_ops);
118}
119
120
121
122
123
124
125
126
127
128static void svm_range_add_to_svms(struct svm_range *prange)
129{
130 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
131 prange, prange->start, prange->last);
132
133 list_move_tail(&prange->list, &prange->svms->list);
134 prange->it_node.start = prange->start;
135 prange->it_node.last = prange->last;
136 interval_tree_insert(&prange->it_node, &prange->svms->objects);
137}
138
139static void svm_range_remove_notifier(struct svm_range *prange)
140{
141 pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
142 prange->svms, prange,
143 prange->notifier.interval_tree.start >> PAGE_SHIFT,
144 prange->notifier.interval_tree.last >> PAGE_SHIFT);
145
146 if (prange->notifier.interval_tree.start != 0 &&
147 prange->notifier.interval_tree.last != 0)
148 mmu_interval_notifier_remove(&prange->notifier);
149}
150
151static bool
152svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
153{
154 return dma_addr && !dma_mapping_error(dev, dma_addr) &&
155 !(dma_addr & SVM_RANGE_VRAM_DOMAIN);
156}
157
158static int
159svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
160 unsigned long offset, unsigned long npages,
161 unsigned long *hmm_pfns, uint32_t gpuidx)
162{
163 enum dma_data_direction dir = DMA_BIDIRECTIONAL;
164 dma_addr_t *addr = prange->dma_addr[gpuidx];
165 struct device *dev = adev->dev;
166 struct page *page;
167 int i, r;
168
169 if (!addr) {
170 addr = kvcalloc(prange->npages, sizeof(*addr), GFP_KERNEL);
171 if (!addr)
172 return -ENOMEM;
173 prange->dma_addr[gpuidx] = addr;
174 }
175
176 addr += offset;
177 for (i = 0; i < npages; i++) {
178 if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
179 dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
180
181 page = hmm_pfn_to_page(hmm_pfns[i]);
182 if (is_zone_device_page(page)) {
183 struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
184
185 addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
186 bo_adev->vm_manager.vram_base_offset -
187 bo_adev->kfd.pgmap.range.start;
188 addr[i] |= SVM_RANGE_VRAM_DOMAIN;
189 pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
190 continue;
191 }
192 addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
193 r = dma_mapping_error(dev, addr[i]);
194 if (r) {
195 dev_err(dev, "failed %d dma_map_page\n", r);
196 return r;
197 }
198 pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
199 addr[i] >> PAGE_SHIFT, page_to_pfn(page));
200 }
201
202 return 0;
203}
204
205static int
206svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
207 unsigned long offset, unsigned long npages,
208 unsigned long *hmm_pfns)
209{
210 struct kfd_process *p;
211 uint32_t gpuidx;
212 int r;
213
214 p = container_of(prange->svms, struct kfd_process, svms);
215
216 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
217 struct kfd_process_device *pdd;
218
219 pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
220 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
221 if (!pdd) {
222 pr_debug("failed to find device idx %d\n", gpuidx);
223 return -EINVAL;
224 }
225
226 r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
227 hmm_pfns, gpuidx);
228 if (r)
229 break;
230 }
231
232 return r;
233}
234
235void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
236 unsigned long offset, unsigned long npages)
237{
238 enum dma_data_direction dir = DMA_BIDIRECTIONAL;
239 int i;
240
241 if (!dma_addr)
242 return;
243
244 for (i = offset; i < offset + npages; i++) {
245 if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
246 continue;
247 pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
248 dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
249 dma_addr[i] = 0;
250 }
251}
252
253void svm_range_dma_unmap(struct svm_range *prange)
254{
255 struct kfd_process_device *pdd;
256 dma_addr_t *dma_addr;
257 struct device *dev;
258 struct kfd_process *p;
259 uint32_t gpuidx;
260
261 p = container_of(prange->svms, struct kfd_process, svms);
262
263 for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
264 dma_addr = prange->dma_addr[gpuidx];
265 if (!dma_addr)
266 continue;
267
268 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
269 if (!pdd) {
270 pr_debug("failed to find device idx %d\n", gpuidx);
271 continue;
272 }
273 dev = &pdd->dev->adev->pdev->dev;
274
275 svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
276 }
277}
278
279static void svm_range_free(struct svm_range *prange, bool do_unmap)
280{
281 uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
282 struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
283 uint32_t gpuidx;
284
285 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
286 prange->start, prange->last);
287
288 svm_range_vram_node_free(prange);
289 if (do_unmap)
290 svm_range_dma_unmap(prange);
291
292 if (do_unmap && !p->xnack_enabled) {
293 pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
294 amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
295 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
296 }
297
298
299 for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
300 if (prange->dma_addr[gpuidx]) {
301 kvfree(prange->dma_addr[gpuidx]);
302 prange->dma_addr[gpuidx] = NULL;
303 }
304 }
305
306 mutex_destroy(&prange->lock);
307 mutex_destroy(&prange->migrate_mutex);
308 kfree(prange);
309}
310
311static void
312svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
313 int32_t *prefetch_loc, uint8_t *granularity,
314 uint32_t *flags)
315{
316 *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
317 *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
318 *granularity = svms->default_granularity;
319 *flags =
320 KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
321}
322
323static struct
324svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
325 uint64_t last, bool update_mem_usage)
326{
327 uint64_t size = last - start + 1;
328 struct svm_range *prange;
329 struct kfd_process *p;
330
331 prange = kzalloc(sizeof(*prange), GFP_KERNEL);
332 if (!prange)
333 return NULL;
334
335 p = container_of(svms, struct kfd_process, svms);
336 if (!p->xnack_enabled && update_mem_usage &&
337 amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
338 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
339 pr_info("SVM mapping failed, exceeds resident system memory limit\n");
340 kfree(prange);
341 return NULL;
342 }
343 prange->npages = size;
344 prange->svms = svms;
345 prange->start = start;
346 prange->last = last;
347 INIT_LIST_HEAD(&prange->list);
348 INIT_LIST_HEAD(&prange->update_list);
349 INIT_LIST_HEAD(&prange->svm_bo_list);
350 INIT_LIST_HEAD(&prange->deferred_list);
351 INIT_LIST_HEAD(&prange->child_list);
352 atomic_set(&prange->invalid, 0);
353 prange->validate_timestamp = 0;
354 prange->vram_pages = 0;
355 mutex_init(&prange->migrate_mutex);
356 mutex_init(&prange->lock);
357
358 if (p->xnack_enabled)
359 bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
360 MAX_GPU_INSTANCE);
361
362 svm_range_set_default_attributes(svms, &prange->preferred_loc,
363 &prange->prefetch_loc,
364 &prange->granularity, &prange->flags);
365
366 pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
367
368 return prange;
369}
370
371static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
372{
373 if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref))
374 return false;
375
376 return true;
377}
378
/**
 * svm_range_bo_release - kref release callback that tears down an svm_bo
 * @kref: the kref embedded in the svm_range_bo being released
 *
 * Detaches every range still on the BO's range list, subtracts the BO size
 * from the owning process device's vram_usage when that process can still
 * be looked up, signals and drops the eviction fence, drops the amdgpu BO
 * reference and frees the svm_bo.
 */
static void svm_range_bo_release(struct kref *kref)
{
	struct svm_range_bo *svm_bo;

	svm_bo = container_of(kref, struct svm_range_bo, kref);
	pr_debug("svm_bo 0x%p\n", svm_bo);

	spin_lock(&svm_bo->list_lock);
	while (!list_empty(&svm_bo->range_list)) {
		struct svm_range *prange =
				list_first_entry(&svm_bo->range_list,
						struct svm_range, svm_bo_list);

		/*
		 * Unlink the range first, then drop the spinlock before taking
		 * prange->lock, which is a mutex.
		 */
		list_del_init(&prange->svm_bo_list);
		spin_unlock(&svm_bo->list_lock);

		pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
			 prange->start, prange->last);
		mutex_lock(&prange->lock);
		prange->svm_bo = NULL;
		/* Warn (once) if the range still records a VRAM location */
		WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
		mutex_unlock(&prange->lock);

		/* Re-take the list lock for the next loop condition check */
		spin_lock(&svm_bo->list_lock);
	}
	spin_unlock(&svm_bo->list_lock);

	/* Only touch the accounting if the mm can still be pinned */
	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
		struct kfd_process_device *pdd;
		struct kfd_process *p;
		struct mm_struct *mm;

		mm = svm_bo->eviction_fence->mm;
		/*
		 * The process is looked up by mm; the lookup takes a process
		 * reference that is dropped below with kfd_unref_process().
		 */
		p = kfd_lookup_process_by_mm(mm);
		if (p) {
			pdd = kfd_get_process_device_data(svm_bo->node, p);
			if (pdd)
				atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
			kfd_unref_process(p);
		}
		mmput(mm);
	}

	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
		/* Nobody signaled the eviction fence yet; do it before dropping it */
		dma_fence_signal(&svm_bo->eviction_fence->base);
	dma_fence_put(&svm_bo->eviction_fence->base);
	amdgpu_bo_unref(&svm_bo->bo);
	kfree(svm_bo);
}
436
437static void svm_range_bo_wq_release(struct work_struct *work)
438{
439 struct svm_range_bo *svm_bo;
440
441 svm_bo = container_of(work, struct svm_range_bo, release_work);
442 svm_range_bo_release(&svm_bo->kref);
443}
444
445static void svm_range_bo_release_async(struct kref *kref)
446{
447 struct svm_range_bo *svm_bo;
448
449 svm_bo = container_of(kref, struct svm_range_bo, kref);
450 pr_debug("svm_bo 0x%p\n", svm_bo);
451 INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
452 schedule_work(&svm_bo->release_work);
453}
454
455void svm_range_bo_unref_async(struct svm_range_bo *svm_bo)
456{
457 kref_put(&svm_bo->kref, svm_range_bo_release_async);
458}
459
460static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
461{
462 if (svm_bo)
463 kref_put(&svm_bo->kref, svm_range_bo_release);
464}
465
/**
 * svm_range_validate_svm_bo - check whether a range can keep using its svm_bo
 * @node: kfd node the range is about to use VRAM on
 * @prange: svm range to check
 *
 * Return: true when @prange already has a usable VRAM resource (ttm_res is
 * set, or the existing svm_bo on @node could be referenced again and is not
 * being evicted); false when the caller has to allocate a new svm_bo.
 */
static bool
svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange)
{
	mutex_lock(&prange->lock);
	if (!prange->svm_bo) {
		/* No BO attached at all */
		mutex_unlock(&prange->lock);
		return false;
	}
	if (prange->ttm_res) {
		/* A resource is already recorded for this range */
		mutex_unlock(&prange->lock);
		return true;
	}
	if (svm_bo_ref_unless_zero(prange->svm_bo)) {
		/*
		 * The BO lives on a different node than the requested one:
		 * take the range off that BO's range list, drop the reference
		 * just taken and report that a new BO is needed.
		 */
		if (prange->svm_bo->node != node) {
			mutex_unlock(&prange->lock);

			spin_lock(&prange->svm_bo->list_lock);
			list_del_init(&prange->svm_bo_list);
			spin_unlock(&prange->svm_bo->list_lock);

			svm_range_bo_unref(prange->svm_bo);
			return false;
		}
		if (READ_ONCE(prange->svm_bo->evicting)) {
			struct dma_fence *f;
			struct svm_range_bo *svm_bo;
			/*
			 * The BO is flagged as evicting, so it cannot be reused.
			 * Hold a reference on its eviction fence while the BO
			 * reference is dropped.
			 */
			mutex_unlock(&prange->lock);
			svm_bo = prange->svm_bo;
			f = dma_fence_get(&svm_bo->eviction_fence->base);
			svm_range_bo_unref(prange->svm_bo);
			/*
			 * Wait (non-interruptibly) for the eviction fence before
			 * falling through to the polling loop below.
			 */
			dma_fence_wait(f, false);
			dma_fence_put(f);
		} else {
			/*
			 * The BO is still alive and the reference obtained above
			 * is kept: reuse its resource.
			 */
			mutex_unlock(&prange->lock);
			pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);

			prange->ttm_res = prange->svm_bo->bo->tbo.resource;
			return true;
		}

	} else {
		/* Refcount already reached zero: the BO is being released */
		mutex_unlock(&prange->lock);
	}

	/*
	 * A new svm_bo is needed. Poll, yielding the CPU, until the range is
	 * off the old BO's range list and prange->svm_bo has been cleared
	 * (svm_range_bo_release() does both).
	 */
	while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo)
		cond_resched();

	return false;
}
536
537static struct svm_range_bo *svm_range_bo_new(void)
538{
539 struct svm_range_bo *svm_bo;
540
541 svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL);
542 if (!svm_bo)
543 return NULL;
544
545 kref_init(&svm_bo->kref);
546 INIT_LIST_HEAD(&svm_bo->range_list);
547 spin_lock_init(&svm_bo->list_lock);
548
549 return svm_bo;
550}
551
552int
553svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
554 bool clear)
555{
556 struct kfd_process_device *pdd;
557 struct amdgpu_bo_param bp;
558 struct svm_range_bo *svm_bo;
559 struct amdgpu_bo_user *ubo;
560 struct amdgpu_bo *bo;
561 struct kfd_process *p;
562 struct mm_struct *mm;
563 int r;
564
565 p = container_of(prange->svms, struct kfd_process, svms);
566 pr_debug("process pid: %d svms 0x%p [0x%lx 0x%lx]\n",
567 p->lead_thread->pid, prange->svms,
568 prange->start, prange->last);
569
570 if (svm_range_validate_svm_bo(node, prange))
571 return 0;
572
573 svm_bo = svm_range_bo_new();
574 if (!svm_bo) {
575 pr_debug("failed to alloc svm bo\n");
576 return -ENOMEM;
577 }
578 mm = get_task_mm(p->lead_thread);
579 if (!mm) {
580 pr_debug("failed to get mm\n");
581 kfree(svm_bo);
582 return -ESRCH;
583 }
584 svm_bo->node = node;
585 svm_bo->eviction_fence =
586 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
587 mm,
588 svm_bo);
589 mmput(mm);
590 INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker);
591 svm_bo->evicting = 0;
592 memset(&bp, 0, sizeof(bp));
593 bp.size = prange->npages * PAGE_SIZE;
594 bp.byte_align = PAGE_SIZE;
595 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
596 bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
597 bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
598 bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
599 bp.type = ttm_bo_type_device;
600 bp.resv = NULL;
601 if (node->xcp)
602 bp.xcp_id_plus1 = node->xcp->id + 1;
603
604 r = amdgpu_bo_create_user(node->adev, &bp, &ubo);
605 if (r) {
606 pr_debug("failed %d to create bo\n", r);
607 goto create_bo_failed;
608 }
609 bo = &ubo->bo;
610
611 pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n",
612 bo->tbo.resource->start << PAGE_SHIFT, bp.size,
613 bp.xcp_id_plus1 - 1);
614
615 r = amdgpu_bo_reserve(bo, true);
616 if (r) {
617 pr_debug("failed %d to reserve bo\n", r);
618 goto reserve_bo_failed;
619 }
620
621 if (clear) {
622 r = amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
623 if (r) {
624 pr_debug("failed %d to sync bo\n", r);
625 amdgpu_bo_unreserve(bo);
626 goto reserve_bo_failed;
627 }
628 }
629
630 r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
631 if (r) {
632 pr_debug("failed %d to reserve bo\n", r);
633 amdgpu_bo_unreserve(bo);
634 goto reserve_bo_failed;
635 }
636 amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true);
637
638 amdgpu_bo_unreserve(bo);
639
640 svm_bo->bo = bo;
641 prange->svm_bo = svm_bo;
642 prange->ttm_res = bo->tbo.resource;
643 prange->offset = 0;
644
645 spin_lock(&svm_bo->list_lock);
646 list_add(&prange->svm_bo_list, &svm_bo->range_list);
647 spin_unlock(&svm_bo->list_lock);
648
649 pdd = svm_range_get_pdd_by_node(prange, node);
650 if (pdd)
651 atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
652
653 return 0;
654
655reserve_bo_failed:
656 amdgpu_bo_unref(&bo);
657create_bo_failed:
658 dma_fence_put(&svm_bo->eviction_fence->base);
659 kfree(svm_bo);
660 prange->ttm_res = NULL;
661
662 return r;
663}
664
665void svm_range_vram_node_free(struct svm_range *prange)
666{
667
668 mutex_lock(&prange->lock);
669
670 if (prange->ttm_res) {
671 prange->ttm_res = NULL;
672 mutex_unlock(&prange->lock);
673 svm_range_bo_unref(prange->svm_bo);
674 } else
675 mutex_unlock(&prange->lock);
676}
677
678struct kfd_node *
679svm_range_get_node_by_id(struct svm_range *prange, uint32_t gpu_id)
680{
681 struct kfd_process *p;
682 struct kfd_process_device *pdd;
683
684 p = container_of(prange->svms, struct kfd_process, svms);
685 pdd = kfd_process_device_data_by_id(p, gpu_id);
686 if (!pdd) {
687 pr_debug("failed to get kfd process device by id 0x%x\n", gpu_id);
688 return NULL;
689 }
690
691 return pdd->dev;
692}
693
694struct kfd_process_device *
695svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node)
696{
697 struct kfd_process *p;
698
699 p = container_of(prange->svms, struct kfd_process, svms);
700
701 return kfd_get_process_device_data(node, p);
702}
703
704static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
705{
706 struct ttm_operation_ctx ctx = { false, false };
707
708 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
709
710 return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
711}
712
713static int
714svm_range_check_attr(struct kfd_process *p,
715 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
716{
717 uint32_t i;
718
719 for (i = 0; i < nattr; i++) {
720 uint32_t val = attrs[i].value;
721 int gpuidx = MAX_GPU_INSTANCE;
722
723 switch (attrs[i].type) {
724 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
725 if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
726 val != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
727 gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
728 break;
729 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
730 if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM)
731 gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
732 break;
733 case KFD_IOCTL_SVM_ATTR_ACCESS:
734 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
735 case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
736 gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
737 break;
738 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
739 break;
740 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
741 break;
742 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
743 break;
744 default:
745 pr_debug("unknown attr type 0x%x\n", attrs[i].type);
746 return -EINVAL;
747 }
748
749 if (gpuidx < 0) {
750 pr_debug("no GPU 0x%x found\n", val);
751 return -EINVAL;
752 } else if (gpuidx < MAX_GPU_INSTANCE &&
753 !test_bit(gpuidx, p->svms.bitmap_supported)) {
754 pr_debug("GPU 0x%x not supported\n", val);
755 return -EINVAL;
756 }
757 }
758
759 return 0;
760}
761
762static void
763svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
764 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
765 bool *update_mapping)
766{
767 uint32_t i;
768 int gpuidx;
769
770 for (i = 0; i < nattr; i++) {
771 switch (attrs[i].type) {
772 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
773 prange->preferred_loc = attrs[i].value;
774 break;
775 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
776 prange->prefetch_loc = attrs[i].value;
777 break;
778 case KFD_IOCTL_SVM_ATTR_ACCESS:
779 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
780 case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
781 if (!p->xnack_enabled)
782 *update_mapping = true;
783
784 gpuidx = kfd_process_gpuidx_from_gpuid(p,
785 attrs[i].value);
786 if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
787 bitmap_clear(prange->bitmap_access, gpuidx, 1);
788 bitmap_clear(prange->bitmap_aip, gpuidx, 1);
789 } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
790 bitmap_set(prange->bitmap_access, gpuidx, 1);
791 bitmap_clear(prange->bitmap_aip, gpuidx, 1);
792 } else {
793 bitmap_clear(prange->bitmap_access, gpuidx, 1);
794 bitmap_set(prange->bitmap_aip, gpuidx, 1);
795 }
796 break;
797 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
798 *update_mapping = true;
799 prange->flags |= attrs[i].value;
800 break;
801 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
802 *update_mapping = true;
803 prange->flags &= ~attrs[i].value;
804 break;
805 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
806 prange->granularity = min_t(uint32_t, attrs[i].value, 0x3F);
807 break;
808 default:
809 WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
810 }
811 }
812}
813
814static bool
815svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
816 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
817{
818 uint32_t i;
819 int gpuidx;
820
821 for (i = 0; i < nattr; i++) {
822 switch (attrs[i].type) {
823 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
824 if (prange->preferred_loc != attrs[i].value)
825 return false;
826 break;
827 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
828
829
830
831 return false;
832 case KFD_IOCTL_SVM_ATTR_ACCESS:
833 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
834 case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
835 gpuidx = kfd_process_gpuidx_from_gpuid(p,
836 attrs[i].value);
837 if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
838 if (test_bit(gpuidx, prange->bitmap_access) ||
839 test_bit(gpuidx, prange->bitmap_aip))
840 return false;
841 } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
842 if (!test_bit(gpuidx, prange->bitmap_access))
843 return false;
844 } else {
845 if (!test_bit(gpuidx, prange->bitmap_aip))
846 return false;
847 }
848 break;
849 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
850 if ((prange->flags & attrs[i].value) != attrs[i].value)
851 return false;
852 break;
853 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
854 if ((prange->flags & attrs[i].value) != 0)
855 return false;
856 break;
857 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
858 if (prange->granularity != attrs[i].value)
859 return false;
860 break;
861 default:
862 WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
863 }
864 }
865
866 return true;
867}
868
869
870
871
872
873
874
875
876
877
878static void svm_range_debug_dump(struct svm_range_list *svms)
879{
880 struct interval_tree_node *node;
881 struct svm_range *prange;
882
883 pr_debug("dump svms 0x%p list\n", svms);
884 pr_debug("range\tstart\tpage\tend\t\tlocation\n");
885
886 list_for_each_entry(prange, &svms->list, list) {
887 pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
888 prange, prange->start, prange->npages,
889 prange->start + prange->npages - 1,
890 prange->actual_loc);
891 }
892
893 pr_debug("dump svms 0x%p interval tree\n", svms);
894 pr_debug("range\tstart\tpage\tend\t\tlocation\n");
895 node = interval_tree_iter_first(&svms->objects, 0, ~0ULL);
896 while (node) {
897 prange = container_of(node, struct svm_range, it_node);
898 pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
899 prange, prange->start, prange->npages,
900 prange->start + prange->npages - 1,
901 prange->actual_loc);
902 node = interval_tree_iter_next(node, 0, ~0ULL);
903 }
904}
905
906static void *
907svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
908 uint64_t offset, uint64_t *vram_pages)
909{
910 unsigned char *src = (unsigned char *)psrc + offset;
911 unsigned char *dst;
912 uint64_t i;
913
914 dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
915 if (!dst)
916 return NULL;
917
918 if (!vram_pages) {
919 memcpy(dst, src, num_elements * size);
920 return (void *)dst;
921 }
922
923 *vram_pages = 0;
924 for (i = 0; i < num_elements; i++) {
925 dma_addr_t *temp;
926 temp = (dma_addr_t *)dst + i;
927 *temp = *((dma_addr_t *)src + i);
928 if (*temp&SVM_RANGE_VRAM_DOMAIN)
929 (*vram_pages)++;
930 }
931
932 return (void *)dst;
933}
934
935static int
936svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
937{
938 int i;
939
940 for (i = 0; i < MAX_GPU_INSTANCE; i++) {
941 if (!src->dma_addr[i])
942 continue;
943 dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
944 sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
945 if (!dst->dma_addr[i])
946 return -ENOMEM;
947 }
948
949 return 0;
950}
951
952static int
953svm_range_split_array(void *ppnew, void *ppold, size_t size,
954 uint64_t old_start, uint64_t old_n,
955 uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)
956{
957 unsigned char *new, *old, *pold;
958 uint64_t d;
959
960 if (!ppold)
961 return 0;
962 pold = *(unsigned char **)ppold;
963 if (!pold)
964 return 0;
965
966 d = (new_start - old_start) * size;
967
968 new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);
969 if (!new)
970 return -ENOMEM;
971 d = (new_start == old_start) ? new_n * size : 0;
972 old = svm_range_copy_array(pold, size, old_n, d, NULL);
973 if (!old) {
974 kvfree(new);
975 return -ENOMEM;
976 }
977 kvfree(pold);
978 *(void **)ppold = old;
979 *(void **)ppnew = new;
980
981 return 0;
982}
983
984static int
985svm_range_split_pages(struct svm_range *new, struct svm_range *old,
986 uint64_t start, uint64_t last)
987{
988 uint64_t npages = last - start + 1;
989 int i, r;
990
991 for (i = 0; i < MAX_GPU_INSTANCE; i++) {
992 r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
993 sizeof(*old->dma_addr[i]), old->start,
994 npages, new->start, new->npages,
995 old->actual_loc ? &new->vram_pages : NULL);
996 if (r)
997 return r;
998 }
999 if (old->actual_loc)
1000 old->vram_pages -= new->vram_pages;
1001
1002 return 0;
1003}
1004
1005static int
1006svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
1007 uint64_t start, uint64_t last)
1008{
1009 uint64_t npages = last - start + 1;
1010
1011 pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n",
1012 new->svms, new, new->start, start, last);
1013
1014 if (new->start == old->start) {
1015 new->offset = old->offset;
1016 old->offset += new->npages;
1017 } else {
1018 new->offset = old->offset + npages;
1019 }
1020
1021 new->svm_bo = svm_range_bo_ref(old->svm_bo);
1022 new->ttm_res = old->ttm_res;
1023
1024 spin_lock(&new->svm_bo->list_lock);
1025 list_add(&new->svm_bo_list, &new->svm_bo->range_list);
1026 spin_unlock(&new->svm_bo->list_lock);
1027
1028 return 0;
1029}
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046static int
1047svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
1048 uint64_t start, uint64_t last)
1049{
1050 int r;
1051
1052 pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n",
1053 new->svms, new->start, old->start, old->last, start, last);
1054
1055 if (new->start < old->start ||
1056 new->last > old->last) {
1057 WARN_ONCE(1, "invalid new range start or last\n");
1058 return -EINVAL;
1059 }
1060
1061 r = svm_range_split_pages(new, old, start, last);
1062 if (r)
1063 return r;
1064
1065 if (old->actual_loc && old->ttm_res) {
1066 r = svm_range_split_nodes(new, old, start, last);
1067 if (r)
1068 return r;
1069 }
1070
1071 old->npages = last - start + 1;
1072 old->start = start;
1073 old->last = last;
1074 new->flags = old->flags;
1075 new->preferred_loc = old->preferred_loc;
1076 new->prefetch_loc = old->prefetch_loc;
1077 new->actual_loc = old->actual_loc;
1078 new->granularity = old->granularity;
1079 new->mapped_to_gpu = old->mapped_to_gpu;
1080 bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
1081 bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
1082 atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
1083
1084 return 0;
1085}
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107static int
1108svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
1109 struct svm_range **new)
1110{
1111 uint64_t old_start = prange->start;
1112 uint64_t old_last = prange->last;
1113 struct svm_range_list *svms;
1114 int r = 0;
1115
1116 pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms,
1117 old_start, old_last, start, last);
1118
1119 if (old_start != start && old_last != last)
1120 return -EINVAL;
1121 if (start < old_start || last > old_last)
1122 return -EINVAL;
1123
1124 svms = prange->svms;
1125 if (old_start == start)
1126 *new = svm_range_new(svms, last + 1, old_last, false);
1127 else
1128 *new = svm_range_new(svms, old_start, start - 1, false);
1129 if (!*new)
1130 return -ENOMEM;
1131
1132 r = svm_range_split_adjust(*new, prange, start, last);
1133 if (r) {
1134 pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
1135 r, old_start, old_last, start, last);
1136 svm_range_free(*new, false);
1137 *new = NULL;
1138 }
1139
1140 return r;
1141}
1142
1143static int
1144svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
1145 struct list_head *insert_list, struct list_head *remap_list)
1146{
1147 struct svm_range *tail = NULL;
1148 int r = svm_range_split(prange, prange->start, new_last, &tail);
1149
1150 if (!r) {
1151 list_add(&tail->list, insert_list);
1152 if (!IS_ALIGNED(new_last + 1, 1UL << prange->granularity))
1153 list_add(&tail->update_list, remap_list);
1154 }
1155 return r;
1156}
1157
1158static int
1159svm_range_split_head(struct svm_range *prange, uint64_t new_start,
1160 struct list_head *insert_list, struct list_head *remap_list)
1161{
1162 struct svm_range *head = NULL;
1163 int r = svm_range_split(prange, new_start, prange->last, &head);
1164
1165 if (!r) {
1166 list_add(&head->list, insert_list);
1167 if (!IS_ALIGNED(new_start, 1UL << prange->granularity))
1168 list_add(&head->update_list, remap_list);
1169 }
1170 return r;
1171}
1172
1173static void
1174svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op)
1175{
1176 pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
1177 pchild, pchild->start, pchild->last, prange, op);
1178
1179 pchild->work_item.mm = NULL;
1180 pchild->work_item.op = op;
1181 list_add_tail(&pchild->child_list, &prange->child_list);
1182}
1183
1184static bool
1185svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
1186{
1187 return (node_a->adev == node_b->adev ||
1188 amdgpu_xgmi_same_hive(node_a->adev, node_b->adev));
1189}
1190
1191static uint64_t
1192svm_range_get_pte_flags(struct kfd_node *node,
1193 struct svm_range *prange, int domain)
1194{
1195 struct kfd_node *bo_node;
1196 uint32_t flags = prange->flags;
1197 uint32_t mapping_flags = 0;
1198 uint32_t gc_ip_version = KFD_GC_VERSION(node);
1199 uint64_t pte_flags;
1200 bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
1201 bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
1202 bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
1203 unsigned int mtype_local;
1204
1205 if (domain == SVM_RANGE_VRAM_DOMAIN)
1206 bo_node = prange->svm_bo->node;
1207
1208 switch (gc_ip_version) {
1209 case IP_VERSION(9, 4, 1):
1210 if (domain == SVM_RANGE_VRAM_DOMAIN) {
1211 if (bo_node == node) {
1212 mapping_flags |= coherent ?
1213 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
1214 } else {
1215 mapping_flags |= coherent ?
1216 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1217 if (svm_nodes_in_same_hive(node, bo_node))
1218 snoop = true;
1219 }
1220 } else {
1221 mapping_flags |= coherent ?
1222 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1223 }
1224 break;
1225 case IP_VERSION(9, 4, 2):
1226 if (domain == SVM_RANGE_VRAM_DOMAIN) {
1227 if (bo_node == node) {
1228 mapping_flags |= coherent ?
1229 AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
1230 if (node->adev->gmc.xgmi.connected_to_cpu)
1231 snoop = true;
1232 } else {
1233 mapping_flags |= coherent ?
1234 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1235 if (svm_nodes_in_same_hive(node, bo_node))
1236 snoop = true;
1237 }
1238 } else {
1239 mapping_flags |= coherent ?
1240 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1241 }
1242 break;
1243 case IP_VERSION(9, 4, 3):
1244 case IP_VERSION(9, 4, 4):
1245 case IP_VERSION(9, 5, 0):
1246 if (ext_coherent)
1247 mtype_local = AMDGPU_VM_MTYPE_CC;
1248 else
1249 mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
1250 amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
1251 snoop = true;
1252 if (domain == SVM_RANGE_VRAM_DOMAIN) {
1253
1254 if (bo_node->adev == node->adev &&
1255 (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
1256 mapping_flags |= mtype_local;
1257
1258
1259
1260 else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
1261 mapping_flags |= AMDGPU_VM_MTYPE_NC;
1262
1263 else if (gc_ip_version < IP_VERSION(9, 5, 0) &&
1264 !svm_nodes_in_same_hive(bo_node, node))
1265 mapping_flags |= AMDGPU_VM_MTYPE_UC;
1266
1267 else
1268 mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1269
1270 } else if (node->adev->flags & AMD_IS_APU) {
1271
1272
1273
1274 if (num_possible_nodes() <= 1)
1275 mapping_flags |= mtype_local;
1276 else
1277 mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1278
1279 } else {
1280 if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
1281 mapping_flags |= AMDGPU_VM_MTYPE_UC;
1282 else
1283 mapping_flags |= AMDGPU_VM_MTYPE_NC;
1284 }
1285 break;
1286 case IP_VERSION(12, 0, 0):
1287 case IP_VERSION(12, 0, 1):
1288 mapping_flags |= AMDGPU_VM_MTYPE_NC;
1289 break;
1290 default:
1291 mapping_flags |= coherent ?
1292 AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
1293 }
1294
1295 mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
1296
1297 if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
1298 mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
1299 if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
1300 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
1301
1302 pte_flags = AMDGPU_PTE_VALID;
1303 pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
1304 pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
1305 if (gc_ip_version >= IP_VERSION(12, 0, 0))
1306 pte_flags |= AMDGPU_PTE_IS_PTE;
1307
1308 pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
1309 return pte_flags;
1310}
1311
1312static int
1313svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1314 uint64_t start, uint64_t last,
1315 struct dma_fence **fence)
1316{
1317 uint64_t init_pte_value = 0;
1318
1319 pr_debug("[0x%llx 0x%llx]\n", start, last);
1320
1321 return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, start,
1322 last, init_pte_value, 0, 0, NULL, NULL,
1323 fence);
1324}
1325
1326static int
1327svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
1328 unsigned long last, uint32_t trigger)
1329{
1330 DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
1331 struct kfd_process_device *pdd;
1332 struct dma_fence *fence = NULL;
1333 struct kfd_process *p;
1334 uint32_t gpuidx;
1335 int r = 0;
1336
1337 if (!prange->mapped_to_gpu) {
1338 pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
1339 prange, prange->start, prange->last);
1340 return 0;
1341 }
1342
1343 if (prange->start == start && prange->last == last) {
1344 pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
1345 prange->mapped_to_gpu = false;
1346 }
1347
1348 bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
1349 MAX_GPU_INSTANCE);
1350 p = container_of(prange->svms, struct kfd_process, svms);
1351
1352 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
1353 pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
1354 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
1355 if (!pdd) {
1356 pr_debug("failed to find device idx %d\n", gpuidx);
1357 return -EINVAL;
1358 }
1359
1360 kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
1361 start, last, trigger);
1362
1363 r = svm_range_unmap_from_gpu(pdd->dev->adev,
1364 drm_priv_to_vm(pdd->drm_priv),
1365 start, last, &fence);
1366 if (r)
1367 break;
1368
1369 if (fence) {
1370 r = dma_fence_wait(fence, false);
1371 dma_fence_put(fence);
1372 fence = NULL;
1373 if (r)
1374 break;
1375 }
1376 kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
1377 }
1378
1379 return r;
1380}
1381
1382static int
1383svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
1384 unsigned long offset, unsigned long npages, bool readonly,
1385 dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
1386 struct dma_fence **fence, bool flush_tlb)
1387{
1388 struct amdgpu_device *adev = pdd->dev->adev;
1389 struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
1390 uint64_t pte_flags;
1391 unsigned long last_start;
1392 int last_domain;
1393 int r = 0;
1394 int64_t i, j;
1395
1396 last_start = prange->start + offset;
1397
1398 pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
1399 last_start, last_start + npages - 1, readonly);
1400
1401 for (i = offset; i < offset + npages; i++) {
1402 last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
1403 dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
1404
1405
1406
1407
1408 if (i < offset + npages - 1 &&
1409 last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN))
1410 continue;
1411
1412 pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
1413 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
1414
1415 pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
1416 if (readonly)
1417 pte_flags &= ~AMDGPU_PTE_WRITEABLE;
1418
1419 pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
1420 prange->svms, last_start, prange->start + i,
1421 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
1422 pte_flags);
1423
1424
1425
1426
1427
1428 r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, true,
1429 NULL, last_start, prange->start + i,
1430 pte_flags,
1431 (last_start - prange->start) << PAGE_SHIFT,
1432 bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
1433 NULL, dma_addr, &vm->last_update);
1434
1435 for (j = last_start - prange->start; j <= i; j++)
1436 dma_addr[j] |= last_domain;
1437
1438 if (r) {
1439 pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
1440 goto out;
1441 }
1442 last_start = prange->start + i + 1;
1443 }
1444
1445 r = amdgpu_vm_update_pdes(adev, vm, false);
1446 if (r) {
1447 pr_debug("failed %d to update directories 0x%lx\n", r,
1448 prange->start);
1449 goto out;
1450 }
1451
1452 if (fence)
1453 *fence = dma_fence_get(vm->last_update);
1454
1455out:
1456 return r;
1457}
1458
1459static int
1460svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
1461 unsigned long npages, bool readonly,
1462 unsigned long *bitmap, bool wait, bool flush_tlb)
1463{
1464 struct kfd_process_device *pdd;
1465 struct amdgpu_device *bo_adev = NULL;
1466 struct kfd_process *p;
1467 struct dma_fence *fence = NULL;
1468 uint32_t gpuidx;
1469 int r = 0;
1470
1471 if (prange->svm_bo && prange->ttm_res)
1472 bo_adev = prange->svm_bo->node->adev;
1473
1474 p = container_of(prange->svms, struct kfd_process, svms);
1475 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
1476 pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
1477 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
1478 if (!pdd) {
1479 pr_debug("failed to find device idx %d\n", gpuidx);
1480 return -EINVAL;
1481 }
1482
1483 pdd = kfd_bind_process_to_device(pdd->dev, p);
1484 if (IS_ERR(pdd))
1485 return -EINVAL;
1486
1487 if (bo_adev && pdd->dev->adev != bo_adev &&
1488 !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
1489 pr_debug("cannot map to device idx %d\n", gpuidx);
1490 continue;
1491 }
1492
1493 r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
1494 prange->dma_addr[gpuidx],
1495 bo_adev, wait ? &fence : NULL,
1496 flush_tlb);
1497 if (r)
1498 break;
1499
1500 if (fence) {
1501 r = dma_fence_wait(fence, false);
1502 dma_fence_put(fence);
1503 fence = NULL;
1504 if (r) {
1505 pr_debug("failed %d to dma fence wait\n", r);
1506 break;
1507 }
1508 }
1509
1510 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
1511 }
1512
1513 return r;
1514}
1515
/*
 * State shared by the helpers that validate and map one SVM range.
 */
struct svm_validate_context {
	/* KFD process owning the range being validated */
	struct kfd_process *process;
	/* range being validated */
	struct svm_range *prange;
	/* copy of the caller's "interruptible" request */
	bool intr;
	/* GPU indexes whose page directories are reserved and mapped on */
	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
	/* drm_exec locking context used by svm_range_reserve_bos() */
	struct drm_exec exec;
};
1523
1524static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
1525{
1526 struct kfd_process_device *pdd;
1527 struct amdgpu_vm *vm;
1528 uint32_t gpuidx;
1529 int r;
1530
1531 drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0);
1532 drm_exec_until_all_locked(&ctx->exec) {
1533 for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
1534 pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
1535 if (!pdd) {
1536 pr_debug("failed to find device idx %d\n", gpuidx);
1537 r = -EINVAL;
1538 goto unreserve_out;
1539 }
1540 vm = drm_priv_to_vm(pdd->drm_priv);
1541
1542 r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
1543 drm_exec_retry_on_contention(&ctx->exec);
1544 if (unlikely(r)) {
1545 pr_debug("failed %d to reserve bo\n", r);
1546 goto unreserve_out;
1547 }
1548 }
1549 }
1550
1551 for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
1552 pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
1553 if (!pdd) {
1554 pr_debug("failed to find device idx %d\n", gpuidx);
1555 r = -EINVAL;
1556 goto unreserve_out;
1557 }
1558
1559 r = amdgpu_vm_validate(pdd->dev->adev,
1560 drm_priv_to_vm(pdd->drm_priv), NULL,
1561 svm_range_bo_validate, NULL);
1562 if (r) {
1563 pr_debug("failed %d validate pt bos\n", r);
1564 goto unreserve_out;
1565 }
1566 }
1567
1568 return 0;
1569
1570unreserve_out:
1571 drm_exec_fini(&ctx->exec);
1572 return r;
1573}
1574
1575static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
1576{
1577 drm_exec_fini(&ctx->exec);
1578}
1579
1580static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
1581{
1582 struct kfd_process_device *pdd;
1583
1584 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
1585 if (!pdd)
1586 return NULL;
1587
1588 return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
1589}
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615static int svm_range_validate_and_map(struct mm_struct *mm,
1616 unsigned long map_start, unsigned long map_last,
1617 struct svm_range *prange, int32_t gpuidx,
1618 bool intr, bool wait, bool flush_tlb)
1619{
1620 struct svm_validate_context *ctx;
1621 unsigned long start, end, addr;
1622 struct kfd_process *p;
1623 void *owner;
1624 int32_t idx;
1625 int r = 0;
1626
1627 ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
1628 if (!ctx)
1629 return -ENOMEM;
1630 ctx->process = container_of(prange->svms, struct kfd_process, svms);
1631 ctx->prange = prange;
1632 ctx->intr = intr;
1633
1634 if (gpuidx < MAX_GPU_INSTANCE) {
1635 bitmap_zero(ctx->bitmap, MAX_GPU_INSTANCE);
1636 bitmap_set(ctx->bitmap, gpuidx, 1);
1637 } else if (ctx->process->xnack_enabled) {
1638 bitmap_copy(ctx->bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
1639
1640
1641
1642
1643
1644 if (prange->actual_loc) {
1645 gpuidx = kfd_process_gpuidx_from_gpuid(ctx->process,
1646 prange->actual_loc);
1647 if (gpuidx < 0) {
1648 WARN_ONCE(1, "failed get device by id 0x%x\n",
1649 prange->actual_loc);
1650 r = -EINVAL;
1651 goto free_ctx;
1652 }
1653 if (test_bit(gpuidx, prange->bitmap_access))
1654 bitmap_set(ctx->bitmap, gpuidx, 1);
1655 }
1656
1657
1658
1659
1660
1661 if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
1662 if (prange->mapped_to_gpu ||
1663 prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
1664 bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
1665 }
1666 } else {
1667 bitmap_or(ctx->bitmap, prange->bitmap_access,
1668 prange->bitmap_aip, MAX_GPU_INSTANCE);
1669 }
1670
1671 if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
1672 r = 0;
1673 goto free_ctx;
1674 }
1675
1676 if (prange->actual_loc && !prange->ttm_res) {
1677
1678
1679
1680 WARN_ONCE(1, "VRAM BO missing during validation\n");
1681 r = -EINVAL;
1682 goto free_ctx;
1683 }
1684
1685 r = svm_range_reserve_bos(ctx, intr);
1686 if (r)
1687 goto free_ctx;
1688
1689 p = container_of(prange->svms, struct kfd_process, svms);
1690 owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
1691 MAX_GPU_INSTANCE));
1692 for_each_set_bit(idx, ctx->bitmap, MAX_GPU_INSTANCE) {
1693 if (kfd_svm_page_owner(p, idx) != owner) {
1694 owner = NULL;
1695 break;
1696 }
1697 }
1698
1699 start = map_start << PAGE_SHIFT;
1700 end = (map_last + 1) << PAGE_SHIFT;
1701 for (addr = start; !r && addr < end; ) {
1702 struct hmm_range *hmm_range = NULL;
1703 unsigned long map_start_vma;
1704 unsigned long map_last_vma;
1705 struct vm_area_struct *vma;
1706 unsigned long next = 0;
1707 unsigned long offset;
1708 unsigned long npages;
1709 bool readonly;
1710
1711 vma = vma_lookup(mm, addr);
1712 if (vma) {
1713 readonly = !(vma->vm_flags & VM_WRITE);
1714
1715 next = min(vma->vm_end, end);
1716 npages = (next - addr) >> PAGE_SHIFT;
1717 WRITE_ONCE(p->svms.faulting_task, current);
1718 r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
1719 readonly, owner, NULL,
1720 &hmm_range);
1721 WRITE_ONCE(p->svms.faulting_task, NULL);
1722 if (r)
1723 pr_debug("failed %d to get svm range pages\n", r);
1724 } else {
1725 r = -EFAULT;
1726 }
1727
1728 if (!r) {
1729 offset = (addr >> PAGE_SHIFT) - prange->start;
1730 r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
1731 hmm_range->hmm_pfns);
1732 if (r)
1733 pr_debug("failed %d to dma map range\n", r);
1734 }
1735
1736 svm_range_lock(prange);
1737
1738
1739
1740
1741
1742 if (hmm_range && amdgpu_hmm_range_get_pages_done(hmm_range) && !r) {
1743 pr_debug("hmm update the range, need validate again\n");
1744 r = -EAGAIN;
1745 }
1746
1747 if (!r && !list_empty(&prange->child_list)) {
1748 pr_debug("range split by unmap in parallel, validate again\n");
1749 r = -EAGAIN;
1750 }
1751
1752 if (!r) {
1753 map_start_vma = max(map_start, prange->start + offset);
1754 map_last_vma = min(map_last, prange->start + offset + npages - 1);
1755 if (map_start_vma <= map_last_vma) {
1756 offset = map_start_vma - prange->start;
1757 npages = map_last_vma - map_start_vma + 1;
1758 r = svm_range_map_to_gpus(prange, offset, npages, readonly,
1759 ctx->bitmap, wait, flush_tlb);
1760 }
1761 }
1762
1763 if (!r && next == end)
1764 prange->mapped_to_gpu = true;
1765
1766 svm_range_unlock(prange);
1767
1768 addr = next;
1769 }
1770
1771 svm_range_unreserve_bos(ctx);
1772 if (!r)
1773 prange->validate_timestamp = ktime_get_boottime();
1774
1775free_ctx:
1776 kfree(ctx);
1777
1778 return r;
1779}
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790void
1791svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
1792 struct mm_struct *mm)
1793{
1794retry_flush_work:
1795 flush_work(&svms->deferred_list_work);
1796 mmap_write_lock(mm);
1797
1798 if (list_empty(&svms->deferred_range_list))
1799 return;
1800 mmap_write_unlock(mm);
1801 pr_debug("retry flush\n");
1802 goto retry_flush_work;
1803}
1804
1805static void svm_range_restore_work(struct work_struct *work)
1806{
1807 struct delayed_work *dwork = to_delayed_work(work);
1808 struct amdkfd_process_info *process_info;
1809 struct svm_range_list *svms;
1810 struct svm_range *prange;
1811 struct kfd_process *p;
1812 struct mm_struct *mm;
1813 int evicted_ranges;
1814 int invalid;
1815 int r;
1816
1817 svms = container_of(dwork, struct svm_range_list, restore_work);
1818 evicted_ranges = atomic_read(&svms->evicted_ranges);
1819 if (!evicted_ranges)
1820 return;
1821
1822 pr_debug("restore svm ranges\n");
1823
1824 p = container_of(svms, struct kfd_process, svms);
1825 process_info = p->kgd_process_info;
1826
1827
1828 mm = get_task_mm(p->lead_thread);
1829 if (!mm) {
1830 pr_debug("svms 0x%p process mm gone\n", svms);
1831 return;
1832 }
1833
1834 mutex_lock(&process_info->lock);
1835 svm_range_list_lock_and_flush_work(svms, mm);
1836 mutex_lock(&svms->lock);
1837
1838 evicted_ranges = atomic_read(&svms->evicted_ranges);
1839
1840 list_for_each_entry(prange, &svms->list, list) {
1841 invalid = atomic_read(&prange->invalid);
1842 if (!invalid)
1843 continue;
1844
1845 pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
1846 prange->svms, prange, prange->start, prange->last,
1847 invalid);
1848
1849
1850
1851
1852 mutex_lock(&prange->migrate_mutex);
1853
1854 r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
1855 MAX_GPU_INSTANCE, false, true, false);
1856 if (r)
1857 pr_debug("failed %d to map 0x%lx to gpus\n", r,
1858 prange->start);
1859
1860 mutex_unlock(&prange->migrate_mutex);
1861 if (r)
1862 goto out_reschedule;
1863
1864 if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
1865 goto out_reschedule;
1866 }
1867
1868 if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
1869 evicted_ranges)
1870 goto out_reschedule;
1871
1872 evicted_ranges = 0;
1873
1874 r = kgd2kfd_resume_mm(mm);
1875 if (r) {
1876
1877
1878
1879 pr_debug("failed %d to resume KFD\n", r);
1880 }
1881
1882 pr_debug("restore svm ranges successfully\n");
1883
1884out_reschedule:
1885 mutex_unlock(&svms->lock);
1886 mmap_write_unlock(mm);
1887 mutex_unlock(&process_info->lock);
1888
1889
1890 if (evicted_ranges) {
1891 pr_debug("reschedule to restore svm range\n");
1892 queue_delayed_work(system_freezable_wq, &svms->restore_work,
1893 msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
1894
1895 kfd_smi_event_queue_restore_rescheduled(mm);
1896 }
1897 mmput(mm);
1898}
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916static int
1917svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
1918 unsigned long start, unsigned long last,
1919 enum mmu_notifier_event event)
1920{
1921 struct svm_range_list *svms = prange->svms;
1922 struct svm_range *pchild;
1923 struct kfd_process *p;
1924 int r = 0;
1925
1926 p = container_of(svms, struct kfd_process, svms);
1927
1928 pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
1929 svms, prange->start, prange->last, start, last);
1930
1931 if (!p->xnack_enabled ||
1932 (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
1933 int evicted_ranges;
1934 bool mapped = prange->mapped_to_gpu;
1935
1936 list_for_each_entry(pchild, &prange->child_list, child_list) {
1937 if (!pchild->mapped_to_gpu)
1938 continue;
1939 mapped = true;
1940 mutex_lock_nested(&pchild->lock, 1);
1941 if (pchild->start <= last && pchild->last >= start) {
1942 pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
1943 pchild->start, pchild->last);
1944 atomic_inc(&pchild->invalid);
1945 }
1946 mutex_unlock(&pchild->lock);
1947 }
1948
1949 if (!mapped)
1950 return r;
1951
1952 if (prange->start <= last && prange->last >= start)
1953 atomic_inc(&prange->invalid);
1954
1955 evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
1956 if (evicted_ranges != 1)
1957 return r;
1958
1959 pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
1960 prange->svms, prange->start, prange->last);
1961
1962
1963 r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
1964 if (r)
1965 pr_debug("failed to quiesce KFD\n");
1966
1967 pr_debug("schedule to restore svm %p ranges\n", svms);
1968 queue_delayed_work(system_freezable_wq, &svms->restore_work,
1969 msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
1970 } else {
1971 unsigned long s, l;
1972 uint32_t trigger;
1973
1974 if (event == MMU_NOTIFY_MIGRATE)
1975 trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
1976 else
1977 trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
1978
1979 pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
1980 prange->svms, start, last);
1981 list_for_each_entry(pchild, &prange->child_list, child_list) {
1982 mutex_lock_nested(&pchild->lock, 1);
1983 s = max(start, pchild->start);
1984 l = min(last, pchild->last);
1985 if (l >= s)
1986 svm_range_unmap_from_gpus(pchild, s, l, trigger);
1987 mutex_unlock(&pchild->lock);
1988 }
1989 s = max(start, prange->start);
1990 l = min(last, prange->last);
1991 if (l >= s)
1992 svm_range_unmap_from_gpus(prange, s, l, trigger);
1993 }
1994
1995 return r;
1996}
1997
1998static struct svm_range *svm_range_clone(struct svm_range *old)
1999{
2000 struct svm_range *new;
2001
2002 new = svm_range_new(old->svms, old->start, old->last, false);
2003 if (!new)
2004 return NULL;
2005 if (svm_range_copy_dma_addrs(new, old)) {
2006 svm_range_free(new, false);
2007 return NULL;
2008 }
2009 if (old->svm_bo) {
2010 new->ttm_res = old->ttm_res;
2011 new->offset = old->offset;
2012 new->svm_bo = svm_range_bo_ref(old->svm_bo);
2013 spin_lock(&new->svm_bo->list_lock);
2014 list_add(&new->svm_bo_list, &new->svm_bo->range_list);
2015 spin_unlock(&new->svm_bo->list_lock);
2016 }
2017 new->flags = old->flags;
2018 new->preferred_loc = old->preferred_loc;
2019 new->prefetch_loc = old->prefetch_loc;
2020 new->actual_loc = old->actual_loc;
2021 new->granularity = old->granularity;
2022 new->mapped_to_gpu = old->mapped_to_gpu;
2023 new->vram_pages = old->vram_pages;
2024 bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
2025 bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
2026 atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
2027
2028 return new;
2029}
2030
2031void svm_range_set_max_pages(struct amdgpu_device *adev)
2032{
2033 uint64_t max_pages;
2034 uint64_t pages, _pages;
2035 uint64_t min_pages = 0;
2036 int i, id;
2037
2038 for (i = 0; i < adev->kfd.dev->num_nodes; i++) {
2039 if (adev->kfd.dev->nodes[i]->xcp)
2040 id = adev->kfd.dev->nodes[i]->xcp->id;
2041 else
2042 id = -1;
2043 pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17;
2044 pages = clamp(pages, 1ULL << 9, 1ULL << 18);
2045 pages = rounddown_pow_of_two(pages);
2046 min_pages = min_not_zero(min_pages, pages);
2047 }
2048
2049 do {
2050 max_pages = READ_ONCE(max_svm_range_pages);
2051 _pages = min_not_zero(max_pages, min_pages);
2052 } while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
2053}
2054
2055static int
2056svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
2057 uint64_t max_pages, struct list_head *insert_list,
2058 struct list_head *update_list)
2059{
2060 struct svm_range *prange;
2061 uint64_t l;
2062
2063 pr_debug("max_svm_range_pages 0x%llx adding [0x%llx 0x%llx]\n",
2064 max_pages, start, last);
2065
2066 while (last >= start) {
2067 l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1);
2068
2069 prange = svm_range_new(svms, start, l, true);
2070 if (!prange)
2071 return -ENOMEM;
2072 list_add(&prange->list, insert_list);
2073 list_add(&prange->update_list, update_list);
2074
2075 start = l + 1;
2076 }
2077 return 0;
2078}
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110static int
2111svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
2112 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
2113 struct list_head *update_list, struct list_head *insert_list,
2114 struct list_head *remove_list, struct list_head *remap_list)
2115{
2116 unsigned long last = start + size - 1UL;
2117 struct svm_range_list *svms = &p->svms;
2118 struct interval_tree_node *node;
2119 struct svm_range *prange;
2120 struct svm_range *tmp;
2121 struct list_head new_list;
2122 int r = 0;
2123
2124 pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
2125
2126 INIT_LIST_HEAD(update_list);
2127 INIT_LIST_HEAD(insert_list);
2128 INIT_LIST_HEAD(remove_list);
2129 INIT_LIST_HEAD(&new_list);
2130 INIT_LIST_HEAD(remap_list);
2131
2132 node = interval_tree_iter_first(&svms->objects, start, last);
2133 while (node) {
2134 struct interval_tree_node *next;
2135 unsigned long next_start;
2136
2137 pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
2138 node->last);
2139
2140 prange = container_of(node, struct svm_range, it_node);
2141 next = interval_tree_iter_next(node, start, last);
2142 next_start = min(node->last, last) + 1;
2143
2144 if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
2145 prange->mapped_to_gpu) {
2146
2147 } else if (node->start < start || node->last > last) {
2148
2149
2150
2151
2152 struct svm_range *old = prange;
2153
2154 prange = svm_range_clone(old);
2155 if (!prange) {
2156 r = -ENOMEM;
2157 goto out;
2158 }
2159
2160 list_add(&old->update_list, remove_list);
2161 list_add(&prange->list, insert_list);
2162 list_add(&prange->update_list, update_list);
2163
2164 if (node->start < start) {
2165 pr_debug("change old range start\n");
2166 r = svm_range_split_head(prange, start,
2167 insert_list, remap_list);
2168 if (r)
2169 goto out;
2170 }
2171 if (node->last > last) {
2172 pr_debug("change old range last\n");
2173 r = svm_range_split_tail(prange, last,
2174 insert_list, remap_list);
2175 if (r)
2176 goto out;
2177 }
2178 } else {
2179
2180
2181
2182 list_add(&prange->update_list, update_list);
2183 }
2184
2185
2186 if (node->start > start) {
2187 r = svm_range_split_new(svms, start, node->start - 1,
2188 READ_ONCE(max_svm_range_pages),
2189 &new_list, update_list);
2190 if (r)
2191 goto out;
2192 }
2193
2194 node = next;
2195 start = next_start;
2196 }
2197
2198
2199 if (start <= last)
2200 r = svm_range_split_new(svms, start, last,
2201 READ_ONCE(max_svm_range_pages),
2202 &new_list, update_list);
2203
2204out:
2205 if (r) {
2206 list_for_each_entry_safe(prange, tmp, insert_list, list)
2207 svm_range_free(prange, false);
2208 list_for_each_entry_safe(prange, tmp, &new_list, list)
2209 svm_range_free(prange, true);
2210 } else {
2211 list_splice(&new_list, insert_list);
2212 }
2213
2214 return r;
2215}
2216
2217static void
2218svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
2219 struct svm_range *prange)
2220{
2221 unsigned long start;
2222 unsigned long last;
2223
2224 start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
2225 last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
2226
2227 if (prange->start == start && prange->last == last)
2228 return;
2229
2230 pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
2231 prange->svms, prange, start, last, prange->start,
2232 prange->last);
2233
2234 if (start != 0 && last != 0) {
2235 interval_tree_remove(&prange->it_node, &prange->svms->objects);
2236 svm_range_remove_notifier(prange);
2237 }
2238 prange->it_node.start = prange->start;
2239 prange->it_node.last = prange->last;
2240
2241 interval_tree_insert(&prange->it_node, &prange->svms->objects);
2242 svm_range_add_notifier_locked(mm, prange);
2243}
2244
2245static void
2246svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
2247 struct mm_struct *mm)
2248{
2249 switch (prange->work_item.op) {
2250 case SVM_OP_NULL:
2251 pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
2252 svms, prange, prange->start, prange->last);
2253 break;
2254 case SVM_OP_UNMAP_RANGE:
2255 pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n",
2256 svms, prange, prange->start, prange->last);
2257 svm_range_unlink(prange);
2258 svm_range_remove_notifier(prange);
2259 svm_range_free(prange, true);
2260 break;
2261 case SVM_OP_UPDATE_RANGE_NOTIFIER:
2262 pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
2263 svms, prange, prange->start, prange->last);
2264 svm_range_update_notifier_and_interval_tree(mm, prange);
2265 break;
2266 case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
2267 pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
2268 svms, prange, prange->start, prange->last);
2269 svm_range_update_notifier_and_interval_tree(mm, prange);
2270
2271 break;
2272 case SVM_OP_ADD_RANGE:
2273 pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
2274 prange->start, prange->last);
2275 svm_range_add_to_svms(prange);
2276 svm_range_add_notifier_locked(mm, prange);
2277 break;
2278 case SVM_OP_ADD_RANGE_AND_MAP:
2279 pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
2280 prange, prange->start, prange->last);
2281 svm_range_add_to_svms(prange);
2282 svm_range_add_notifier_locked(mm, prange);
2283
2284 break;
2285 default:
2286 WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange,
2287 prange->work_item.op);
2288 }
2289}
2290
2291static void svm_range_drain_retry_fault(struct svm_range_list *svms)
2292{
2293 struct kfd_process_device *pdd;
2294 struct kfd_process *p;
2295 uint32_t i;
2296
2297 p = container_of(svms, struct kfd_process, svms);
2298
2299 for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
2300 pdd = p->pdds[i];
2301 if (!pdd)
2302 continue;
2303
2304 pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
2305
2306 amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
2307 pdd->dev->adev->irq.retry_cam_enabled ?
2308 &pdd->dev->adev->irq.ih :
2309 &pdd->dev->adev->irq.ih1);
2310
2311 if (pdd->dev->adev->irq.retry_cam_enabled)
2312 amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
2313 &pdd->dev->adev->irq.ih_soft);
2314
2315
2316 pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
2317 }
2318}
2319
2320static void svm_range_deferred_list_work(struct work_struct *work)
2321{
2322 struct svm_range_list *svms;
2323 struct svm_range *prange;
2324 struct mm_struct *mm;
2325
2326 svms = container_of(work, struct svm_range_list, deferred_list_work);
2327 pr_debug("enter svms 0x%p\n", svms);
2328
2329 spin_lock(&svms->deferred_list_lock);
2330 while (!list_empty(&svms->deferred_range_list)) {
2331 prange = list_first_entry(&svms->deferred_range_list,
2332 struct svm_range, deferred_list);
2333 spin_unlock(&svms->deferred_list_lock);
2334
2335 pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
2336 prange->start, prange->last, prange->work_item.op);
2337
2338 mm = prange->work_item.mm;
2339
2340 mmap_write_lock(mm);
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350 spin_lock(&svms->deferred_list_lock);
2351 list_del_init(&prange->deferred_list);
2352 spin_unlock(&svms->deferred_list_lock);
2353
2354 mutex_lock(&svms->lock);
2355 mutex_lock(&prange->migrate_mutex);
2356 while (!list_empty(&prange->child_list)) {
2357 struct svm_range *pchild;
2358
2359 pchild = list_first_entry(&prange->child_list,
2360 struct svm_range, child_list);
2361 pr_debug("child prange 0x%p op %d\n", pchild,
2362 pchild->work_item.op);
2363 list_del_init(&pchild->child_list);
2364 svm_range_handle_list_op(svms, pchild, mm);
2365 }
2366 mutex_unlock(&prange->migrate_mutex);
2367
2368 svm_range_handle_list_op(svms, prange, mm);
2369 mutex_unlock(&svms->lock);
2370 mmap_write_unlock(mm);
2371
2372
2373
2374
2375 mmput_async(mm);
2376
2377 spin_lock(&svms->deferred_list_lock);
2378 }
2379 spin_unlock(&svms->deferred_list_lock);
2380 pr_debug("exit svms 0x%p\n", svms);
2381}
2382
2383void
2384svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
2385 struct mm_struct *mm, enum svm_work_list_ops op)
2386{
2387 spin_lock(&svms->deferred_list_lock);
2388
2389 if (!list_empty(&prange->deferred_list)) {
2390 pr_debug("update exist prange 0x%p work op %d\n", prange, op);
2391 WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n");
2392 if (op != SVM_OP_NULL &&
2393 prange->work_item.op != SVM_OP_UNMAP_RANGE)
2394 prange->work_item.op = op;
2395 } else {
2396
2397
2398
2399 if (mmget_not_zero(mm)) {
2400 prange->work_item.mm = mm;
2401 prange->work_item.op = op;
2402 list_add_tail(&prange->deferred_list,
2403 &prange->svms->deferred_range_list);
2404 pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
2405 prange, prange->start, prange->last, op);
2406 }
2407 }
2408 spin_unlock(&svms->deferred_list_lock);
2409}
2410
2411void schedule_deferred_list_work(struct svm_range_list *svms)
2412{
2413 spin_lock(&svms->deferred_list_lock);
2414 if (!list_empty(&svms->deferred_range_list))
2415 schedule_work(&svms->deferred_list_work);
2416 spin_unlock(&svms->deferred_list_lock);
2417}
2418
2419static void
2420svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start,
2421 unsigned long last)
2422{
2423 struct svm_range *head;
2424 struct svm_range *tail;
2425
2426 if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
2427 pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange,
2428 prange->start, prange->last);
2429 return;
2430 }
2431 if (start > prange->last || last < prange->start)
2432 return;
2433
2434 head = tail = prange;
2435 if (start > prange->start)
2436 svm_range_split(prange, prange->start, start - 1, &tail);
2437 if (last < tail->last)
2438 svm_range_split(tail, last + 1, tail->last, &head);
2439
2440 if (head != prange && tail != prange) {
2441 svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
2442 svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE);
2443 } else if (tail != prange) {
2444 svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE);
2445 } else if (head != prange) {
2446 svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
2447 } else if (parent != prange) {
2448 prange->work_item.op = SVM_OP_UNMAP_RANGE;
2449 }
2450}
2451
/**
 * svm_range_unmap_from_cpu - handle a CPU unmap that hits an SVM range
 * @mm: address space the unmap happened in
 * @prange: range whose MMU notifier fired
 * @start: first unmapped page
 * @last: last unmapped page
 *
 * Records per-GPU interrupt-ring checkpoint timestamps, unmaps the affected
 * part of @prange and of its children from the GPUs, splits the ranges around
 * the unmapped interval and queues deferred work for @prange.
 *
 * Called from svm_range_cpu_invalidate_pagetables() between
 * svm_range_lock(prange) and svm_range_unlock(prange).
 */
static void
svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
			 unsigned long start, unsigned long last)
{
	uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
	struct svm_range_list *svms;
	struct svm_range *pchild;
	struct kfd_process *p;
	unsigned long s, l;
	bool unmap_parent;
	uint32_t i;

	/*
	 * A non-zero queue_refcount means the range is still referenced by a
	 * queue: warn and quiesce the queues of this mm before unmapping.
	 * A quiesce failure is only logged.
	 */
	if (atomic_read(&prange->queue_refcount)) {
		int r;

		pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n",
			prange->start << PAGE_SHIFT);
		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
		if (r)
			pr_debug("failed %d to quiesce KFD queues\n", r);
	}

	/* Process reference is dropped by kfd_unref_process() at the end */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return;
	svms = &p->svms;

	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
		 prange, prange->start, prange->last, start, last);

	/*
	 * For every SVM-capable device, remember the timestamp decoded at the
	 * current write pointer of an interrupt ring that still has
	 * unprocessed entries. svm_range_restore_pages() compares fault
	 * timestamps against svms->checkpoint_ts[] to decide which faults to
	 * drop.
	 */
	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
		struct kfd_process_device *pdd;
		struct amdgpu_device *adev;
		struct amdgpu_ih_ring *ih;
		uint32_t checkpoint_wptr;

		pdd = p->pdds[i];
		if (!pdd)
			continue;

		adev = pdd->dev->adev;

		/* Prefer the ih1 ring when it exists and is not empty */
		if (adev->irq.ih1.ring_size) {
			ih = &adev->irq.ih1;
			checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
			if (ih->rptr != checkpoint_wptr) {
				svms->checkpoint_ts[i] =
					amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
				continue;
			}
		}

		/* Otherwise use the soft ring, if it is not empty */
		ih = &adev->irq.ih_soft;
		checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
		if (ih->rptr != checkpoint_wptr)
			svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
	}

	/*
	 * Evaluated before any split below, so it reflects the original
	 * extent of the parent range.
	 */
	unmap_parent = start <= prange->start && last >= prange->last;

	/* Unmap and split every child that overlaps [start, last] */
	list_for_each_entry(pchild, &prange->child_list, child_list) {
		mutex_lock_nested(&pchild->lock, 1);
		s = max(start, pchild->start);
		l = min(last, pchild->last);
		if (l >= s)
			svm_range_unmap_from_gpus(pchild, s, l, trigger);
		svm_range_unmap_split(prange, pchild, start, last);
		mutex_unlock(&pchild->lock);
	}
	/* Then do the same for the parent range itself */
	s = max(start, prange->start);
	l = min(last, prange->last);
	if (l >= s)
		svm_range_unmap_from_gpus(prange, s, l, trigger);
	svm_range_unmap_split(prange, prange, start, last);

	/*
	 * Fully covered parent: queue it for removal. Partially covered
	 * parent: queue a notifier update instead.
	 */
	if (unmap_parent)
		svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
	else
		svm_range_add_list_work(svms, prange, mm,
					SVM_OP_UPDATE_RANGE_NOTIFIER);
	schedule_deferred_list_work(svms);

	kfd_unref_process(p);
}
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562static bool
2563svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
2564 const struct mmu_notifier_range *range,
2565 unsigned long cur_seq)
2566{
2567 struct svm_range *prange;
2568 unsigned long start;
2569 unsigned long last;
2570
2571 if (range->event == MMU_NOTIFY_RELEASE)
2572 return true;
2573
2574 start = mni->interval_tree.start;
2575 last = mni->interval_tree.last;
2576 start = max(start, range->start) >> PAGE_SHIFT;
2577 last = min(last, range->end - 1) >> PAGE_SHIFT;
2578 pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
2579 start, last, range->start >> PAGE_SHIFT,
2580 (range->end - 1) >> PAGE_SHIFT,
2581 mni->interval_tree.start >> PAGE_SHIFT,
2582 mni->interval_tree.last >> PAGE_SHIFT, range->event);
2583
2584 prange = container_of(mni, struct svm_range, notifier);
2585
2586 svm_range_lock(prange);
2587 mmu_interval_set_seq(mni, cur_seq);
2588
2589 switch (range->event) {
2590 case MMU_NOTIFY_UNMAP:
2591 svm_range_unmap_from_cpu(mni->mm, prange, start, last);
2592 break;
2593 default:
2594 svm_range_evict(prange, mni->mm, start, last, range->event);
2595 break;
2596 }
2597
2598 svm_range_unlock(prange);
2599
2600 return true;
2601}
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613struct svm_range *
2614svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
2615 struct svm_range **parent)
2616{
2617 struct interval_tree_node *node;
2618 struct svm_range *prange;
2619 struct svm_range *pchild;
2620
2621 node = interval_tree_iter_first(&svms->objects, addr, addr);
2622 if (!node)
2623 return NULL;
2624
2625 prange = container_of(node, struct svm_range, it_node);
2626 pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n",
2627 addr, prange->start, prange->last, node->start, node->last);
2628
2629 if (addr >= prange->start && addr <= prange->last) {
2630 if (parent)
2631 *parent = prange;
2632 return prange;
2633 }
2634 list_for_each_entry(pchild, &prange->child_list, child_list)
2635 if (addr >= pchild->start && addr <= pchild->last) {
2636 pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n",
2637 addr, pchild->start, pchild->last);
2638 if (parent)
2639 *parent = prange;
2640 return pchild;
2641 }
2642
2643 return NULL;
2644}
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667static int32_t
2668svm_range_best_restore_location(struct svm_range *prange,
2669 struct kfd_node *node,
2670 int32_t *gpuidx)
2671{
2672 struct kfd_node *bo_node, *preferred_node;
2673 struct kfd_process *p;
2674 uint32_t gpuid;
2675 int r;
2676
2677 p = container_of(prange->svms, struct kfd_process, svms);
2678
2679 r = kfd_process_gpuid_from_node(p, node, &gpuid, gpuidx);
2680 if (r < 0) {
2681 pr_debug("failed to get gpuid from kgd\n");
2682 return -1;
2683 }
2684
2685 if (node->adev->apu_prefer_gtt)
2686 return 0;
2687
2688 if (prange->preferred_loc == gpuid ||
2689 prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
2690 return prange->preferred_loc;
2691 } else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
2692 preferred_node = svm_range_get_node_by_id(prange, prange->preferred_loc);
2693 if (preferred_node && svm_nodes_in_same_hive(node, preferred_node))
2694 return prange->preferred_loc;
2695
2696 }
2697
2698 if (test_bit(*gpuidx, prange->bitmap_access))
2699 return gpuid;
2700
2701 if (test_bit(*gpuidx, prange->bitmap_aip)) {
2702 if (!prange->actual_loc)
2703 return 0;
2704
2705 bo_node = svm_range_get_node_by_id(prange, prange->actual_loc);
2706 if (bo_node && svm_nodes_in_same_hive(node, bo_node))
2707 return prange->actual_loc;
2708 else
2709 return 0;
2710 }
2711
2712 return -1;
2713}
2714
2715static int
2716svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
2717 unsigned long *start, unsigned long *last,
2718 bool *is_heap_stack)
2719{
2720 struct vm_area_struct *vma;
2721 struct interval_tree_node *node;
2722 struct rb_node *rb_node;
2723 unsigned long start_limit, end_limit;
2724
2725 vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
2726 if (!vma) {
2727 pr_debug("VMA does not exist in address [0x%llx]\n", addr);
2728 return -EFAULT;
2729 }
2730
2731 *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
2732
2733 start_limit = max(vma->vm_start >> PAGE_SHIFT,
2734 (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
2735 end_limit = min(vma->vm_end >> PAGE_SHIFT,
2736 (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
2737
2738
2739 node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
2740 if (node) {
2741 end_limit = min(end_limit, node->start);
2742
2743 rb_node = rb_prev(&node->rb);
2744 } else {
2745
2746
2747
2748 rb_node = rb_last(&p->svms.objects.rb_root);
2749 }
2750 if (rb_node) {
2751 node = container_of(rb_node, struct interval_tree_node, rb);
2752 if (node->last >= addr) {
2753 WARN(1, "Overlap with prev node and page fault addr\n");
2754 return -EFAULT;
2755 }
2756 start_limit = max(start_limit, node->last + 1);
2757 }
2758
2759 *start = start_limit;
2760 *last = end_limit - 1;
2761
2762 pr_debug("vma [0x%lx 0x%lx] range [0x%lx 0x%lx] is_heap_stack %d\n",
2763 vma->vm_start >> PAGE_SHIFT, vma->vm_end >> PAGE_SHIFT,
2764 *start, *last, *is_heap_stack);
2765
2766 return 0;
2767}
2768
2769static int
2770svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
2771 uint64_t *bo_s, uint64_t *bo_l)
2772{
2773 struct amdgpu_bo_va_mapping *mapping;
2774 struct interval_tree_node *node;
2775 struct amdgpu_bo *bo = NULL;
2776 unsigned long userptr;
2777 uint32_t i;
2778 int r;
2779
2780 for (i = 0; i < p->n_pdds; i++) {
2781 struct amdgpu_vm *vm;
2782
2783 if (!p->pdds[i]->drm_priv)
2784 continue;
2785
2786 vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
2787 r = amdgpu_bo_reserve(vm->root.bo, false);
2788 if (r)
2789 return r;
2790
2791
2792 node = interval_tree_iter_first(&vm->va, 0, ~0ULL);
2793 while (node) {
2794 mapping = container_of((struct rb_node *)node,
2795 struct amdgpu_bo_va_mapping, rb);
2796 bo = mapping->bo_va->base.bo;
2797
2798 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
2799 start << PAGE_SHIFT,
2800 last << PAGE_SHIFT,
2801 &userptr)) {
2802 node = interval_tree_iter_next(node, 0, ~0ULL);
2803 continue;
2804 }
2805
2806 pr_debug("[0x%llx 0x%llx] already userptr mapped\n",
2807 start, last);
2808 if (bo_s && bo_l) {
2809 *bo_s = userptr >> PAGE_SHIFT;
2810 *bo_l = *bo_s + bo->tbo.ttm->num_pages - 1;
2811 }
2812 amdgpu_bo_unreserve(vm->root.bo);
2813 return -EADDRINUSE;
2814 }
2815 amdgpu_bo_unreserve(vm->root.bo);
2816 }
2817 return 0;
2818}
2819
2820static struct
2821svm_range *svm_range_create_unregistered_range(struct kfd_node *node,
2822 struct kfd_process *p,
2823 struct mm_struct *mm,
2824 int64_t addr)
2825{
2826 struct svm_range *prange = NULL;
2827 unsigned long start, last;
2828 uint32_t gpuid, gpuidx;
2829 bool is_heap_stack;
2830 uint64_t bo_s = 0;
2831 uint64_t bo_l = 0;
2832 int r;
2833
2834 if (svm_range_get_range_boundaries(p, addr, &start, &last,
2835 &is_heap_stack))
2836 return NULL;
2837
2838 r = svm_range_check_vm(p, start, last, &bo_s, &bo_l);
2839 if (r != -EADDRINUSE)
2840 r = svm_range_check_vm_userptr(p, start, last, &bo_s, &bo_l);
2841
2842 if (r == -EADDRINUSE) {
2843 if (addr >= bo_s && addr <= bo_l)
2844 return NULL;
2845
2846
2847 start = addr;
2848 last = addr;
2849 }
2850
2851 prange = svm_range_new(&p->svms, start, last, true);
2852 if (!prange) {
2853 pr_debug("Failed to create prange in address [0x%llx]\n", addr);
2854 return NULL;
2855 }
2856 if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
2857 pr_debug("failed to get gpuid from kgd\n");
2858 svm_range_free(prange, true);
2859 return NULL;
2860 }
2861
2862 if (is_heap_stack)
2863 prange->preferred_loc = KFD_IOCTL_SVM_LOCATION_SYSMEM;
2864
2865 svm_range_add_to_svms(prange);
2866 svm_range_add_notifier_locked(mm, prange);
2867
2868 return prange;
2869}
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883static bool svm_range_skip_recover(struct svm_range *prange)
2884{
2885 struct svm_range_list *svms = prange->svms;
2886
2887 spin_lock(&svms->deferred_list_lock);
2888 if (list_empty(&prange->deferred_list) &&
2889 list_empty(&prange->child_list)) {
2890 spin_unlock(&svms->deferred_list_lock);
2891 return false;
2892 }
2893 spin_unlock(&svms->deferred_list_lock);
2894
2895 if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
2896 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
2897 svms, prange, prange->start, prange->last);
2898 return true;
2899 }
2900 if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
2901 prange->work_item.op == SVM_OP_ADD_RANGE) {
2902 pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
2903 svms, prange, prange->start, prange->last);
2904 return true;
2905 }
2906 return false;
2907}
2908
2909static void
2910svm_range_count_fault(struct kfd_node *node, struct kfd_process *p,
2911 int32_t gpuidx)
2912{
2913 struct kfd_process_device *pdd;
2914
2915
2916
2917
2918
2919 if (gpuidx == MAX_GPU_INSTANCE) {
2920 uint32_t gpuid;
2921 int r;
2922
2923 r = kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx);
2924 if (r < 0)
2925 return;
2926 }
2927
2928
2929
2930
2931 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
2932 if (pdd)
2933 WRITE_ONCE(pdd->faults, pdd->faults + 1);
2934}
2935
2936static bool
2937svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
2938{
2939 unsigned long requested = VM_READ;
2940
2941 if (write_fault)
2942 requested |= VM_WRITE;
2943
2944 pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
2945 vma->vm_flags);
2946 return (vma->vm_flags & requested) == requested;
2947}
2948
2949int
2950svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
2951 uint32_t vmid, uint32_t node_id,
2952 uint64_t addr, uint64_t ts, bool write_fault)
2953{
2954 unsigned long start, last, size;
2955 struct mm_struct *mm = NULL;
2956 struct svm_range_list *svms;
2957 struct svm_range *prange;
2958 struct kfd_process *p;
2959 ktime_t timestamp = ktime_get_boottime();
2960 struct kfd_node *node;
2961 int32_t best_loc;
2962 int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
2963 bool write_locked = false;
2964 struct vm_area_struct *vma;
2965 bool migration = false;
2966 int r = 0;
2967
2968 if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
2969 pr_debug("device does not support SVM\n");
2970 return -EFAULT;
2971 }
2972
2973 p = kfd_lookup_process_by_pasid(pasid, NULL);
2974 if (!p) {
2975 pr_debug("kfd process not founded pasid 0x%x\n", pasid);
2976 return 0;
2977 }
2978 svms = &p->svms;
2979
2980 pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
2981
2982 if (atomic_read(&svms->drain_pagefaults)) {
2983 pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
2984 r = 0;
2985 goto out;
2986 }
2987
2988 node = kfd_node_by_irq_ids(adev, node_id, vmid);
2989 if (!node) {
2990 pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
2991 vmid);
2992 r = -EFAULT;
2993 goto out;
2994 }
2995
2996 if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
2997 pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id);
2998 r = -EFAULT;
2999 goto out;
3000 }
3001
3002 if (!p->xnack_enabled) {
3003 pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
3004 r = -EFAULT;
3005 goto out;
3006 }
3007
3008
3009
3010
3011 mm = get_task_mm(p->lead_thread);
3012 if (!mm) {
3013 pr_debug("svms 0x%p failed to get mm\n", svms);
3014 r = 0;
3015 goto out;
3016 }
3017
3018 mmap_read_lock(mm);
3019retry_write_locked:
3020 mutex_lock(&svms->lock);
3021
3022
3023 if (svms->checkpoint_ts[gpuidx] != 0) {
3024 if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) {
3025 pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
3026 r = -EAGAIN;
3027 goto out_unlock_svms;
3028 } else {
3029
3030
3031
3032 svms->checkpoint_ts[gpuidx] = 0;
3033 }
3034 }
3035
3036 prange = svm_range_from_addr(svms, addr, NULL);
3037 if (!prange) {
3038 pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
3039 svms, addr);
3040 if (!write_locked) {
3041
3042
3043
3044
3045 mutex_unlock(&svms->lock);
3046 mmap_read_unlock(mm);
3047 mmap_write_lock(mm);
3048 write_locked = true;
3049 goto retry_write_locked;
3050 }
3051 prange = svm_range_create_unregistered_range(node, p, mm, addr);
3052 if (!prange) {
3053 pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
3054 svms, addr);
3055 mmap_write_downgrade(mm);
3056 r = -EFAULT;
3057 goto out_unlock_svms;
3058 }
3059 }
3060 if (write_locked)
3061 mmap_write_downgrade(mm);
3062
3063 mutex_lock(&prange->migrate_mutex);
3064
3065 if (svm_range_skip_recover(prange)) {
3066 amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
3067 r = 0;
3068 goto out_unlock_range;
3069 }
3070
3071
3072 if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
3073 AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
3074 pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
3075 svms, prange->start, prange->last);
3076 r = 0;
3077 goto out_unlock_range;
3078 }
3079
3080
3081
3082
3083 vma = vma_lookup(mm, addr << PAGE_SHIFT);
3084 if (!vma) {
3085 pr_debug("address 0x%llx VMA is removed\n", addr);
3086 r = 0;
3087 goto out_unlock_range;
3088 }
3089
3090 if (!svm_fault_allowed(vma, write_fault)) {
3091 pr_debug("fault addr 0x%llx no %s permission\n", addr,
3092 write_fault ? "write" : "read");
3093 r = -EPERM;
3094 goto out_unlock_range;
3095 }
3096
3097 best_loc = svm_range_best_restore_location(prange, node, &gpuidx);
3098 if (best_loc == -1) {
3099 pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
3100 svms, prange->start, prange->last);
3101 r = -EACCES;
3102 goto out_unlock_range;
3103 }
3104
3105 pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
3106 svms, prange->start, prange->last, best_loc,
3107 prange->actual_loc);
3108
3109 kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
3110 write_fault, timestamp);
3111
3112
3113 size = 1UL << prange->granularity;
3114 start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
3115 last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
3116 if (prange->actual_loc != 0 || best_loc != 0) {
3117 if (best_loc) {
3118 r = svm_migrate_to_vram(prange, best_loc, start, last,
3119 mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
3120 if (r) {
3121 pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
3122 r, addr);
3123
3124
3125
3126 if (prange->actual_loc && prange->actual_loc != best_loc)
3127 r = svm_migrate_vram_to_ram(prange, mm, start, last,
3128 KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
3129 else
3130 r = 0;
3131 }
3132 } else {
3133 r = svm_migrate_vram_to_ram(prange, mm, start, last,
3134 KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
3135 }
3136 if (r) {
3137 pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
3138 r, svms, start, last);
3139 goto out_migrate_fail;
3140 } else {
3141 migration = true;
3142 }
3143 }
3144
3145 r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
3146 false, false);
3147 if (r)
3148 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
3149 r, svms, start, last);
3150
3151out_migrate_fail:
3152 kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
3153 migration);
3154
3155out_unlock_range:
3156 mutex_unlock(&prange->migrate_mutex);
3157out_unlock_svms:
3158 mutex_unlock(&svms->lock);
3159 mmap_read_unlock(mm);
3160
3161 if (r != -EAGAIN)
3162 svm_range_count_fault(node, p, gpuidx);
3163
3164 mmput(mm);
3165out:
3166 kfd_unref_process(p);
3167
3168 if (r == -EAGAIN) {
3169 pr_debug("recover vm fault later\n");
3170 amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
3171 r = 0;
3172 }
3173 return r;
3174}
3175
3176int
3177svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
3178{
3179 struct svm_range *prange, *pchild;
3180 uint64_t reserved_size = 0;
3181 uint64_t size;
3182 int r = 0;
3183
3184 pr_debug("switching xnack from %d to %d\n", p->xnack_enabled, xnack_enabled);
3185
3186 mutex_lock(&p->svms.lock);
3187
3188 list_for_each_entry(prange, &p->svms.list, list) {
3189 svm_range_lock(prange);
3190 list_for_each_entry(pchild, &prange->child_list, child_list) {
3191 size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
3192 if (xnack_enabled) {
3193 amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
3194 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
3195 } else {
3196 r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
3197 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
3198 if (r)
3199 goto out_unlock;
3200 reserved_size += size;
3201 }
3202 }
3203
3204 size = (prange->last - prange->start + 1) << PAGE_SHIFT;
3205 if (xnack_enabled) {
3206 amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
3207 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
3208 } else {
3209 r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
3210 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
3211 if (r)
3212 goto out_unlock;
3213 reserved_size += size;
3214 }
3215out_unlock:
3216 svm_range_unlock(prange);
3217 if (r)
3218 break;
3219 }
3220
3221 if (r)
3222 amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
3223 KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
3224 else
3225
3226
3227
3228 p->xnack_enabled = xnack_enabled;
3229
3230 mutex_unlock(&p->svms.lock);
3231 return r;
3232}
3233
3234void svm_range_list_fini(struct kfd_process *p)
3235{
3236 struct svm_range *prange;
3237 struct svm_range *next;
3238
3239 pr_debug("process pid %d svms 0x%p\n", p->lead_thread->pid,
3240 &p->svms);
3241
3242 cancel_delayed_work_sync(&p->svms.restore_work);
3243
3244
3245 flush_work(&p->svms.deferred_list_work);
3246
3247
3248
3249
3250
3251
3252 atomic_set(&p->svms.drain_pagefaults, 1);
3253 svm_range_drain_retry_fault(&p->svms);
3254
3255 list_for_each_entry_safe(prange, next, &p->svms.list, list) {
3256 svm_range_unlink(prange);
3257 svm_range_remove_notifier(prange);
3258 svm_range_free(prange, true);
3259 }
3260
3261 mutex_destroy(&p->svms.lock);
3262
3263 pr_debug("process pid %d svms 0x%p done\n",
3264 p->lead_thread->pid, &p->svms);
3265}
3266
3267int svm_range_list_init(struct kfd_process *p)
3268{
3269 struct svm_range_list *svms = &p->svms;
3270 int i;
3271
3272 svms->objects = RB_ROOT_CACHED;
3273 mutex_init(&svms->lock);
3274 INIT_LIST_HEAD(&svms->list);
3275 atomic_set(&svms->evicted_ranges, 0);
3276 atomic_set(&svms->drain_pagefaults, 0);
3277 INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
3278 INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
3279 INIT_LIST_HEAD(&svms->deferred_range_list);
3280 INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
3281 spin_lock_init(&svms->deferred_list_lock);
3282
3283 for (i = 0; i < p->n_pdds; i++)
3284 if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
3285 bitmap_set(svms->bitmap_supported, i, 1);
3286
3287
3288
3289
3290 svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
3291 pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
3292
3293 return 0;
3294}
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316static int
3317svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
3318 uint64_t *bo_s, uint64_t *bo_l)
3319{
3320 struct amdgpu_bo_va_mapping *mapping;
3321 struct interval_tree_node *node;
3322 uint32_t i;
3323 int r;
3324
3325 for (i = 0; i < p->n_pdds; i++) {
3326 struct amdgpu_vm *vm;
3327
3328 if (!p->pdds[i]->drm_priv)
3329 continue;
3330
3331 vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
3332 r = amdgpu_bo_reserve(vm->root.bo, false);
3333 if (r)
3334 return r;
3335
3336 node = interval_tree_iter_first(&vm->va, start, last);
3337 if (node) {
3338 pr_debug("range [0x%llx 0x%llx] already TTM mapped\n",
3339 start, last);
3340 mapping = container_of((struct rb_node *)node,
3341 struct amdgpu_bo_va_mapping, rb);
3342 if (bo_s && bo_l) {
3343 *bo_s = mapping->start;
3344 *bo_l = mapping->last;
3345 }
3346 amdgpu_bo_unreserve(vm->root.bo);
3347 return -EADDRINUSE;
3348 }
3349 amdgpu_bo_unreserve(vm->root.bo);
3350 }
3351
3352 return 0;
3353}
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368static int
3369svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
3370{
3371 const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
3372 struct vm_area_struct *vma;
3373 unsigned long end;
3374 unsigned long start_unchg = start;
3375
3376 start <<= PAGE_SHIFT;
3377 end = start + (size << PAGE_SHIFT);
3378 do {
3379 vma = vma_lookup(p->mm, start);
3380 if (!vma || (vma->vm_flags & device_vma))
3381 return -EFAULT;
3382 start = min(end, vma->vm_end);
3383 } while (start < end);
3384
3385 return svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT, NULL,
3386 NULL);
3387}
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415static uint32_t
3416svm_range_best_prefetch_location(struct svm_range *prange)
3417{
3418 DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
3419 uint32_t best_loc = prange->prefetch_loc;
3420 struct kfd_process_device *pdd;
3421 struct kfd_node *bo_node;
3422 struct kfd_process *p;
3423 uint32_t gpuidx;
3424
3425 p = container_of(prange->svms, struct kfd_process, svms);
3426
3427 if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
3428 goto out;
3429
3430 bo_node = svm_range_get_node_by_id(prange, best_loc);
3431 if (!bo_node) {
3432 WARN_ONCE(1, "failed to get valid kfd node at id%x\n", best_loc);
3433 best_loc = 0;
3434 goto out;
3435 }
3436
3437 if (bo_node->adev->apu_prefer_gtt) {
3438 best_loc = 0;
3439 goto out;
3440 }
3441
3442 if (p->xnack_enabled)
3443 bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
3444 else
3445 bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
3446 MAX_GPU_INSTANCE);
3447
3448 for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
3449 pdd = kfd_process_device_from_gpuidx(p, gpuidx);
3450 if (!pdd) {
3451 pr_debug("failed to get device by idx 0x%x\n", gpuidx);
3452 continue;
3453 }
3454
3455 if (pdd->dev->adev == bo_node->adev)
3456 continue;
3457
3458 if (!svm_nodes_in_same_hive(pdd->dev, bo_node)) {
3459 best_loc = 0;
3460 break;
3461 }
3462 }
3463
3464out:
3465 pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n",
3466 p->xnack_enabled, &p->svms, prange->start, prange->last,
3467 best_loc);
3468
3469 return best_loc;
3470}
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496static int
3497svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
3498 bool *migrated)
3499{
3500 uint32_t best_loc;
3501 int r = 0;
3502
3503 *migrated = false;
3504 best_loc = svm_range_best_prefetch_location(prange);
3505
3506
3507
3508
3509
3510
3511 if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
3512 (best_loc == 0 && prange->actual_loc == 0))
3513 return 0;
3514
3515 if (!best_loc) {
3516 r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
3517 KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
3518 *migrated = !r;
3519 return r;
3520 }
3521
3522 r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
3523 mm, KFD_MIGRATE_TRIGGER_PREFETCH);
3524 *migrated = !r;
3525
3526 return 0;
3527}
3528
3529int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
3530{
3531
3532
3533
3534
3535
3536
3537
3538 if (svm_bo_ref_unless_zero(fence->svm_bo)) {
3539 WRITE_ONCE(fence->svm_bo->evicting, 1);
3540 schedule_work(&fence->svm_bo->eviction_work);
3541 }
3542
3543 return 0;
3544}
3545
3546static void svm_range_evict_svm_bo_worker(struct work_struct *work)
3547{
3548 struct svm_range_bo *svm_bo;
3549 struct mm_struct *mm;
3550 int r = 0;
3551
3552 svm_bo = container_of(work, struct svm_range_bo, eviction_work);
3553
3554 if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
3555 mm = svm_bo->eviction_fence->mm;
3556 } else {
3557 svm_range_bo_unref(svm_bo);
3558 return;
3559 }
3560
3561 mmap_read_lock(mm);
3562 spin_lock(&svm_bo->list_lock);
3563 while (!list_empty(&svm_bo->range_list) && !r) {
3564 struct svm_range *prange =
3565 list_first_entry(&svm_bo->range_list,
3566 struct svm_range, svm_bo_list);
3567 int retries = 3;
3568
3569 list_del_init(&prange->svm_bo_list);
3570 spin_unlock(&svm_bo->list_lock);
3571
3572 pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
3573 prange->start, prange->last);
3574
3575 mutex_lock(&prange->migrate_mutex);
3576 do {
3577
3578
3579
3580 r = svm_migrate_vram_to_ram(prange, mm,
3581 prange->start, prange->last,
3582 KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
3583 } while (!r && prange->actual_loc && --retries);
3584
3585 if (!r && prange->actual_loc)
3586 pr_info_once("Migration failed during eviction");
3587
3588 if (!prange->actual_loc) {
3589 mutex_lock(&prange->lock);
3590 prange->svm_bo = NULL;
3591 mutex_unlock(&prange->lock);
3592 }
3593 mutex_unlock(&prange->migrate_mutex);
3594
3595 spin_lock(&svm_bo->list_lock);
3596 }
3597 spin_unlock(&svm_bo->list_lock);
3598 mmap_read_unlock(mm);
3599 mmput(mm);
3600
3601 dma_fence_signal(&svm_bo->eviction_fence->base);
3602
3603
3604
3605
3606 WARN_ONCE(!r && kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
3607 svm_range_bo_unref(svm_bo);
3608}
3609
/**
 * svm_range_set_attr - apply SVM attributes to an address range
 * @p: process the range belongs to
 * @mm: address space of the process
 * @start: first page of the range
 * @size: number of pages in the range
 * @nattr: number of entries in @attrs
 * @attrs: attributes to apply
 *
 * Validates the attributes and the range, lets svm_range_add() build the
 * insert/update/remove/remap lists, applies those lists, then triggers
 * migration and GPU mapping for the updated and remapped ranges.
 *
 * Return: 0 on success. A validation or svm_range_add() error is returned
 * directly. Errors from the migrate/map phase do not stop the loops; the
 * last such error is returned.
 */
static int
svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
		   uint64_t start, uint64_t size, uint32_t nattr,
		   struct kfd_ioctl_svm_attribute *attrs)
{
	struct amdkfd_process_info *process_info = p->kgd_process_info;
	struct list_head update_list;
	struct list_head insert_list;
	struct list_head remove_list;
	struct list_head remap_list;
	struct svm_range_list *svms;
	struct svm_range *prange;
	struct svm_range *next;
	bool update_mapping = false;
	bool flush_tlb;
	int r, ret = 0;

	pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
		 p->lead_thread->pid, &p->svms, start, start + size - 1, size);

	r = svm_range_check_attr(p, nattr, attrs);
	if (r)
		return r;

	svms = &p->svms;

	mutex_lock(&process_info->lock);

	/*
	 * NOTE(review): presumably returns with the mmap write lock held,
	 * since every path below releases it with mmap_write_unlock() or
	 * mmap_write_downgrade() + mmap_read_unlock() - confirm against the
	 * helper.
	 */
	svm_range_list_lock_and_flush_work(svms, mm);

	r = svm_range_is_valid(p, start, size);
	if (r) {
		pr_debug("invalid range r=%d\n", r);
		mmap_write_unlock(mm);
		goto out;
	}

	mutex_lock(&svms->lock);

	/* Build the lists of ranges to insert, update, remove and remap */
	r = svm_range_add(p, start, size, nattr, attrs, &update_list,
			  &insert_list, &remove_list, &remap_list);
	if (r) {
		mutex_unlock(&svms->lock);
		mmap_write_unlock(mm);
		goto out;
	}

	/* Apply the lists: insert new ranges, update attributes, free old ranges */
	list_for_each_entry_safe(prange, next, &insert_list, list) {
		svm_range_add_to_svms(prange);
		svm_range_add_notifier_locked(mm, prange);
	}
	list_for_each_entry(prange, &update_list, update_list) {
		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);

	}
	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
		pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
			 prange->svms, prange, prange->start,
			 prange->last);
		svm_range_unlink(prange);
		svm_range_remove_notifier(prange);
		svm_range_free(prange, false);
	}

	/* The rest runs under the mmap read lock */
	mmap_write_downgrade(mm);

	/*
	 * Migrate and (re)map each updated range. A failure is recorded in
	 * ret and the loop carries on with the next range; nothing already
	 * applied is rolled back.
	 */
	list_for_each_entry(prange, &update_list, update_list) {
		bool migrated;

		mutex_lock(&prange->migrate_mutex);

		r = svm_range_trigger_migration(mm, prange, &migrated);
		if (r)
			goto out_unlock_range;

		/* No mapping update here; restore_work is expected to do it */
		if (migrated && (!p->xnack_enabled ||
		    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
		    prange->mapped_to_gpu) {
			pr_debug("restore_work will update mappings of GPUs\n");
			mutex_unlock(&prange->migrate_mutex);
			continue;
		}

		/* Nothing moved and no attribute requires a mapping update */
		if (!migrated && !update_mapping) {
			mutex_unlock(&prange->migrate_mutex);
			continue;
		}

		/* Flush the TLB only when an existing mapping is updated in place */
		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;

		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
					       MAX_GPU_INSTANCE, true, true, flush_tlb);
		if (r)
			pr_debug("failed %d to map svm range\n", r);

out_unlock_range:
		mutex_unlock(&prange->migrate_mutex);
		if (r)
			ret = r;
	}

	/* Ranges on the remap list are always revalidated and mapped again */
	list_for_each_entry(prange, &remap_list, update_list) {
		pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
			 prange, prange->start, prange->last);
		mutex_lock(&prange->migrate_mutex);
		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
					       MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
		if (r)
			pr_debug("failed %d on remap svm range\n", r);
		mutex_unlock(&prange->migrate_mutex);
		if (r)
			ret = r;
	}

	dynamic_svm_range_dump(svms);

	mutex_unlock(&svms->lock);
	mmap_read_unlock(mm);
out:
	mutex_unlock(&process_info->lock);

	pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] done, r=%d\n",
		 p->lead_thread->pid, &p->svms, start, start + size - 1, r);

	/* An error recorded in the migrate/map loops wins over the final r */
	return ret ? ret : r;
}
3741
3742static int
3743svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
3744 uint64_t start, uint64_t size, uint32_t nattr,
3745 struct kfd_ioctl_svm_attribute *attrs)
3746{
3747 DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
3748 DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
3749 bool get_preferred_loc = false;
3750 bool get_prefetch_loc = false;
3751 bool get_granularity = false;
3752 bool get_accessible = false;
3753 bool get_flags = false;
3754 uint64_t last = start + size - 1UL;
3755 uint8_t granularity = 0xff;
3756 struct interval_tree_node *node;
3757 struct svm_range_list *svms;
3758 struct svm_range *prange;
3759 uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3760 uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3761 uint32_t flags_and = 0xffffffff;
3762 uint32_t flags_or = 0;
3763 int gpuidx;
3764 uint32_t i;
3765 int r = 0;
3766
3767 pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
3768 start + size - 1, nattr);
3769
3770
3771
3772
3773
3774
3775
3776 flush_work(&p->svms.deferred_list_work);
3777
3778 mmap_read_lock(mm);
3779 r = svm_range_is_valid(p, start, size);
3780 mmap_read_unlock(mm);
3781 if (r) {
3782 pr_debug("invalid range r=%d\n", r);
3783 return r;
3784 }
3785
3786 for (i = 0; i < nattr; i++) {
3787 switch (attrs[i].type) {
3788 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
3789 get_preferred_loc = true;
3790 break;
3791 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
3792 get_prefetch_loc = true;
3793 break;
3794 case KFD_IOCTL_SVM_ATTR_ACCESS:
3795 get_accessible = true;
3796 break;
3797 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
3798 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
3799 get_flags = true;
3800 break;
3801 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
3802 get_granularity = true;
3803 break;
3804 case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
3805 case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
3806 fallthrough;
3807 default:
3808 pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
3809 return -EINVAL;
3810 }
3811 }
3812
3813 svms = &p->svms;
3814
3815 mutex_lock(&svms->lock);
3816
3817 node = interval_tree_iter_first(&svms->objects, start, last);
3818 if (!node) {
3819 pr_debug("range attrs not found return default values\n");
3820 svm_range_set_default_attributes(svms, &location, &prefetch_loc,
3821 &granularity, &flags_and);
3822 flags_or = flags_and;
3823 if (p->xnack_enabled)
3824 bitmap_copy(bitmap_access, svms->bitmap_supported,
3825 MAX_GPU_INSTANCE);
3826 else
3827 bitmap_zero(bitmap_access, MAX_GPU_INSTANCE);
3828 bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE);
3829 goto fill_values;
3830 }
3831 bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE);
3832 bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE);
3833
3834 while (node) {
3835 struct interval_tree_node *next;
3836
3837 prange = container_of(node, struct svm_range, it_node);
3838 next = interval_tree_iter_next(node, start, last);
3839
3840 if (get_preferred_loc) {
3841 if (prange->preferred_loc ==
3842 KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
3843 (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
3844 location != prange->preferred_loc)) {
3845 location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3846 get_preferred_loc = false;
3847 } else {
3848 location = prange->preferred_loc;
3849 }
3850 }
3851 if (get_prefetch_loc) {
3852 if (prange->prefetch_loc ==
3853 KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
3854 (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
3855 prefetch_loc != prange->prefetch_loc)) {
3856 prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
3857 get_prefetch_loc = false;
3858 } else {
3859 prefetch_loc = prange->prefetch_loc;
3860 }
3861 }
3862 if (get_accessible) {
3863 bitmap_and(bitmap_access, bitmap_access,
3864 prange->bitmap_access, MAX_GPU_INSTANCE);
3865 bitmap_and(bitmap_aip, bitmap_aip,
3866 prange->bitmap_aip, MAX_GPU_INSTANCE);
3867 }
3868 if (get_flags) {
3869 flags_and &= prange->flags;
3870 flags_or |= prange->flags;
3871 }
3872
3873 if (get_granularity && prange->granularity < granularity)
3874 granularity = prange->granularity;
3875
3876 node = next;
3877 }
3878fill_values:
3879 mutex_unlock(&svms->lock);
3880
3881 for (i = 0; i < nattr; i++) {
3882 switch (attrs[i].type) {
3883 case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
3884 attrs[i].value = location;
3885 break;
3886 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
3887 attrs[i].value = prefetch_loc;
3888 break;
3889 case KFD_IOCTL_SVM_ATTR_ACCESS:
3890 gpuidx = kfd_process_gpuidx_from_gpuid(p,
3891 attrs[i].value);
3892 if (gpuidx < 0) {
3893 pr_debug("invalid gpuid %x\n", attrs[i].value);
3894 return -EINVAL;
3895 }
3896 if (test_bit(gpuidx, bitmap_access))
3897 attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS;
3898 else if (test_bit(gpuidx, bitmap_aip))
3899 attrs[i].type =
3900 KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE;
3901 else
3902 attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS;
3903 break;
3904 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
3905 attrs[i].value = flags_and;
3906 break;
3907 case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
3908 attrs[i].value = ~flags_or;
3909 break;
3910 case KFD_IOCTL_SVM_ATTR_GRANULARITY:
3911 attrs[i].value = (uint32_t)granularity;
3912 break;
3913 }
3914 }
3915
3916 return 0;
3917}
3918
3919int kfd_criu_resume_svm(struct kfd_process *p)
3920{
3921 struct kfd_ioctl_svm_attribute *set_attr_new, *set_attr = NULL;
3922 int nattr_common = 4, nattr_accessibility = 1;
3923 struct criu_svm_metadata *criu_svm_md = NULL;
3924 struct svm_range_list *svms = &p->svms;
3925 struct criu_svm_metadata *next = NULL;
3926 uint32_t set_flags = 0xffffffff;
3927 int i, j, num_attrs, ret = 0;
3928 uint64_t set_attr_size;
3929 struct mm_struct *mm;
3930
3931 if (list_empty(&svms->criu_svm_metadata_list)) {
3932 pr_debug("No SVM data from CRIU restore stage 2\n");
3933 return ret;
3934 }
3935
3936 mm = get_task_mm(p->lead_thread);
3937 if (!mm) {
3938 pr_err("failed to get mm for the target process\n");
3939 return -ESRCH;
3940 }
3941
3942 num_attrs = nattr_common + (nattr_accessibility * p->n_pdds);
3943
3944 i = j = 0;
3945 list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) {
3946 pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx (npages)\n",
3947 i, criu_svm_md->data.start_addr, criu_svm_md->data.size);
3948
3949 for (j = 0; j < num_attrs; j++) {
3950 pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x\ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n",
3951 i, j, criu_svm_md->data.attrs[j].type,
3952 i, j, criu_svm_md->data.attrs[j].value);
3953 switch (criu_svm_md->data.attrs[j].type) {
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964 case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
3965 if (criu_svm_md->data.attrs[j].value ==
3966 KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
3967 criu_svm_md->data.attrs[j].type =
3968 KFD_IOCTL_SVM_ATTR_SET_FLAGS;
3969 criu_svm_md->data.attrs[j].value = 0;
3970 }
3971 break;
3972 case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
3973 set_flags = criu_svm_md->data.attrs[j].value;
3974 break;
3975 default:
3976 break;
3977 }
3978 }
3979
3980
3981
3982
3983
3984 set_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
3985 (num_attrs + 1);
3986 set_attr_new = krealloc(set_attr, set_attr_size,
3987 GFP_KERNEL);
3988 if (!set_attr_new) {
3989 ret = -ENOMEM;
3990 goto exit;
3991 }
3992 set_attr = set_attr_new;
3993
3994 memcpy(set_attr, criu_svm_md->data.attrs, num_attrs *
3995 sizeof(struct kfd_ioctl_svm_attribute));
3996 set_attr[num_attrs].type = KFD_IOCTL_SVM_ATTR_CLR_FLAGS;
3997 set_attr[num_attrs].value = ~set_flags;
3998
3999 ret = svm_range_set_attr(p, mm, criu_svm_md->data.start_addr,
4000 criu_svm_md->data.size, num_attrs + 1,
4001 set_attr);
4002 if (ret) {
4003 pr_err("CRIU: failed to set range attributes\n");
4004 goto exit;
4005 }
4006
4007 i++;
4008 }
4009exit:
4010 kfree(set_attr);
4011 list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) {
4012 pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n",
4013 criu_svm_md->data.start_addr);
4014 kfree(criu_svm_md);
4015 }
4016
4017 mmput(mm);
4018 return ret;
4019
4020}
4021
4022int kfd_criu_restore_svm(struct kfd_process *p,
4023 uint8_t __user *user_priv_ptr,
4024 uint64_t *priv_data_offset,
4025 uint64_t max_priv_data_size)
4026{
4027 uint64_t svm_priv_data_size, svm_object_md_size, svm_attrs_size;
4028 int nattr_common = 4, nattr_accessibility = 1;
4029 struct criu_svm_metadata *criu_svm_md = NULL;
4030 struct svm_range_list *svms = &p->svms;
4031 uint32_t num_devices;
4032 int ret = 0;
4033
4034 num_devices = p->n_pdds;
4035
4036
4037
4038
4039
4040 svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
4041 (nattr_common + nattr_accessibility * num_devices);
4042 svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
4043
4044 svm_priv_data_size = sizeof(struct kfd_criu_svm_range_priv_data) +
4045 svm_attrs_size;
4046
4047 criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
4048 if (!criu_svm_md) {
4049 pr_err("failed to allocate memory to store svm metadata\n");
4050 return -ENOMEM;
4051 }
4052 if (*priv_data_offset + svm_priv_data_size > max_priv_data_size) {
4053 ret = -EINVAL;
4054 goto exit;
4055 }
4056
4057 ret = copy_from_user(&criu_svm_md->data, user_priv_ptr + *priv_data_offset,
4058 svm_priv_data_size);
4059 if (ret) {
4060 ret = -EFAULT;
4061 goto exit;
4062 }
4063 *priv_data_offset += svm_priv_data_size;
4064
4065 list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
4066
4067 return 0;
4068
4069
4070exit:
4071 kfree(criu_svm_md);
4072 return ret;
4073}
4074
4075void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
4076 uint64_t *svm_priv_data_size)
4077{
4078 uint64_t total_size, accessibility_size, common_attr_size;
4079 int nattr_common = 4, nattr_accessibility = 1;
4080 int num_devices = p->n_pdds;
4081 struct svm_range_list *svms;
4082 struct svm_range *prange;
4083 uint32_t count = 0;
4084
4085 *svm_priv_data_size = 0;
4086
4087 svms = &p->svms;
4088
4089 mutex_lock(&svms->lock);
4090 list_for_each_entry(prange, &svms->list, list) {
4091 pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 0x%llx\n",
4092 prange, prange->start, prange->npages,
4093 prange->start + prange->npages - 1);
4094 count++;
4095 }
4096 mutex_unlock(&svms->lock);
4097
4098 *num_svm_ranges = count;
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115 if (*num_svm_ranges > 0) {
4116 common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
4117 nattr_common;
4118 accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
4119 nattr_accessibility * num_devices;
4120
4121 total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
4122 common_attr_size + accessibility_size;
4123
4124 *svm_priv_data_size = *num_svm_ranges * total_size;
4125 }
4126
4127 pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
4128 *svm_priv_data_size);
4129}
4130
4131int kfd_criu_checkpoint_svm(struct kfd_process *p,
4132 uint8_t __user *user_priv_data,
4133 uint64_t *priv_data_offset)
4134{
4135 struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
4136 struct kfd_ioctl_svm_attribute *query_attr = NULL;
4137 uint64_t svm_priv_data_size, query_attr_size = 0;
4138 int index, nattr_common = 4, ret = 0;
4139 struct svm_range_list *svms;
4140 int num_devices = p->n_pdds;
4141 struct svm_range *prange;
4142 struct mm_struct *mm;
4143
4144 svms = &p->svms;
4145
4146 mm = get_task_mm(p->lead_thread);
4147 if (!mm) {
4148 pr_err("failed to get mm for the target process\n");
4149 return -ESRCH;
4150 }
4151
4152 query_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
4153 (nattr_common + num_devices);
4154
4155 query_attr = kzalloc(query_attr_size, GFP_KERNEL);
4156 if (!query_attr) {
4157 ret = -ENOMEM;
4158 goto exit;
4159 }
4160
4161 query_attr[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
4162 query_attr[1].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
4163 query_attr[2].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
4164 query_attr[3].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
4165
4166 for (index = 0; index < num_devices; index++) {
4167 struct kfd_process_device *pdd = p->pdds[index];
4168
4169 query_attr[index + nattr_common].type =
4170 KFD_IOCTL_SVM_ATTR_ACCESS;
4171 query_attr[index + nattr_common].value = pdd->user_gpu_id;
4172 }
4173
4174 svm_priv_data_size = sizeof(*svm_priv) + query_attr_size;
4175
4176 svm_priv = kzalloc(svm_priv_data_size, GFP_KERNEL);
4177 if (!svm_priv) {
4178 ret = -ENOMEM;
4179 goto exit_query;
4180 }
4181
4182 index = 0;
4183 list_for_each_entry(prange, &svms->list, list) {
4184
4185 svm_priv->object_type = KFD_CRIU_OBJECT_TYPE_SVM_RANGE;
4186 svm_priv->start_addr = prange->start;
4187 svm_priv->size = prange->npages;
4188 memcpy(&svm_priv->attrs, query_attr, query_attr_size);
4189 pr_debug("CRIU: prange: 0x%p start: 0x%lx\t npages: 0x%llx end: 0x%llx\t size: 0x%llx\n",
4190 prange, prange->start, prange->npages,
4191 prange->start + prange->npages - 1,
4192 prange->npages * PAGE_SIZE);
4193
4194 ret = svm_range_get_attr(p, mm, svm_priv->start_addr,
4195 svm_priv->size,
4196 (nattr_common + num_devices),
4197 svm_priv->attrs);
4198 if (ret) {
4199 pr_err("CRIU: failed to obtain range attributes\n");
4200 goto exit_priv;
4201 }
4202
4203 if (copy_to_user(user_priv_data + *priv_data_offset, svm_priv,
4204 svm_priv_data_size)) {
4205 pr_err("Failed to copy svm priv to user\n");
4206 ret = -EFAULT;
4207 goto exit_priv;
4208 }
4209
4210 *priv_data_offset += svm_priv_data_size;
4211
4212 }
4213
4214
4215exit_priv:
4216 kfree(svm_priv);
4217exit_query:
4218 kfree(query_attr);
4219exit:
4220 mmput(mm);
4221 return ret;
4222}
4223
4224int
4225svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
4226 uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
4227{
4228 struct mm_struct *mm = current->mm;
4229 int r;
4230
4231 start >>= PAGE_SHIFT;
4232 size >>= PAGE_SHIFT;
4233
4234 switch (op) {
4235 case KFD_IOCTL_SVM_OP_SET_ATTR:
4236 r = svm_range_set_attr(p, mm, start, size, nattrs, attrs);
4237 break;
4238 case KFD_IOCTL_SVM_OP_GET_ATTR:
4239 r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
4240 break;
4241 default:
4242 r = EINVAL;
4243 break;
4244 }
4245
4246 return r;
4247}
4248