#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_MMU_DEBUG	0

/* use small pages so non-power-of-2 DRAM physical page sizes can be served */
#define DRAM_POOL_PAGE_SIZE	SZ_8M
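
/**
 * alloc_device_memory() - allocate device memory.
 * @ctx: pointer to the context of the allocating process.
 * @args: host parameters containing the requested size and flags.
 * @ret_handle: result handle to return to the user.
 *
 * Round the requested size up to whole DRAM pages, allocate the pages from
 * the device DRAM pool (as one contiguous chunk if HL_MEM_CONTIGUOUS is set)
 * and return a unique handle through which the allocation can later be
 * mapped or freed.
 */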
53static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
54 u32 *ret_handle)
55{
56 struct hl_device *hdev = ctx->hdev;
57 struct hl_vm *vm = &hdev->vm;
58 struct hl_vm_phys_pg_pack *phys_pg_pack;
59 u64 paddr = 0, total_size, num_pgs, i;
60 u32 num_curr_pgs, page_size;
61 int handle, rc;
62 bool contiguous;
63
64 num_curr_pgs = 0;
65 page_size = hdev->asic_prop.dram_page_size;
66 num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
67 total_size = num_pgs * page_size;
68
69 if (!total_size) {
70 dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
71 return -EINVAL;
72 }
73
74 contiguous = args->flags & HL_MEM_CONTIGUOUS;
75
76 if (contiguous) {
77 paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
78 if (!paddr) {
79 dev_err(hdev->dev,
80 "failed to allocate %llu contiguous pages with total size of %llu\n",
81 num_pgs, total_size);
82 return -ENOMEM;
83 }
84 }
85
86 phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
87 if (!phys_pg_pack) {
88 rc = -ENOMEM;
89 goto pages_pack_err;
90 }
91
92 phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
93 phys_pg_pack->asid = ctx->asid;
94 phys_pg_pack->npages = num_pgs;
95 phys_pg_pack->page_size = page_size;
96 phys_pg_pack->total_size = total_size;
97 phys_pg_pack->flags = args->flags;
98 phys_pg_pack->contiguous = contiguous;
99
100 phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
101 if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
102 rc = -ENOMEM;
103 goto pages_arr_err;
104 }
105
106 if (phys_pg_pack->contiguous) {
107 for (i = 0 ; i < num_pgs ; i++)
108 phys_pg_pack->pages[i] = paddr + i * page_size;
109 } else {
110 for (i = 0 ; i < num_pgs ; i++) {
111 phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
112 vm->dram_pg_pool,
113 page_size);
114 if (!phys_pg_pack->pages[i]) {
115 dev_err(hdev->dev,
116 "Failed to allocate device memory (out of memory)\n");
117 rc = -ENOMEM;
118 goto page_err;
119 }
120
121 num_curr_pgs++;
122 }
123 }
124
125 spin_lock(&vm->idr_lock);
126 handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
127 GFP_ATOMIC);
128 spin_unlock(&vm->idr_lock);
129
130 if (handle < 0) {
131 dev_err(hdev->dev, "Failed to get handle for page\n");
132 rc = -EFAULT;
133 goto idr_err;
134 }
135
136 for (i = 0 ; i < num_pgs ; i++)
137 kref_get(&vm->dram_pg_pool_refcount);
138
139 phys_pg_pack->handle = handle;
140
141 atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
142 atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
143
144 *ret_handle = handle;
145
146 return 0;
147
148idr_err:
149page_err:
150 if (!phys_pg_pack->contiguous)
151 for (i = 0 ; i < num_curr_pgs ; i++)
152 gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
153 page_size);
154
155 kvfree(phys_pg_pack->pages);
156pages_arr_err:
157 kfree(phys_pg_pack);
158pages_pack_err:
159 if (contiguous)
160 gen_pool_free(vm->dram_pg_pool, paddr, total_size);
161
162 return rc;
163}
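
/**
 * dma_map_host_va() - pin a host memory area and DMA-map it.
 * @hdev: habanalabs device structure.
 * @addr: the host virtual address of the memory area.
 * @size: the size of the memory area.
 * @p_userptr: result pointer to the new userptr structure.
 *
 * Pin the host pages, build a scatter-gather table for them and DMA-map the
 * table so the device can access the memory.
 */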
177static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
178 struct hl_userptr **p_userptr)
179{
180 struct hl_userptr *userptr;
181 int rc;
182
183 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
184 if (!userptr) {
185 rc = -ENOMEM;
186 goto userptr_err;
187 }
188
189 rc = hl_pin_host_memory(hdev, addr, size, userptr);
190 if (rc) {
191 dev_err(hdev->dev, "Failed to pin host memory\n");
192 goto pin_err;
193 }
194
195 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
196 userptr->sgt->nents, DMA_BIDIRECTIONAL);
197 if (rc) {
198 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
199 goto dma_map_err;
200 }
201
202 userptr->dma_mapped = true;
203 userptr->dir = DMA_BIDIRECTIONAL;
204 userptr->vm_type = VM_TYPE_USERPTR;
205
206 *p_userptr = userptr;
207
208 return 0;
209
210dma_map_err:
211 hl_unpin_host_memory(hdev, userptr);
212pin_err:
213 kfree(userptr);
214userptr_err:
215
216 return rc;
217}
static void dma_unmap_host_va(struct hl_device *hdev,
				struct hl_userptr *userptr)
{
	hl_unpin_host_memory(hdev, userptr);
	kfree(userptr);
}
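
/**
 * dram_pg_pool_do_release() - release the DRAM page pool.
 * @ref: pointer to the reference count object of the pool.
 *
 * Called when the last reference to the pool is dropped. The handles IDR is
 * destroyed here as well, because the pool refcount is what keeps it alive.
 */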
static void dram_pg_pool_do_release(struct kref *ref)
{
	struct hl_vm *vm = container_of(ref, struct hl_vm,
			dram_pg_pool_refcount);

	idr_destroy(&vm->phys_pg_pack_handles);
	gen_pool_destroy(vm->dram_pg_pool);
}
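
/**
 * free_phys_pg_pack() - free the physical page pack.
 * @hdev: habanalabs device structure.
 * @phys_pg_pack: physical page pack to free.
 *
 * For packs that were allocated from the DRAM pool (i.e. not created from a
 * userptr), scrub the memory if required, return the pages to the pool and
 * drop the pool reference taken for each page. Finally free the pack itself.
 */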
270static int free_phys_pg_pack(struct hl_device *hdev,
271 struct hl_vm_phys_pg_pack *phys_pg_pack)
272{
273 struct hl_vm *vm = &hdev->vm;
274 u64 i;
275 int rc = 0;
276
277 if (phys_pg_pack->created_from_userptr)
278 goto end;
279
280 if (phys_pg_pack->contiguous) {
281 if (hdev->memory_scrub && !hdev->disabled) {
282 rc = hdev->asic_funcs->scrub_device_mem(hdev,
283 phys_pg_pack->pages[0],
284 phys_pg_pack->total_size);
285 if (rc)
286 dev_err(hdev->dev,
287 "Failed to scrub contiguous device memory\n");
288 }
289
290 gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
291 phys_pg_pack->total_size);
292
293 for (i = 0; i < phys_pg_pack->npages ; i++)
294 kref_put(&vm->dram_pg_pool_refcount,
295 dram_pg_pool_do_release);
296 } else {
297 for (i = 0 ; i < phys_pg_pack->npages ; i++) {
298 if (hdev->memory_scrub && !hdev->disabled && rc == 0) {
299 rc = hdev->asic_funcs->scrub_device_mem(
300 hdev,
301 phys_pg_pack->pages[i],
302 phys_pg_pack->page_size);
303 if (rc)
304 dev_err(hdev->dev,
305 "Failed to scrub device memory\n");
306 }
307 gen_pool_free(vm->dram_pg_pool,
308 phys_pg_pack->pages[i],
309 phys_pg_pack->page_size);
310 kref_put(&vm->dram_pg_pool_refcount,
311 dram_pg_pool_do_release);
312 }
313 }
314
315 if (rc && !hdev->disabled)
316 hl_device_reset(hdev, HL_RESET_HARD);
317
318end:
319 kvfree(phys_pg_pack->pages);
320 kfree(phys_pg_pack);
321
322 return rc;
323}
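
/**
 * free_device_memory() - free device memory.
 * @ctx: pointer to the context structure.
 * @args: host parameters containing the memory handle to free.
 *
 * Look up the handle, make sure the memory is not currently mapped, remove
 * it from the IDR and release the physical pages.
 */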
333static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
334{
335 struct hl_device *hdev = ctx->hdev;
336 struct hl_vm *vm = &hdev->vm;
337 struct hl_vm_phys_pg_pack *phys_pg_pack;
338 u32 handle = args->free.handle;
339
340 spin_lock(&vm->idr_lock);
341 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
342 if (phys_pg_pack) {
343 if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
344 dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
345 handle);
346 spin_unlock(&vm->idr_lock);
347 return -EINVAL;
348 }
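
		/*
		 * Must remove the handle from the IDR before freeing the
		 * physical pages: dropping the last page reference releases
		 * the pool and that, in turn, destroys the IDR.
		 */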
355 idr_remove(&vm->phys_pg_pack_handles, handle);
356 spin_unlock(&vm->idr_lock);
357
358 atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
359 atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
360
361 return free_phys_pg_pack(hdev, phys_pg_pack);
362 } else {
363 spin_unlock(&vm->idr_lock);
364 dev_err(hdev->dev,
365 "free device memory failed, no match for handle %u\n",
366 handle);
367 return -EINVAL;
368 }
369
370 return 0;
371}
static void clear_va_list_locked(struct hl_device *hdev,
			struct list_head *va_list)
{
	struct hl_vm_va_block *va_block, *tmp;

	list_for_each_entry_safe(va_block, tmp, va_list, node) {
		list_del(&va_block->node);
		kfree(va_block);
	}
}
static void print_va_list_locked(struct hl_device *hdev,
			struct list_head *va_list)
{
#if HL_MMU_DEBUG
	struct hl_vm_va_block *va_block;

	dev_dbg(hdev->dev, "print va list:\n");

	list_for_each_entry(va_block, va_list, node)
		dev_dbg(hdev->dev,
			"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
			va_block->start, va_block->end, va_block->size);
#endif
}
static void merge_va_blocks_locked(struct hl_device *hdev,
		struct list_head *va_list, struct hl_vm_va_block *va_block)
{
	struct hl_vm_va_block *prev, *next;

	prev = list_prev_entry(va_block, node);
	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
		prev->end = va_block->end;
		prev->size = prev->end - prev->start;
		list_del(&va_block->node);
		kfree(va_block);
		va_block = prev;
	}

	next = list_next_entry(va_block, node);
	if (&next->node != va_list && va_block->end + 1 == next->start) {
		next->start = va_block->start;
		next->size = next->end - next->start;
		list_del(&va_block->node);
		kfree(va_block);
	}
}
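
/**
 * add_va_block_locked() - add a VA block to the virtual addresses list.
 * @hdev: habanalabs device structure.
 * @va_list: pointer to the virtual addresses block list.
 * @start: start virtual address.
 * @end: end virtual address.
 *
 * The caller must hold the VA range lock. The new block is inserted in
 * address order and merged with its neighbors if they are adjacent.
 */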
467static int add_va_block_locked(struct hl_device *hdev,
468 struct list_head *va_list, u64 start, u64 end)
469{
470 struct hl_vm_va_block *va_block, *res = NULL;
471 u64 size = end - start;
472
473 print_va_list_locked(hdev, va_list);
474
475 list_for_each_entry(va_block, va_list, node) {
476
477 if (hl_mem_area_crosses_range(start, size, va_block->start,
478 va_block->end)) {
479 dev_err(hdev->dev,
480 "block crossing ranges at start 0x%llx, end 0x%llx\n",
481 va_block->start, va_block->end);
482 return -EINVAL;
483 }
484
485 if (va_block->end < start)
486 res = va_block;
487 }
488
489 va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
490 if (!va_block)
491 return -ENOMEM;
492
493 va_block->start = start;
494 va_block->end = end;
495 va_block->size = size;
496
497 if (!res)
498 list_add(&va_block->node, va_list);
499 else
500 list_add(&va_block->node, &res->node);
501
502 merge_va_blocks_locked(hdev, va_list, va_block);
503
504 print_va_list_locked(hdev, va_list);
505
506 return 0;
507}
static inline int add_va_block(struct hl_device *hdev,
		struct hl_va_range *va_range, u64 start, u64 end)
{
	int rc;

	mutex_lock(&va_range->lock);
	rc = add_va_block_locked(hdev, &va_range->list, start, end);
	mutex_unlock(&va_range->lock);

	return rc;
}
static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
		u64 start_addr, u32 size, struct asic_fixed_properties *prop)
{
	bool range_cross;

	if (range_type == HL_VA_RANGE_TYPE_DRAM)
		range_cross =
			hl_mem_area_crosses_range(start_addr, size,
				prop->hints_dram_reserved_va_range.start_addr,
				prop->hints_dram_reserved_va_range.end_addr);
	else if (range_type == HL_VA_RANGE_TYPE_HOST)
		range_cross =
			hl_mem_area_crosses_range(start_addr, size,
				prop->hints_host_reserved_va_range.start_addr,
				prop->hints_host_reserved_va_range.end_addr);
	else
		range_cross =
			hl_mem_area_crosses_range(start_addr, size,
				prop->hints_host_hpage_reserved_va_range.start_addr,
				prop->hints_host_hpage_reserved_va_range.end_addr);

	return range_cross;
}
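
/**
 * get_va_block() - reserve a virtual block of a given size.
 * @hdev: habanalabs device structure.
 * @va_range: the virtual addresses range to allocate from.
 * @size: requested block size.
 * @hint_addr: hint for the requested address by the user.
 * @va_block_align: required alignment of the virtual block start address.
 * @range_type: VA range type (host, host-huge, dram).
 * @flags: additional memory flags, currently only HL_MEM_FORCE_HINT is used.
 *
 * Scan the free VA list for a block that satisfies the size, the alignment
 * and, if possible, the hint address. If HL_MEM_FORCE_HINT is set and the
 * hint cannot be respected, fail (return 0). The chosen range is carved out
 * of the free list and its start address is returned.
 */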
575static u64 get_va_block(struct hl_device *hdev,
576 struct hl_va_range *va_range,
577 u64 size, u64 hint_addr, u32 va_block_align,
578 enum hl_va_range_type range_type,
579 u32 flags)
580{
581 struct hl_vm_va_block *va_block, *new_va_block = NULL;
582 struct asic_fixed_properties *prop = &hdev->asic_prop;
583 u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
584 align_mask, reserved_valid_start = 0, reserved_valid_size = 0,
585 dram_hint_mask = prop->dram_hints_align_mask;
586 bool add_prev = false;
587 bool is_align_pow_2 = is_power_of_2(va_range->page_size);
588 bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
589 bool force_hint = flags & HL_MEM_FORCE_HINT;
590
591 if (is_align_pow_2)
592 align_mask = ~((u64)va_block_align - 1);
593 else
		/*
		 * With a non-power-of-2 range we work only at page
		 * granularity and the start address is already page aligned,
		 * so no alignment mask is needed; just round the size up to
		 * whole pages.
		 */
599 size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
600 va_range->page_size;
601
602 tmp_hint_addr = hint_addr & ~dram_hint_mask;
603
604
605 if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
606 (!is_align_pow_2 && is_hint_dram_addr &&
607 do_div(tmp_hint_addr, va_range->page_size))) {
608
609 if (force_hint) {
610
611 dev_err(hdev->dev,
612 "Hint address 0x%llx is not page aligned - cannot be respected\n",
613 hint_addr);
614 return 0;
615 }
616
617 dev_dbg(hdev->dev,
618 "Hint address 0x%llx will be ignored because it is not aligned\n",
619 hint_addr);
620 hint_addr = 0;
621 }
622
623 mutex_lock(&va_range->lock);
624
625 print_va_list_locked(hdev, &va_range->list);
626
627 list_for_each_entry(va_block, &va_range->list, node) {
628
629 valid_start = va_block->start;
630
631 if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
632 valid_start &= align_mask;
633 valid_start += va_block_align;
634 if (valid_start > va_block->end)
635 continue;
636 }
637
638 valid_size = va_block->end - valid_start;
639 if (valid_size < size)
640 continue;

		/*
		 * If the hint address is 0 and the hints_range_reservation
		 * property is enabled, avoid allocating VA blocks that cross
		 * the reserved hint address ranges.
		 */
647 if (prop->hints_range_reservation && !hint_addr)
648 if (is_hint_crossing_range(range_type, valid_start,
649 size, prop))
650 continue;
651
652
653 if (!new_va_block || (valid_size < reserved_valid_size)) {
654 new_va_block = va_block;
655 reserved_valid_start = valid_start;
656 reserved_valid_size = valid_size;
657 }
658
659 if (hint_addr && hint_addr >= valid_start &&
660 (hint_addr + size) <= va_block->end) {
661 new_va_block = va_block;
662 reserved_valid_start = hint_addr;
663 reserved_valid_size = valid_size;
664 break;
665 }
666 }
667
668 if (!new_va_block) {
669 dev_err(hdev->dev, "no available va block for size %llu\n",
670 size);
671 goto out;
672 }
673
674 if (force_hint && reserved_valid_start != hint_addr) {
678 dev_err(hdev->dev,
679 "Hint address 0x%llx could not be respected\n",
680 hint_addr);
681 reserved_valid_start = 0;
682 goto out;
683 }

	/*
	 * Check if there is a leftover range at the start of the chosen
	 * block due to reserving the new VA block; if so, return it to the
	 * free list as a new block.
	 */
689 if (reserved_valid_start > new_va_block->start) {
690 prev_start = new_va_block->start;
691 prev_end = reserved_valid_start - 1;
692
693 new_va_block->start = reserved_valid_start;
694 new_va_block->size = reserved_valid_size;
695
696 add_prev = true;
697 }
698
699 if (new_va_block->size > size) {
700 new_va_block->start += size;
701 new_va_block->size = new_va_block->end - new_va_block->start;
702 } else {
703 list_del(&new_va_block->node);
704 kfree(new_va_block);
705 }
706
707 if (add_prev)
708 add_va_block_locked(hdev, &va_range->list, prev_start,
709 prev_end);
710
711 print_va_list_locked(hdev, &va_range->list);
712out:
713 mutex_unlock(&va_range->lock);
714
715 return reserved_valid_start;
716}
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		enum hl_va_range_type type, u32 size, u32 alignment)
{
	return get_va_block(hdev, ctx->va_range[type], size, 0,
			max(alignment, ctx->va_range[type]->page_size),
			type, 0);
}
static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
			enum hl_va_range_type *type)
{
	int i;

	for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) {
		if (hl_mem_area_inside_range(address, size,
				ctx->va_range[i]->start_addr,
				ctx->va_range[i]->end_addr)) {
			*type = i;
			return 0;
		}
	}

	return -EINVAL;
}
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		u64 start_addr, u64 size)
{
	enum hl_va_range_type type;
	int rc;

	rc = hl_get_va_range_type(ctx, start_addr, size, &type);
	if (rc) {
		dev_err(hdev->dev,
			"cannot find va_range for va %#llx size %llu",
			start_addr, size);
		return rc;
	}

	rc = add_va_block(hdev, ctx->va_range[type], start_addr,
			start_addr + size - 1);
	if (rc)
		dev_warn(hdev->dev,
			"add va block failed for vaddr: 0x%llx\n", start_addr);

	return rc;
}
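
/**
 * init_phys_pg_pack_from_userptr() - initialize a physical page pack from a
 *                                    pinned host memory area.
 * @ctx: pointer to the context structure.
 * @userptr: userptr that describes the pinned host memory.
 * @pphys_pg_pack: result pointer to the new physical page pack.
 * @force_regular_page: if true, use regular (non-huge) pages even when the
 *                      DMA addresses would allow huge-page mappings.
 *
 * Walk the scatter-gather table and build the array of DMA addresses that
 * will be mapped by the MMU. Huge pages are used when every SG entry is
 * suitably sized and aligned, to reduce the number of PTEs.
 */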
815static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
816 struct hl_userptr *userptr,
817 struct hl_vm_phys_pg_pack **pphys_pg_pack,
818 bool force_regular_page)
819{
820 u32 npages, page_size = PAGE_SIZE,
821 huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
822 u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
823 struct hl_vm_phys_pg_pack *phys_pg_pack;
824 bool first = true, is_huge_page_opt;
825 u64 page_mask, total_npages;
826 struct scatterlist *sg;
827 dma_addr_t dma_addr;
828 int rc, i, j;
829
830 phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
831 if (!phys_pg_pack)
832 return -ENOMEM;
833
834 phys_pg_pack->vm_type = userptr->vm_type;
835 phys_pg_pack->created_from_userptr = true;
836 phys_pg_pack->asid = ctx->asid;
837 atomic_set(&phys_pg_pack->mapping_cnt, 1);
838
839 is_huge_page_opt = (force_regular_page ? false : true);

	/*
	 * Use huge pages only if the size and DMA address of every SG entry
	 * are aligned to the huge page size; otherwise fall back to regular
	 * pages.
	 */
847 total_npages = 0;
848 for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
849 npages = hl_get_sg_info(sg, &dma_addr);
850
851 total_npages += npages;
852
853 if ((npages % pgs_in_huge_page) ||
854 (dma_addr & (huge_page_size - 1)))
855 is_huge_page_opt = false;
856 }
857
858 if (is_huge_page_opt) {
859 page_size = huge_page_size;
860 do_div(total_npages, pgs_in_huge_page);
861 }
862
863 page_mask = ~(((u64) page_size) - 1);
864
865 phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
866 GFP_KERNEL);
867 if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
868 rc = -ENOMEM;
869 goto page_pack_arr_mem_err;
870 }
871
872 phys_pg_pack->npages = total_npages;
873 phys_pg_pack->page_size = page_size;
874 phys_pg_pack->total_size = total_npages * page_size;
875
876 j = 0;
877 for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
878 npages = hl_get_sg_info(sg, &dma_addr);
879
880
881 if (first) {
882 first = false;
883 phys_pg_pack->offset = dma_addr & (page_size - 1);
884 dma_addr &= page_mask;
885 }
886
887 while (npages) {
888 phys_pg_pack->pages[j++] = dma_addr;
889 dma_addr += page_size;
890
891 if (is_huge_page_opt)
892 npages -= pgs_in_huge_page;
893 else
894 npages--;
895 }
896 }
897
898 *pphys_pg_pack = phys_pg_pack;
899
900 return 0;
901
902page_pack_arr_mem_err:
903 kfree(phys_pg_pack);
904
905 return rc;
906}
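
/**
 * map_phys_pg_pack() - map the physical page pack.
 * @ctx: pointer to the context structure.
 * @vaddr: start address of the virtual area to map from.
 * @phys_pg_pack: the pack of physical pages to map.
 *
 * Map each physical page to the next virtual page in the device MMU. On
 * failure, unmap everything that was already mapped.
 */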
919static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
920 struct hl_vm_phys_pg_pack *phys_pg_pack)
921{
922 struct hl_device *hdev = ctx->hdev;
923 u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
924 u32 page_size = phys_pg_pack->page_size;
925 int rc = 0;
926 bool is_host_addr;
927
928 for (i = 0 ; i < phys_pg_pack->npages ; i++) {
929 paddr = phys_pg_pack->pages[i];
930
931 rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
932 (i + 1) == phys_pg_pack->npages);
933 if (rc) {
934 dev_err(hdev->dev,
935 "map failed for handle %u, npages: %llu, mapped: %llu",
936 phys_pg_pack->handle, phys_pg_pack->npages,
937 mapped_pg_cnt);
938 goto err;
939 }
940
941 mapped_pg_cnt++;
942 next_vaddr += page_size;
943 }
944
945 return 0;
946
947err:
948 is_host_addr = !hl_is_dram_va(hdev, vaddr);
949
950 next_vaddr = vaddr;
951 for (i = 0 ; i < mapped_pg_cnt ; i++) {
952 if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
953 (i + 1) == mapped_pg_cnt))
954 dev_warn_ratelimited(hdev->dev,
955 "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
956 phys_pg_pack->handle, next_vaddr,
957 phys_pg_pack->pages[i], page_size);
958
959 next_vaddr += page_size;

		/*
		 * Unmapping can take a long time on the Palladium simulation
		 * platform (pldm) and for large host mappings, so sleep every
		 * once in a while to avoid CPU soft-lockup warnings.
		 */
969 if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
970 usleep_range(50, 200);
971 }
972
973 return rc;
974}
982static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
983 struct hl_vm_phys_pg_pack *phys_pg_pack)
984{
985 struct hl_device *hdev = ctx->hdev;
986 u64 next_vaddr, i;
987 bool is_host_addr;
988 u32 page_size;
989
990 is_host_addr = !hl_is_dram_va(hdev, vaddr);
991 page_size = phys_pg_pack->page_size;
992 next_vaddr = vaddr;
993
994 for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
995 if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
996 (i + 1) == phys_pg_pack->npages))
997 dev_warn_ratelimited(hdev->dev,
998 "unmap failed for vaddr: 0x%llx\n", next_vaddr);

		/*
		 * Unmapping can take a long time on the Palladium simulation
		 * platform (pldm) and for large host mappings, so sleep every
		 * once in a while to avoid CPU soft-lockup warnings.
		 */
1008 if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
1009 usleep_range(50, 200);
1010 }
1011}
1012
static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
			u64 *paddr)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u32 handle;

	handle = lower_32_bits(args->map_device.handle);
	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (!phys_pg_pack) {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev, "no match for handle %u\n", handle);
		return -EINVAL;
	}

	*paddr = phys_pg_pack->pages[0];

	spin_unlock(&vm->idr_lock);

	return 0;
}
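
/**
 * map_device_va() - map the given memory.
 * @ctx: pointer to the context structure.
 * @args: host parameters with the handle or the host virtual address.
 * @device_addr: result device virtual address.
 *
 * Allocate a device virtual block from the matching VA range, map the
 * physical pages (host userptr or DRAM allocation) to it in the MMU,
 * invalidate the MMU cache and add the mapping to the context hash table.
 */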
1050static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
1051 u64 *device_addr)
1052{
1053 struct hl_device *hdev = ctx->hdev;
1054 struct hl_vm *vm = &hdev->vm;
1055 struct hl_vm_phys_pg_pack *phys_pg_pack;
1056 struct hl_userptr *userptr = NULL;
1057 struct hl_vm_hash_node *hnode;
1058 struct hl_va_range *va_range;
1059 enum vm_type *vm_type;
1060 u64 ret_vaddr, hint_addr;
1061 u32 handle = 0, va_block_align;
1062 int rc;
1063 bool is_userptr = args->flags & HL_MEM_USERPTR;
1064 enum hl_va_range_type va_range_type = 0;
1065
1066
1067 *device_addr = 0;
1068
1069 if (is_userptr) {
1070 u64 addr = args->map_host.host_virt_addr,
1071 size = args->map_host.mem_size;
1072 u32 page_size = hdev->asic_prop.pmmu.page_size,
1073 huge_page_size = hdev->asic_prop.pmmu_huge.page_size;
1074
1075 rc = dma_map_host_va(hdev, addr, size, &userptr);
1076 if (rc) {
1077 dev_err(hdev->dev, "failed to get userptr from va\n");
1078 return rc;
1079 }
1080
1081 rc = init_phys_pg_pack_from_userptr(ctx, userptr,
1082 &phys_pg_pack, false);
1083 if (rc) {
1084 dev_err(hdev->dev,
1085 "unable to init page pack for vaddr 0x%llx\n",
1086 addr);
1087 goto init_page_pack_err;
1088 }
1089
1090 vm_type = (enum vm_type *) userptr;
1091 hint_addr = args->map_host.hint_addr;
1092 handle = phys_pg_pack->handle;
1093
1094
1095 if (phys_pg_pack->page_size == page_size) {
1096 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
1097 va_range_type = HL_VA_RANGE_TYPE_HOST;
1102 if (addr & (huge_page_size - 1))
1103 va_block_align = page_size;
1104 else
1105 va_block_align = huge_page_size;
1106 } else {
1111 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
1112 va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
1113 va_block_align = huge_page_size;
1114 }
1115 } else {
1116 handle = lower_32_bits(args->map_device.handle);
1117
1118 spin_lock(&vm->idr_lock);
1119 phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
1120 if (!phys_pg_pack) {
1121 spin_unlock(&vm->idr_lock);
1122 dev_err(hdev->dev,
1123 "no match for handle %u\n", handle);
1124 return -EINVAL;
1125 }
1126
1127
1128 atomic_inc(&phys_pg_pack->mapping_cnt);
1129
1130 spin_unlock(&vm->idr_lock);
1131
1132 vm_type = (enum vm_type *) phys_pg_pack;
1133
1134 hint_addr = args->map_device.hint_addr;
1135
1136
1137 va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
1138 va_range_type = HL_VA_RANGE_TYPE_DRAM;
1139 va_block_align = hdev->asic_prop.dmmu.page_size;
1140 }

	/*
	 * A DRAM allocation that belongs to another context can only be
	 * mapped if it was explicitly allocated as shared.
	 */
1146 if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
1147 phys_pg_pack->asid != ctx->asid) {
1148 dev_err(hdev->dev,
1149 "Failed to map memory, handle %u is not shared\n",
1150 handle);
1151 rc = -EPERM;
1152 goto shared_err;
1153 }
1154
1155 hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
1156 if (!hnode) {
1157 rc = -ENOMEM;
1158 goto hnode_err;
1159 }
1160
1161 if (hint_addr && phys_pg_pack->offset) {
1162 if (args->flags & HL_MEM_FORCE_HINT) {
1163
1164 dev_err(hdev->dev,
1165 "Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n",
1166 hint_addr, phys_pg_pack->offset);
1167 rc = -EINVAL;
1168 goto va_block_err;
1169 }
1170 dev_dbg(hdev->dev,
1171 "Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n",
1172 hint_addr, phys_pg_pack->offset);
1173 }
1174
1175 ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
1176 hint_addr, va_block_align,
1177 va_range_type, args->flags);
1178 if (!ret_vaddr) {
1179 dev_err(hdev->dev, "no available va block for handle %u\n",
1180 handle);
1181 rc = -ENOMEM;
1182 goto va_block_err;
1183 }
1184
1185 mutex_lock(&ctx->mmu_lock);
1186
1187 rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
1188 if (rc) {
1189 mutex_unlock(&ctx->mmu_lock);
1190 dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
1191 handle);
1192 goto map_err;
1193 }
1194
1195 rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false,
1196 *vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size);
1197
1198 mutex_unlock(&ctx->mmu_lock);
1199
1200 if (rc) {
1201 dev_err(hdev->dev,
1202 "mapping handle %u failed due to MMU cache invalidation\n",
1203 handle);
1204 goto map_err;
1205 }
1206
1207 ret_vaddr += phys_pg_pack->offset;
1208
1209 hnode->ptr = vm_type;
1210 hnode->vaddr = ret_vaddr;
1211
1212 mutex_lock(&ctx->mem_hash_lock);
1213 hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
1214 mutex_unlock(&ctx->mem_hash_lock);
1215
1216 *device_addr = ret_vaddr;
1217
1218 if (is_userptr)
1219 rc = free_phys_pg_pack(hdev, phys_pg_pack);
1220
1221 return rc;
1222
1223map_err:
1224 if (add_va_block(hdev, va_range, ret_vaddr,
1225 ret_vaddr + phys_pg_pack->total_size - 1))
1226 dev_warn(hdev->dev,
1227 "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
1228 handle, ret_vaddr);
1229
1230va_block_err:
1231 kfree(hnode);
1232hnode_err:
1233shared_err:
1234 atomic_dec(&phys_pg_pack->mapping_cnt);
1235 if (is_userptr)
1236 free_phys_pg_pack(hdev, phys_pg_pack);
1237init_page_pack_err:
1238 if (is_userptr)
1239 dma_unmap_host_va(hdev, userptr);
1240
1241 return rc;
1242}
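
/**
 * unmap_device_va() - unmap the given device virtual address.
 * @ctx: pointer to the context structure.
 * @args: host parameters with the device virtual address to unmap.
 * @ctx_free: true if called from the context-free flow, false otherwise.
 *
 * Unmap the physical pages from the MMU, return the virtual block to the VA
 * range and, unless the whole context is being torn down, invalidate the MMU
 * cache for the unmapped range.
 */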
1254static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
1255 bool ctx_free)
1256{
1257 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
1258 u64 vaddr = args->unmap.device_virt_addr;
1259 struct hl_vm_hash_node *hnode = NULL;
1260 struct asic_fixed_properties *prop;
1261 struct hl_device *hdev = ctx->hdev;
1262 struct hl_userptr *userptr = NULL;
1263 struct hl_va_range *va_range;
1264 enum vm_type *vm_type;
1265 bool is_userptr;
1266 int rc = 0;
1267
1268 prop = &hdev->asic_prop;
1269
1270
1271 mutex_lock(&ctx->mem_hash_lock);
1272 hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
1273 if (vaddr == hnode->vaddr)
1274 break;
1275
1276 if (!hnode) {
1277 mutex_unlock(&ctx->mem_hash_lock);
1278 dev_err(hdev->dev,
1279 "unmap failed, no mem hnode for vaddr 0x%llx\n",
1280 vaddr);
1281 return -EINVAL;
1282 }
1283
1284 hash_del(&hnode->node);
1285 mutex_unlock(&ctx->mem_hash_lock);
1286
1287 vm_type = hnode->ptr;
1288
1289 if (*vm_type == VM_TYPE_USERPTR) {
1290 is_userptr = true;
1291 userptr = hnode->ptr;
1292
1293 rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
1294 false);
1295 if (rc) {
1296 dev_err(hdev->dev,
1297 "unable to init page pack for vaddr 0x%llx\n",
1298 vaddr);
1299 goto vm_type_err;
1300 }
1301
1302 if (phys_pg_pack->page_size ==
1303 hdev->asic_prop.pmmu.page_size)
1304 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
1305 else
1306 va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
1307 } else if (*vm_type == VM_TYPE_PHYS_PACK) {
1308 is_userptr = false;
1309 va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
1310 phys_pg_pack = hnode->ptr;
1311 } else {
1312 dev_warn(hdev->dev,
1313 "unmap failed, unknown vm desc for vaddr 0x%llx\n",
1314 vaddr);
1315 rc = -EFAULT;
1316 goto vm_type_err;
1317 }
1318
1319 if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
1320 dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
1321 rc = -EINVAL;
1322 goto mapping_cnt_err;
1323 }
1324
1325 if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
1326 vaddr = prop->dram_base_address +
1327 DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
1328 phys_pg_pack->page_size) *
1329 phys_pg_pack->page_size;
1330 else
1331 vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
1332
1333 mutex_lock(&ctx->mmu_lock);
1334
1335 unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);

	/*
	 * When the whole context is being torn down, the MMU cache is
	 * invalidated once for the entire address space by the caller, so
	 * skip the per-range invalidation here.
	 */
1342 if (!ctx_free)
1343 rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true,
1344 *vm_type, ctx->asid, vaddr,
1345 phys_pg_pack->total_size);
1346
1347 mutex_unlock(&ctx->mmu_lock);
1355 if (!ctx_free) {
1356 int tmp_rc;
1357
1358 if (rc)
1359 dev_err(hdev->dev,
1360 "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
1361 vaddr);
1362
1363 tmp_rc = add_va_block(hdev, va_range, vaddr,
1364 vaddr + phys_pg_pack->total_size - 1);
1365 if (tmp_rc) {
1366 dev_warn(hdev->dev,
1367 "add va block failed for vaddr: 0x%llx\n",
1368 vaddr);
1369 if (!rc)
1370 rc = tmp_rc;
1371 }
1372 }
1373
1374 atomic_dec(&phys_pg_pack->mapping_cnt);
1375 kfree(hnode);
1376
1377 if (is_userptr) {
1378 free_phys_pg_pack(hdev, phys_pg_pack);
1379 dma_unmap_host_va(hdev, userptr);
1380 }
1381
1382 return rc;
1383
1384mapping_cnt_err:
1385 if (is_userptr)
1386 free_phys_pg_pack(hdev, phys_pg_pack);
1387vm_type_err:
1388 mutex_lock(&ctx->mem_hash_lock);
1389 hash_add(ctx->mem_hash, &hnode->node, vaddr);
1390 mutex_unlock(&ctx->mem_hash_lock);
1391
1392 return rc;
1393}
1394
static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
			u32 *size)
{
	u32 block_id = 0;
	int rc;

	rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);

	*handle = block_id | HL_MMAP_TYPE_BLOCK;
	*handle <<= PAGE_SHIFT;

	return rc;
}
1408
static void hw_block_vm_close(struct vm_area_struct *vma)
{
	struct hl_vm_hw_block_list_node *lnode =
		(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
	struct hl_ctx *ctx = lnode->ctx;

	mutex_lock(&ctx->hw_block_list_lock);
	list_del(&lnode->node);
	mutex_unlock(&ctx->hw_block_list_lock);
	hl_ctx_put(ctx);
	kfree(lnode);
	vma->vm_private_data = NULL;
}

static const struct vm_operations_struct hw_block_vm_ops = {
	.close = hw_block_vm_close
};
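
/**
 * hl_hw_block_mmap() - mmap a HW block to user.
 * @hpriv: pointer to the private data of the fd.
 * @vma: pointer to vm_area_struct of the process.
 *
 * The driver takes a context reference for every mapped HW block, so the
 * context cannot disappear while the block is still mapped; the reference
 * is dropped in hw_block_vm_close().
 */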
1435int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
1436{
1437 struct hl_vm_hw_block_list_node *lnode;
1438 struct hl_device *hdev = hpriv->hdev;
1439 struct hl_ctx *ctx = hpriv->ctx;
1440 u32 block_id, block_size;
1441 int rc;

	/*
	 * The block id is passed through the mmap offset, so clear vm_pgoff
	 * before doing the actual mapping and restore it afterwards.
	 */
1446 block_id = vma->vm_pgoff;
1447 vma->vm_pgoff = 0;
1448
1449
1450 block_size = vma->vm_end - vma->vm_start;
1451
1452 if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
1453 dev_err(hdev->dev,
1454 "user pointer is invalid - 0x%lx\n",
1455 vma->vm_start);
1456
1457 return -EINVAL;
1458 }
1459
1460 lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
1461 if (!lnode)
1462 return -ENOMEM;
1463
1464 vma->vm_ops = &hw_block_vm_ops;
1465 vma->vm_private_data = lnode;
1466
1467 hl_ctx_get(hdev, ctx);
1468
1469 rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
1470 if (rc) {
1471 hl_ctx_put(ctx);
1472 kfree(lnode);
1473 return rc;
1474 }
1475
1476 lnode->ctx = ctx;
1477 lnode->vaddr = vma->vm_start;
1478 lnode->size = block_size;
1479 lnode->id = block_id;
1480
1481 mutex_lock(&ctx->hw_block_list_lock);
1482 list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
1483 mutex_unlock(&ctx->hw_block_list_lock);
1484
1485 vma->vm_pgoff = block_id;
1486
1487 return 0;
1488}
1489
1490static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
1491{
1492 struct hl_device *hdev = hpriv->hdev;
1493 struct hl_ctx *ctx = hpriv->ctx;
1494 u64 block_handle, device_addr = 0;
1495 u32 handle = 0, block_size;
1496 int rc;
1497
1498 switch (args->in.op) {
1499 case HL_MEM_OP_ALLOC:
1500 if (args->in.alloc.mem_size == 0) {
1501 dev_err(hdev->dev,
1502 "alloc size must be larger than 0\n");
1503 rc = -EINVAL;
1504 goto out;
1505 }

		/*
		 * Force contiguous allocation: without an MMU the device
		 * accesses the memory by its physical address.
		 */
1510 args->in.flags |= HL_MEM_CONTIGUOUS;
1511 rc = alloc_device_memory(ctx, &args->in, &handle);
1512
1513 memset(args, 0, sizeof(*args));
1514 args->out.handle = (__u64) handle;
1515 break;
1516
1517 case HL_MEM_OP_FREE:
1518 rc = free_device_memory(ctx, &args->in);
1519 break;
1520
1521 case HL_MEM_OP_MAP:
1522 if (args->in.flags & HL_MEM_USERPTR) {
1523 device_addr = args->in.map_host.host_virt_addr;
1524 rc = 0;
1525 } else {
1526 rc = get_paddr_from_handle(ctx, &args->in,
1527 &device_addr);
1528 }
1529
1530 memset(args, 0, sizeof(*args));
1531 args->out.device_virt_addr = device_addr;
1532 break;
1533
1534 case HL_MEM_OP_UNMAP:
1535 rc = 0;
1536 break;
1537
1538 case HL_MEM_OP_MAP_BLOCK:
1539 rc = map_block(hdev, args->in.map_block.block_addr,
1540 &block_handle, &block_size);
1541 args->out.block_handle = block_handle;
1542 args->out.block_size = block_size;
1543 break;
1544
1545 default:
1546 dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
1547 rc = -ENOTTY;
1548 break;
1549 }
1550
1551out:
1552 return rc;
1553}
1554
1555int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
1556{
1557 enum hl_device_status status;
1558 union hl_mem_args *args = data;
1559 struct hl_device *hdev = hpriv->hdev;
1560 struct hl_ctx *ctx = hpriv->ctx;
1561 u64 block_handle, device_addr = 0;
1562 u32 handle = 0, block_size;
1563 int rc;
1564
1565 if (!hl_device_operational(hdev, &status)) {
1566 dev_warn_ratelimited(hdev->dev,
1567 "Device is %s. Can't execute MEMORY IOCTL\n",
1568 hdev->status[status]);
1569 return -EBUSY;
1570 }
1571
1572 if (!hdev->mmu_enable)
1573 return mem_ioctl_no_mmu(hpriv, args);
1574
1575 switch (args->in.op) {
1576 case HL_MEM_OP_ALLOC:
1577 if (args->in.alloc.mem_size == 0) {
1578 dev_err(hdev->dev,
1579 "alloc size must be larger than 0\n");
1580 rc = -EINVAL;
1581 goto out;
1582 }

		/*
		 * If DRAM does not support virtual memory there is no real
		 * allocation to do. Just update the accounting counters for
		 * debug and profiling and return a zero handle.
		 */
1592 if (!hdev->asic_prop.dram_supports_virtual_memory) {
1593 atomic64_add(args->in.alloc.mem_size,
1594 &ctx->dram_phys_mem);
1595 atomic64_add(args->in.alloc.mem_size,
1596 &hdev->dram_used_mem);
1597
1598 dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
1599 rc = 0;
1600
1601 memset(args, 0, sizeof(*args));
1602 args->out.handle = 0;
1603 goto out;
1604 }
1605
1606 rc = alloc_device_memory(ctx, &args->in, &handle);
1607
1608 memset(args, 0, sizeof(*args));
1609 args->out.handle = (__u64) handle;
1610 break;
1611
1612 case HL_MEM_OP_FREE:
		/*
		 * As above: without DRAM virtual memory there is nothing to
		 * free, only the accounting counters need to be updated.
		 */
1621 if (!hdev->asic_prop.dram_supports_virtual_memory) {
1622 atomic64_sub(args->in.alloc.mem_size,
1623 &ctx->dram_phys_mem);
1624 atomic64_sub(args->in.alloc.mem_size,
1625 &hdev->dram_used_mem);
1626
1627 dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
1628 rc = 0;
1629
1630 goto out;
1631 }
1632
1633 rc = free_device_memory(ctx, &args->in);
1634 break;
1635
1636 case HL_MEM_OP_MAP:
1637 rc = map_device_va(ctx, &args->in, &device_addr);
1638
1639 memset(args, 0, sizeof(*args));
1640 args->out.device_virt_addr = device_addr;
1641 break;
1642
1643 case HL_MEM_OP_UNMAP:
1644 rc = unmap_device_va(ctx, &args->in, false);
1645 break;
1646
1647 case HL_MEM_OP_MAP_BLOCK:
1648 rc = map_block(hdev, args->in.map_block.block_addr,
1649 &block_handle, &block_size);
1650 args->out.block_handle = block_handle;
1651 args->out.block_size = block_size;
1652 break;
1653
1654 default:
1655 dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
1656 rc = -ENOTTY;
1657 break;
1658 }
1659
1660out:
1661 return rc;
1662}
1663
1664static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
1665 u32 npages, u64 start, u32 offset,
1666 struct hl_userptr *userptr)
1667{
1668 int rc;
1669
1670 if (!access_ok((void __user *) (uintptr_t) addr, size)) {
1671 dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
1672 return -EFAULT;
1673 }
1674
1675 userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
1676 GFP_KERNEL);
1677 if (!userptr->pages)
1678 return -ENOMEM;
1679
1680 rc = pin_user_pages_fast(start, npages,
1681 FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
1682 userptr->pages);
1683
1684 if (rc != npages) {
1685 dev_err(hdev->dev,
1686 "Failed (%d) to pin host memory with user ptr 0x%llx, size 0x%llx, npages %d\n",
1687 rc, addr, size, npages);
1688 if (rc < 0)
1689 goto destroy_pages;
1690 npages = rc;
1691 rc = -EFAULT;
1692 goto put_pages;
1693 }
1694 userptr->npages = npages;
1695
1696 rc = sg_alloc_table_from_pages(userptr->sgt,
1697 userptr->pages,
1698 npages, offset, size, GFP_KERNEL);
1699 if (rc < 0) {
1700 dev_err(hdev->dev, "failed to create SG table from pages\n");
1701 goto put_pages;
1702 }
1703
1704 return 0;
1705
1706put_pages:
1707 unpin_user_pages(userptr->pages, npages);
1708destroy_pages:
1709 kvfree(userptr->pages);
1710 return rc;
1711}
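
/**
 * hl_pin_host_memory() - pin a chunk of host memory.
 * @hdev: habanalabs device structure.
 * @addr: the host virtual address of the memory area.
 * @size: the size of the memory area.
 * @userptr: pointer to the hl_userptr structure to fill.
 *
 * Validate the address, pin the physical pages with FOLL_LONGTERM and build
 * an SG table describing them. The memory is not DMA-mapped here.
 */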
1724int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
1725 struct hl_userptr *userptr)
1726{
1727 u64 start, end;
1728 u32 npages, offset;
1729 int rc;
1730
1731 if (!size) {
1732 dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
1733 return -EINVAL;
1734 }
1740 if (((addr + size) < addr) ||
1741 PAGE_ALIGN(addr + size) < (addr + size)) {
1742 dev_err(hdev->dev,
1743 "user pointer 0x%llx + %llu causes integer overflow\n",
1744 addr, size);
1745 return -EINVAL;
1746 }
1747
1748 userptr->pid = current->pid;
1749 userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
1750 if (!userptr->sgt)
1751 return -ENOMEM;
1752
1753 start = addr & PAGE_MASK;
1754 offset = addr & ~PAGE_MASK;
1755 end = PAGE_ALIGN(addr + size);
1756 npages = (end - start) >> PAGE_SHIFT;
1757
1758 userptr->size = size;
1759 userptr->addr = addr;
1760 userptr->dma_mapped = false;
1761 INIT_LIST_HEAD(&userptr->job_node);
1762
1763 rc = get_user_memory(hdev, addr, size, npages, start, offset,
1764 userptr);
1765 if (rc) {
1766 dev_err(hdev->dev,
1767 "failed to get user memory for address 0x%llx\n",
1768 addr);
1769 goto free_sgt;
1770 }
1771
1772 hl_debugfs_add_userptr(hdev, userptr);
1773
1774 return 0;
1775
1776free_sgt:
1777 kfree(userptr->sgt);
1778 return rc;
1779}
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
{
	hl_debugfs_remove_userptr(hdev, userptr);

	if (userptr->dma_mapped)
		hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
							userptr->sgt->nents,
							userptr->dir);

	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
	kvfree(userptr->pages);

	list_del(&userptr->job_node);

	sg_free_table(userptr->sgt);
	kfree(userptr->sgt);
}
void hl_userptr_delete_list(struct hl_device *hdev,
				struct list_head *userptr_list)
{
	struct hl_userptr *userptr, *tmp;

	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
		hl_unpin_host_memory(hdev, userptr);
		kfree(userptr);
	}

	INIT_LIST_HEAD(userptr_list);
}
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
				u32 size, struct list_head *userptr_list,
				struct hl_userptr **userptr)
{
	list_for_each_entry((*userptr), userptr_list, job_node) {
		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
			return true;
	}

	return false;
}
1863static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
1864 u64 start, u64 end, u32 page_size)
1865{
1866 int rc;
1867
1868 INIT_LIST_HEAD(&va_range->list);
1876 if (is_power_of_2(page_size)) {
1877 if (start & (PAGE_SIZE - 1)) {
1878 start &= PAGE_MASK;
1879 start += PAGE_SIZE;
1880 }
1881
1882 if (end & (PAGE_SIZE - 1))
1883 end &= PAGE_MASK;
1884 }
1885
1886 if (start >= end) {
1887 dev_err(hdev->dev, "too small vm range for va list\n");
1888 return -EFAULT;
1889 }
1890
1891 rc = add_va_block(hdev, va_range, start, end);
1892
1893 if (rc) {
1894 dev_err(hdev->dev, "Failed to init host va list\n");
1895 return rc;
1896 }
1897
1898 va_range->start_addr = start;
1899 va_range->end_addr = end;
1900 va_range->page_size = page_size;
1901
1902 return 0;
1903}
static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
{
	mutex_lock(&va_range->lock);
	clear_va_list_locked(hdev, &va_range->list);
	mutex_unlock(&va_range->lock);

	mutex_destroy(&va_range->lock);
	kfree(va_range);
}
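
/**
 * vm_ctx_init_with_ranges() - initialize virtual memory for a context.
 * @ctx: pointer to the habanalabs context structure.
 * @host_range_start: host virtual addresses range start.
 * @host_range_end: host virtual addresses range end.
 * @host_page_size: host page size.
 * @host_huge_range_start: host huge-page virtual addresses range start.
 * @host_huge_range_end: host huge-page virtual addresses range end.
 * @host_huge_page_size: host huge page size.
 * @dram_range_start: DRAM virtual addresses range start.
 * @dram_range_end: DRAM virtual addresses range end.
 * @dram_page_size: DRAM page size.
 *
 * Initialize the MMU for the context and create the VA range lists (host,
 * host-huge and DRAM) that device virtual blocks are carved from.
 */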
1940static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
1941 u64 host_range_start,
1942 u64 host_range_end,
1943 u32 host_page_size,
1944 u64 host_huge_range_start,
1945 u64 host_huge_range_end,
1946 u32 host_huge_page_size,
1947 u64 dram_range_start,
1948 u64 dram_range_end,
1949 u32 dram_page_size)
1950{
1951 struct hl_device *hdev = ctx->hdev;
1952 int i, rc;
1953
1954 for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) {
1955 ctx->va_range[i] =
1956 kzalloc(sizeof(struct hl_va_range), GFP_KERNEL);
1957 if (!ctx->va_range[i]) {
1958 rc = -ENOMEM;
1959 goto free_va_range;
1960 }
1961 }
1962
1963 rc = hl_mmu_ctx_init(ctx);
1964 if (rc) {
1965 dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
1966 goto free_va_range;
1967 }
1968
1969 mutex_init(&ctx->mem_hash_lock);
1970 hash_init(ctx->mem_hash);
1971
1972 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
1973
1974 rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST],
1975 host_range_start, host_range_end, host_page_size);
1976 if (rc) {
1977 dev_err(hdev->dev, "failed to init host vm range\n");
1978 goto mmu_ctx_fini;
1979 }
1980
1981 if (hdev->pmmu_huge_range) {
1982 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
1983
1984 rc = va_range_init(hdev,
1985 ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE],
1986 host_huge_range_start, host_huge_range_end,
1987 host_huge_page_size);
1988 if (rc) {
1989 dev_err(hdev->dev,
1990 "failed to init host huge vm range\n");
1991 goto clear_host_va_range;
1992 }
1993 } else {
1994 kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
1995 ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] =
1996 ctx->va_range[HL_VA_RANGE_TYPE_HOST];
1997 }
1998
1999 mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
2000
2001 rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM],
2002 dram_range_start, dram_range_end, dram_page_size);
2003 if (rc) {
2004 dev_err(hdev->dev, "failed to init dram vm range\n");
2005 goto clear_host_huge_va_range;
2006 }
2007
2008 hl_debugfs_add_ctx_mem_hash(hdev, ctx);
2009
2010 return 0;
2011
2012clear_host_huge_va_range:
2013 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
2014
2015 if (hdev->pmmu_huge_range) {
2016 mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
2017 clear_va_list_locked(hdev,
2018 &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list);
2019 mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
2020 }
2021clear_host_va_range:
2022 if (hdev->pmmu_huge_range)
2023 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
2024 mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
2025 clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list);
2026 mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
2027mmu_ctx_fini:
2028 mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
2029 mutex_destroy(&ctx->mem_hash_lock);
2030 hl_mmu_ctx_fini(ctx);
2031free_va_range:
2032 for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++)
2033 kfree(ctx->va_range[i]);
2034
2035 return rc;
2036}
2037
2038int hl_vm_ctx_init(struct hl_ctx *ctx)
2039{
2040 struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
2041 u64 host_range_start, host_range_end, host_huge_range_start,
2042 host_huge_range_end, dram_range_start, dram_range_end;
2043 u32 host_page_size, host_huge_page_size, dram_page_size;
2044
2045 atomic64_set(&ctx->dram_phys_mem, 0);

	/*
	 * If the MMU is disabled, the driver and the user work directly with
	 * physical addresses, so there are no virtual address ranges to set
	 * up for this context.
	 */
2054 if (!ctx->hdev->mmu_enable)
2055 return 0;
2056
2057 dram_range_start = prop->dmmu.start_addr;
2058 dram_range_end = prop->dmmu.end_addr;
2059 dram_page_size = prop->dram_page_size ?
2060 prop->dram_page_size : prop->dmmu.page_size;
2061 host_range_start = prop->pmmu.start_addr;
2062 host_range_end = prop->pmmu.end_addr;
2063 host_page_size = prop->pmmu.page_size;
2064 host_huge_range_start = prop->pmmu_huge.start_addr;
2065 host_huge_range_end = prop->pmmu_huge.end_addr;
2066 host_huge_page_size = prop->pmmu_huge.page_size;
2067
2068 return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
2069 host_page_size, host_huge_range_start,
2070 host_huge_range_end, host_huge_page_size,
2071 dram_range_start, dram_range_end, dram_page_size);
2072}
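
/**
 * hl_vm_ctx_fini() - virtual memory teardown of a context.
 * @ctx: pointer to the habanalabs context structure.
 *
 * Unmap any mapping that is still alive, free any allocation the user did
 * not free, invalidate the MMU caches and release the VA ranges. This runs
 * when a context is released, so leftover mappings normally indicate a user
 * that exited without cleaning up, or a hard reset.
 */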
2093void hl_vm_ctx_fini(struct hl_ctx *ctx)
2094{
2095 struct hl_device *hdev = ctx->hdev;
2096 struct hl_vm *vm = &hdev->vm;
2097 struct hl_vm_phys_pg_pack *phys_pg_list;
2098 struct hl_vm_hash_node *hnode;
2099 struct hlist_node *tmp_node;
2100 struct hl_mem_in args;
2101 int i;
2102
2103 if (!hdev->mmu_enable)
2104 return;
2105
2106 hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
2112 if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
2113 dev_dbg(hdev->dev,
2114 "user released device without removing its memory mappings\n");
2115
2116 hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
2117 dev_dbg(hdev->dev,
2118 "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
2119 hnode->vaddr, ctx->asid);
2120 args.unmap.device_virt_addr = hnode->vaddr;
2121 unmap_device_va(ctx, &args, true);
2122 }
2123
2124 mutex_lock(&ctx->mmu_lock);
2125
2126
2127 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
2128 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
2129
2130 mutex_unlock(&ctx->mmu_lock);
2131
2132 spin_lock(&vm->idr_lock);
2133 idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
2134 if (phys_pg_list->asid == ctx->asid) {
2135 dev_dbg(hdev->dev,
2136 "page list 0x%px of asid %d is still alive\n",
2137 phys_pg_list, ctx->asid);
2138 atomic64_sub(phys_pg_list->total_size,
2139 &hdev->dram_used_mem);
2140 free_phys_pg_pack(hdev, phys_pg_list);
2141 idr_remove(&vm->phys_pg_pack_handles, i);
2142 }
2143 spin_unlock(&vm->idr_lock);
2144
2145 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
2146 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
2147
2148 if (hdev->pmmu_huge_range)
2149 va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
2150
2151 mutex_destroy(&ctx->mem_hash_lock);
2152 hl_mmu_ctx_fini(ctx);
2158 if (ctx->asid != HL_KERNEL_ASID_ID &&
2159 !hdev->asic_prop.dram_supports_virtual_memory)
2160 atomic64_set(&hdev->dram_used_mem, 0);
2161}
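
/**
 * hl_vm_init() - initialize the virtual memory module.
 * @hdev: habanalabs device structure.
 *
 * Create the DRAM page pool that backs device memory allocations and
 * initialize the IDR that hands out allocation handles.
 */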
2172int hl_vm_init(struct hl_device *hdev)
2173{
2174 struct asic_fixed_properties *prop = &hdev->asic_prop;
2175 struct hl_vm *vm = &hdev->vm;
2176 int rc;
2177
2178 if (is_power_of_2(prop->dram_page_size))
2179 vm->dram_pg_pool =
2180 gen_pool_create(__ffs(prop->dram_page_size), -1);
2181 else
2182 vm->dram_pg_pool =
2183 gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
2184
2185 if (!vm->dram_pg_pool) {
2186 dev_err(hdev->dev, "Failed to create dram page pool\n");
2187 return -ENOMEM;
2188 }
2189
2190 kref_init(&vm->dram_pg_pool_refcount);
2191
2192 rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
2193 prop->dram_end_address - prop->dram_user_base_address,
2194 -1);
2195
2196 if (rc) {
2197 dev_err(hdev->dev,
2198 "Failed to add memory to dram page pool %d\n", rc);
2199 goto pool_add_err;
2200 }
2201
2202 spin_lock_init(&vm->idr_lock);
2203 idr_init(&vm->phys_pg_pack_handles);
2204
2205 atomic64_set(&hdev->dram_used_mem, 0);
2206
2207 vm->init_done = true;
2208
2209 return 0;
2210
2211pool_add_err:
2212 gen_pool_destroy(vm->dram_pg_pool);
2213
2214 return rc;
2215}
void hl_vm_fini(struct hl_device *hdev)
{
	struct hl_vm *vm = &hdev->vm;

	if (!vm->init_done)
		return;

	/*
	 * At this point all the contexts should be freed, hence no DRAM
	 * should be in use and the pool refcount should drop to zero here.
	 */
	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
		dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
				__func__);

	vm->init_done = false;
}
void hl_hw_block_mem_init(struct hl_ctx *ctx)
{
	mutex_init(&ctx->hw_block_list_lock);
	INIT_LIST_HEAD(&ctx->hw_block_mem_list);
}
void hl_hw_block_mem_fini(struct hl_ctx *ctx)
{
	struct hl_vm_hw_block_list_node *lnode, *tmp;

	if (!list_empty(&ctx->hw_block_mem_list))
		dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");

	list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
		list_del(&lnode->node);
		kfree(lnode);
	}

	mutex_destroy(&ctx->hw_block_list_lock);
}
2278