// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO: IOMMU DMA mapping support for Type1 IOMMU
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 *     Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/compat.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/workqueue.h>
#include <linux/mdev.h>
#include <linux/notifier.h>
#include <linux/dma-iommu.h>
#include <linux/irqdomain.h>

#define DRIVER_VERSION  "0.2"
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "Type1 IOMMU driver for VFIO"

static bool allow_unsafe_interrupts;
module_param_named(allow_unsafe_interrupts,
		   allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_interrupts,
		 "Enable VFIO IOMMU support on platforms without interrupt remapping support.");

static bool disable_hugepages;
module_param_named(disable_hugepages,
		   disable_hugepages, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_hugepages,
		 "Disable VFIO IOMMU support for IOMMU hugepages.");

static unsigned int dma_entry_limit __read_mostly = U16_MAX;
module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
MODULE_PARM_DESC(dma_entry_limit,
		 "Maximum number of user DMA mappings per container (65535).");

struct vfio_iommu {
	struct list_head	domain_list;
	struct vfio_domain	*external_domain; /* domain for external user */
	struct mutex		lock;
	struct rb_root		dma_list;
	struct blocking_notifier_head notifier;
	unsigned int		dma_avail;
	bool			v2;
	bool			nesting;
};

struct vfio_domain {
	struct iommu_domain	*domain;
	struct list_head	next;
	struct list_head	group_list;
	int			prot;		/* IOMMU_CACHE */
	bool			fgsp;		/* Fine-grained super pages */
};

struct vfio_dma {
	struct rb_node		node;
	dma_addr_t		iova;		/* Device address */
	unsigned long		vaddr;		/* Process virtual addr */
	size_t			size;		/* Map size (bytes) */
	int			prot;		/* IOMMU_READ/WRITE */
	bool			iommu_mapped;
	bool			lock_cap;	/* capable(CAP_IPC_LOCK) */
	struct task_struct	*task;
	struct rb_root		pfn_list;	/* Ex-user pinned pfn list */
};

struct vfio_group {
	struct iommu_group	*iommu_group;
	struct list_head	next;
	bool			mdev_group;	/* An mdev group */
};

/*
 * Guest RAM pinning working set or DMA target
 */
struct vfio_pfn {
	struct rb_node		node;
	dma_addr_t		iova;
	unsigned long		pfn;
	atomic_t		ref_count;
};

struct vfio_regions {
	struct list_head list;
	dma_addr_t iova;
	phys_addr_t phys;
	size_t len;
};

#define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
					(!list_empty(&iommu->domain_list))

static int put_pfn(unsigned long pfn, int prot);

/*
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

127static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
128 dma_addr_t start, size_t size)
129{
130 struct rb_node *node = iommu->dma_list.rb_node;
131
132 while (node) {
133 struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
134
135 if (start + size <= dma->iova)
136 node = node->rb_left;
137 else if (start >= dma->iova + dma->size)
138 node = node->rb_right;
139 else
140 return dma;
141 }
142
143 return NULL;
144}
145
146static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
147{
148 struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
149 struct vfio_dma *dma;
150
151 while (*link) {
152 parent = *link;
153 dma = rb_entry(parent, struct vfio_dma, node);
154
155 if (new->iova + new->size <= dma->iova)
156 link = &(*link)->rb_left;
157 else
158 link = &(*link)->rb_right;
159 }
160
161 rb_link_node(&new->node, parent, link);
162 rb_insert_color(&new->node, &iommu->dma_list);
163}
164
165static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
166{
167 rb_erase(&old->node, &iommu->dma_list);
168}
169
/*
 * Helper Functions for host iova-pfn list
 */
173static struct vfio_pfn *vfio_find_vpfn(struct vfio_dma *dma, dma_addr_t iova)
174{
175 struct vfio_pfn *vpfn;
176 struct rb_node *node = dma->pfn_list.rb_node;
177
178 while (node) {
179 vpfn = rb_entry(node, struct vfio_pfn, node);
180
181 if (iova < vpfn->iova)
182 node = node->rb_left;
183 else if (iova > vpfn->iova)
184 node = node->rb_right;
185 else
186 return vpfn;
187 }
188 return NULL;
189}
190
191static void vfio_link_pfn(struct vfio_dma *dma,
192 struct vfio_pfn *new)
193{
194 struct rb_node **link, *parent = NULL;
195 struct vfio_pfn *vpfn;
196
197 link = &dma->pfn_list.rb_node;
198 while (*link) {
199 parent = *link;
200 vpfn = rb_entry(parent, struct vfio_pfn, node);
201
202 if (new->iova < vpfn->iova)
203 link = &(*link)->rb_left;
204 else
205 link = &(*link)->rb_right;
206 }
207
208 rb_link_node(&new->node, parent, link);
209 rb_insert_color(&new->node, &dma->pfn_list);
210}
211
212static void vfio_unlink_pfn(struct vfio_dma *dma, struct vfio_pfn *old)
213{
214 rb_erase(&old->node, &dma->pfn_list);
215}
216
217static int vfio_add_to_pfn_list(struct vfio_dma *dma, dma_addr_t iova,
218 unsigned long pfn)
219{
220 struct vfio_pfn *vpfn;
221
222 vpfn = kzalloc(sizeof(*vpfn), GFP_KERNEL);
223 if (!vpfn)
224 return -ENOMEM;
225
226 vpfn->iova = iova;
227 vpfn->pfn = pfn;
228 atomic_set(&vpfn->ref_count, 1);
229 vfio_link_pfn(dma, vpfn);
230 return 0;
231}
232
233static void vfio_remove_from_pfn_list(struct vfio_dma *dma,
234 struct vfio_pfn *vpfn)
235{
236 vfio_unlink_pfn(dma, vpfn);
237 kfree(vpfn);
238}
239
240static struct vfio_pfn *vfio_iova_get_vfio_pfn(struct vfio_dma *dma,
241 unsigned long iova)
242{
243 struct vfio_pfn *vpfn = vfio_find_vpfn(dma, iova);
244
245 if (vpfn)
246 atomic_inc(&vpfn->ref_count);
247 return vpfn;
248}
249
250static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
251{
252 int ret = 0;
253
254 if (atomic_dec_and_test(&vpfn->ref_count)) {
255 ret = put_pfn(vpfn->pfn, dma->prot);
256 vfio_remove_from_pfn_list(dma, vpfn);
257 }
258 return ret;
259}
260
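/*
 * Charge or uncharge npage pages of locked memory against the task that
 * created the mapping.  With @async the caller may be running in a different
 * process context, so take a reference on dma->task's mm rather than using
 * it directly.
 */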
261static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
262{
263 struct mm_struct *mm;
264 int ret;
265
266 if (!npage)
267 return 0;
268
269 mm = async ? get_task_mm(dma->task) : dma->task->mm;
270 if (!mm)
271 return -ESRCH;
272
273 ret = down_write_killable(&mm->mmap_sem);
274 if (!ret) {
275 ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
276 dma->lock_cap);
277 up_write(&mm->mmap_sem);
278 }
279
280 if (async)
281 mmput(mm);
282
283 return ret;
284}
285
/*
 * Some mappings aren't backed by a struct page, for example an mmap'd
 * MMIO range for our own or another device.  These use a different
 * pfn conversion and shouldn't be tracked as locked pages.
 */
291static bool is_invalid_reserved_pfn(unsigned long pfn)
292{
293 if (pfn_valid(pfn)) {
294 bool reserved;
295 struct page *tail = pfn_to_page(pfn);
296 struct page *head = compound_head(tail);
297 reserved = !!(PageReserved(head));
298 if (head != tail) {
			/*
			 * "head" is not a dangling pointer (compound_head()
			 * takes care of that), but the hugepage may have been
			 * split from under us and we may not hold a reference
			 * on the head page, so it could be reused before we
			 * read PageReserved().  Re-check PageTail() before
			 * trusting the value read from the head page.
			 */
309 smp_rmb();
310 if (PageTail(tail))
311 return reserved;
312 }
313 return PageReserved(tail);
314 }
315
316 return true;
317}
318
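/*
 * Release a page pinned by vaddr_get_pfn().  Invalid or reserved pfns (e.g.
 * PFNMAP MMIO) carry no page reference, so only valid pages are dirtied
 * (when mapped writable) and put.  Returns 1 if a reference was dropped.
 */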
319static int put_pfn(unsigned long pfn, int prot)
320{
321 if (!is_invalid_reserved_pfn(pfn)) {
322 struct page *page = pfn_to_page(pfn);
323 if (prot & IOMMU_WRITE)
324 SetPageDirty(page);
325 put_page(page);
326 return 1;
327 }
328 return 0;
329}
330
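/*
 * Translate a user virtual address to a host pfn, pinning the backing page
 * with get_user_pages*() where one exists.  For VM_PFNMAP vmas (e.g. device
 * MMIO) fall back to computing the pfn from the vma; such pfns are reserved
 * and carry no page reference.
 */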
331static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
332 int prot, unsigned long *pfn)
333{
334 struct page *page[1];
335 struct vm_area_struct *vma;
336 struct vm_area_struct *vmas[1];
337 unsigned int flags = 0;
338 int ret;
339
340 if (prot & IOMMU_WRITE)
341 flags |= FOLL_WRITE;
342
343 down_read(&mm->mmap_sem);
344 if (mm == current->mm) {
345 ret = get_user_pages(vaddr, 1, flags | FOLL_LONGTERM, page,
346 vmas);
347 } else {
348 ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
349 vmas, NULL);
		/*
		 * The lifetime of a vaddr_get_pfn() page pin is
		 * userspace-controlled.  In the fs-dax case this could
		 * lead to indefinite stalls in filesystem operations.
		 * Disallow attempts to pin fs-dax pages via this
		 * interface.
		 */
357 if (ret > 0 && vma_is_fsdax(vmas[0])) {
358 ret = -EOPNOTSUPP;
359 put_page(page[0]);
360 }
361 }
362 up_read(&mm->mmap_sem);
363
364 if (ret == 1) {
365 *pfn = page_to_pfn(page[0]);
366 return 0;
367 }
368
369 down_read(&mm->mmap_sem);
370
371 vma = find_vma_intersection(mm, vaddr, vaddr + 1);
372
373 if (vma && vma->vm_flags & VM_PFNMAP) {
374 *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
375 if (is_invalid_reserved_pfn(*pfn))
376 ret = 0;
377 }
378
379 up_read(&mm->mmap_sem);
380 return ret;
381}
382
/*
 * Attempt to pin pages.  We really don't want to track all the pfns and
 * the iommu can only map chunks of consecutive pfns anyway, so get the
 * first page and all consecutive pages with the same locking.
 */
388static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
389 long npage, unsigned long *pfn_base,
390 unsigned long limit)
391{
392 unsigned long pfn = 0;
393 long ret, pinned = 0, lock_acct = 0;
394 bool rsvd;
395 dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
396
	/* This code path is only user initiated */
398 if (!current->mm)
399 return -ENODEV;
400
401 ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base);
402 if (ret)
403 return ret;
404
405 pinned++;
406 rsvd = is_invalid_reserved_pfn(*pfn_base);

	/*
	 * Reserved pages aren't counted against the user, externally pinned
	 * pages are already counted against the user.
	 */
412 if (!rsvd && !vfio_find_vpfn(dma, iova)) {
413 if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
414 put_pfn(*pfn_base, dma->prot);
415 pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
416 limit << PAGE_SHIFT);
417 return -ENOMEM;
418 }
419 lock_acct++;
420 }
421
422 if (unlikely(disable_hugepages))
423 goto out;
424
	/* Lock all the consecutive pages from pfn_base */
426 for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
427 pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
428 ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
429 if (ret)
430 break;
431
432 if (pfn != *pfn_base + pinned ||
433 rsvd != is_invalid_reserved_pfn(pfn)) {
434 put_pfn(pfn, dma->prot);
435 break;
436 }
437
438 if (!rsvd && !vfio_find_vpfn(dma, iova)) {
439 if (!dma->lock_cap &&
440 current->mm->locked_vm + lock_acct + 1 > limit) {
441 put_pfn(pfn, dma->prot);
442 pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
443 __func__, limit << PAGE_SHIFT);
444 ret = -ENOMEM;
445 goto unpin_out;
446 }
447 lock_acct++;
448 }
449 }
450
451out:
452 ret = vfio_lock_acct(dma, lock_acct, false);
453
454unpin_out:
455 if (ret) {
456 if (!rsvd) {
457 for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
458 put_pfn(pfn, dma->prot);
459 }
460
461 return ret;
462 }
463
464 return pinned;
465}
466
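/*
 * Unpin a contiguous run of pages pinned by vfio_pin_pages_remote().  Pages
 * still held on the external pfn_list remain accounted, hence the separate
 * locked/unlocked counts below.
 */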
467static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
468 unsigned long pfn, long npage,
469 bool do_accounting)
470{
471 long unlocked = 0, locked = 0;
472 long i;
473
474 for (i = 0; i < npage; i++, iova += PAGE_SIZE) {
475 if (put_pfn(pfn++, dma->prot)) {
476 unlocked++;
477 if (vfio_find_vpfn(dma, iova))
478 locked++;
479 }
480 }
481
482 if (do_accounting)
483 vfio_lock_acct(dma, locked - unlocked, true);
484
485 return unlocked;
486}
487
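/*
 * Pin a single page on behalf of an external (mdev) user.  The pin is
 * charged against the task that created the DMA mapping, which may differ
 * from current.
 */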
488static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
489 unsigned long *pfn_base, bool do_accounting)
490{
491 struct mm_struct *mm;
492 int ret;
493
494 mm = get_task_mm(dma->task);
495 if (!mm)
496 return -ENODEV;
497
498 ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
499 if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
500 ret = vfio_lock_acct(dma, 1, true);
501 if (ret) {
502 put_pfn(*pfn_base, dma->prot);
503 if (ret == -ENOMEM)
504 pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK "
505 "(%ld) exceeded\n", __func__,
506 dma->task->comm, task_pid_nr(dma->task),
507 task_rlimit(dma->task, RLIMIT_MEMLOCK));
508 }
509 }
510
511 mmput(mm);
512 return ret;
513}
514
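/*
 * Drop one external pin on the page backing @iova, releasing and
 * un-accounting the page when the last reference goes away.
 */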
515static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
516 bool do_accounting)
517{
518 int unlocked;
519 struct vfio_pfn *vpfn = vfio_find_vpfn(dma, iova);
520
521 if (!vpfn)
522 return 0;
523
524 unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
525
526 if (do_accounting)
527 vfio_lock_acct(dma, -unlocked, true);
528
529 return unlocked;
530}
531
532static int vfio_iommu_type1_pin_pages(void *iommu_data,
533 unsigned long *user_pfn,
534 int npage, int prot,
535 unsigned long *phys_pfn)
536{
537 struct vfio_iommu *iommu = iommu_data;
538 int i, j, ret;
539 unsigned long remote_vaddr;
540 struct vfio_dma *dma;
541 bool do_accounting;
542
543 if (!iommu || !user_pfn || !phys_pfn)
544 return -EINVAL;
545
	/* Supported for v2 type only */
547 if (!iommu->v2)
548 return -EACCES;
549
550 mutex_lock(&iommu->lock);
551
	/* Fail if notifier list is empty */
553 if (!iommu->notifier.head) {
554 ret = -EINVAL;
555 goto pin_done;
556 }
557
	/*
	 * If an iommu-capable domain exists in the container then all pages
	 * are already pinned and accounted.  Accounting is only needed when
	 * the container has no iommu-capable domain.
	 */
563 do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
564
565 for (i = 0; i < npage; i++) {
566 dma_addr_t iova;
567 struct vfio_pfn *vpfn;
568
569 iova = user_pfn[i] << PAGE_SHIFT;
570 dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
571 if (!dma) {
572 ret = -EINVAL;
573 goto pin_unwind;
574 }
575
576 if ((dma->prot & prot) != prot) {
577 ret = -EPERM;
578 goto pin_unwind;
579 }
580
581 vpfn = vfio_iova_get_vfio_pfn(dma, iova);
582 if (vpfn) {
583 phys_pfn[i] = vpfn->pfn;
584 continue;
585 }
586
587 remote_vaddr = dma->vaddr + iova - dma->iova;
588 ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
589 do_accounting);
590 if (ret)
591 goto pin_unwind;
592
593 ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
594 if (ret) {
595 vfio_unpin_page_external(dma, iova, do_accounting);
596 goto pin_unwind;
597 }
598 }
599
600 ret = i;
601 goto pin_done;
602
603pin_unwind:
604 phys_pfn[i] = 0;
605 for (j = 0; j < i; j++) {
606 dma_addr_t iova;
607
608 iova = user_pfn[j] << PAGE_SHIFT;
609 dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
610 vfio_unpin_page_external(dma, iova, do_accounting);
611 phys_pfn[j] = 0;
612 }
613pin_done:
614 mutex_unlock(&iommu->lock);
615 return ret;
616}
617
618static int vfio_iommu_type1_unpin_pages(void *iommu_data,
619 unsigned long *user_pfn,
620 int npage)
621{
622 struct vfio_iommu *iommu = iommu_data;
623 bool do_accounting;
624 int i;
625
626 if (!iommu || !user_pfn)
627 return -EINVAL;
628
	/* Supported for v2 type only */
630 if (!iommu->v2)
631 return -EACCES;
632
633 mutex_lock(&iommu->lock);
634
635 do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
636 for (i = 0; i < npage; i++) {
637 struct vfio_dma *dma;
638 dma_addr_t iova;
639
640 iova = user_pfn[i] << PAGE_SHIFT;
641 dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
642 if (!dma)
643 goto unpin_exit;
644 vfio_unpin_page_external(dma, iova, do_accounting);
645 }
646
647unpin_exit:
648 mutex_unlock(&iommu->lock);
649 return i > npage ? npage : (i > 0 ? i : -EINVAL);
650}
651
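/*
 * Flush the IOTLB for the deferred unmaps on @regions, then unpin and free
 * each recorded entry.  Used by the fast unmap path once enough regions have
 * accumulated or when an error forces a sync.
 */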
652static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
653 struct list_head *regions)
654{
655 long unlocked = 0;
656 struct vfio_regions *entry, *next;
657
658 iommu_tlb_sync(domain->domain);
659
660 list_for_each_entry_safe(entry, next, regions, list) {
661 unlocked += vfio_unpin_pages_remote(dma,
662 entry->iova,
663 entry->phys >> PAGE_SHIFT,
664 entry->len >> PAGE_SHIFT,
665 false);
666 list_del(&entry->list);
667 kfree(entry);
668 }
669
670 cond_resched();
671
672 return unlocked;
673}
674
/*
 * Generally, VFIO needs to unpin remote pages after each IOTLB flush.
 * Therefore, when using the IOTLB flush sync interface, VFIO needs to keep
 * track of the regions being unmapped (currently using a list).
 *
 * This value specifies the maximum number of regions per IOTLB flush sync.
 */
682#define VFIO_IOMMU_TLB_SYNC_MAX 512
683
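/*
 * Unmap a physically contiguous chunk with iommu_unmap_fast(), queueing the
 * region for a later TLB sync and unpin.  Returns the number of bytes
 * unmapped, 0 on failure (the caller then falls back to the slow path).
 */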
684static size_t unmap_unpin_fast(struct vfio_domain *domain,
685 struct vfio_dma *dma, dma_addr_t *iova,
686 size_t len, phys_addr_t phys, long *unlocked,
687 struct list_head *unmapped_list,
688 int *unmapped_cnt)
689{
690 size_t unmapped = 0;
691 struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
692
693 if (entry) {
694 unmapped = iommu_unmap_fast(domain->domain, *iova, len);
695
696 if (!unmapped) {
697 kfree(entry);
698 } else {
699 iommu_tlb_range_add(domain->domain, *iova, unmapped);
700 entry->iova = *iova;
701 entry->phys = phys;
702 entry->len = unmapped;
703 list_add_tail(&entry->list, unmapped_list);
704
705 *iova += unmapped;
706 (*unmapped_cnt)++;
707 }
708 }
709
	/*
	 * Sync if the number of fast-unmap regions hits the limit
	 * or in case of errors.
	 */
714 if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) {
715 *unlocked += vfio_sync_unpin(dma, domain,
716 unmapped_list);
717 *unmapped_cnt = 0;
718 }
719
720 return unmapped;
721}
722
723static size_t unmap_unpin_slow(struct vfio_domain *domain,
724 struct vfio_dma *dma, dma_addr_t *iova,
725 size_t len, phys_addr_t phys,
726 long *unlocked)
727{
728 size_t unmapped = iommu_unmap(domain->domain, *iova, len);
729
730 if (unmapped) {
731 *unlocked += vfio_unpin_pages_remote(dma, *iova,
732 phys >> PAGE_SHIFT,
733 unmapped >> PAGE_SHIFT,
734 false);
735 *iova += unmapped;
736 cond_resched();
737 }
738 return unmapped;
739}
740
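/*
 * Tear down all IOMMU mappings for @dma in every domain of the container,
 * unpinning the backing pages.  Returns the number of pages unpinned when
 * the caller is left to handle accounting (do_accounting == false).
 */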
741static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
742 bool do_accounting)
743{
744 dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
745 struct vfio_domain *domain, *d;
746 LIST_HEAD(unmapped_region_list);
747 int unmapped_region_cnt = 0;
748 long unlocked = 0;
749
750 if (!dma->size)
751 return 0;
752
753 if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu))
754 return 0;
755
	/*
	 * We use the IOMMU to track the physical addresses, otherwise we'd
	 * need a much more complicated tracking system.  Unfortunately that
	 * means we need to use one of the iommu domains to figure out the
	 * pfns to unpin.  The rest need to be unmapped in advance so we have
	 * no iommu translations remaining when the pages are unpinned.
	 */
763 domain = d = list_first_entry(&iommu->domain_list,
764 struct vfio_domain, next);
765
766 list_for_each_entry_continue(d, &iommu->domain_list, next) {
767 iommu_unmap(d->domain, dma->iova, dma->size);
768 cond_resched();
769 }
770
771 while (iova < end) {
772 size_t unmapped, len;
773 phys_addr_t phys, next;
774
775 phys = iommu_iova_to_phys(domain->domain, iova);
776 if (WARN_ON(!phys)) {
777 iova += PAGE_SIZE;
778 continue;
779 }
780
		/*
		 * To optimize for fewer iommu_unmap() calls, each of which
		 * may require hardware cache flushing, try to find the
		 * largest contiguous physical memory chunk to unmap.
		 */
786 for (len = PAGE_SIZE;
787 !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
788 next = iommu_iova_to_phys(domain->domain, iova + len);
789 if (next != phys + len)
790 break;
791 }
792
		/*
		 * First, try the fast unmap/unpin.  In case of failure,
		 * fall back to the slow unmap/unpin path.
		 */
797 unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys,
798 &unlocked, &unmapped_region_list,
799 &unmapped_region_cnt);
800 if (!unmapped) {
801 unmapped = unmap_unpin_slow(domain, dma, &iova, len,
802 phys, &unlocked);
803 if (WARN_ON(!unmapped))
804 break;
805 }
806 }
807
808 dma->iommu_mapped = false;
809
810 if (unmapped_region_cnt)
811 unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
812
813 if (do_accounting) {
814 vfio_lock_acct(dma, -unlocked, true);
815 return 0;
816 }
817 return unlocked;
818}
819
820static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
821{
822 vfio_unmap_unpin(iommu, dma, true);
823 vfio_unlink_dma(iommu, dma);
824 put_task_struct(dma->task);
825 kfree(dma);
826 iommu->dma_avail++;
827}
828
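/*
 * Intersect the page-size bitmaps of all IOMMU domains in the container to
 * determine the mapping granularities usable for every domain.
 */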
829static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
830{
831 struct vfio_domain *domain;
832 unsigned long bitmap = ULONG_MAX;
833
834 mutex_lock(&iommu->lock);
835 list_for_each_entry(domain, &iommu->domain_list, next)
836 bitmap &= domain->domain->pgsize_bitmap;
837 mutex_unlock(&iommu->lock);
838
	/*
	 * In case the IOMMU supports page sizes smaller than PAGE_SIZE
	 * we pretend PAGE_SIZE is supported and hide sub-PAGE_SIZE sizes.
	 * That way the user will be able to map/unmap buffers whose size
	 * and start address are aligned with PAGE_SIZE.  Pinning code uses
	 * that granularity while the iommu driver can use the sub-PAGE_SIZE
	 * sizes to map the buffer.
	 */
847 if (bitmap & ~PAGE_MASK) {
848 bitmap &= PAGE_MASK;
849 bitmap |= PAGE_SIZE;
850 }
851
852 return bitmap;
853}
854
855static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
856 struct vfio_iommu_type1_dma_unmap *unmap)
857{
858 uint64_t mask;
859 struct vfio_dma *dma, *dma_last = NULL;
860 size_t unmapped = 0;
861 int ret = 0, retries = 0;
862
863 mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
864
865 if (unmap->iova & mask)
866 return -EINVAL;
867 if (!unmap->size || unmap->size & mask)
868 return -EINVAL;
869 if (unmap->iova + unmap->size - 1 < unmap->iova ||
870 unmap->size > SIZE_MAX)
871 return -EINVAL;
872
873 WARN_ON(mask & PAGE_MASK);
874again:
875 mutex_lock(&iommu->lock);
	/*
	 * vfio-iommu-type1 (v1) - User mappings were coalesced together to
	 * avoid tracking individual mappings.  This means that the
	 * granularity of the original mapping was lost and the user was
	 * allowed to attempt to unmap any range.  Depending on how the
	 * mappings were coalesced and which page sizes the IOMMU used,
	 * arbitrary unmaps may or may not have succeeded.
	 *
	 * The v2 interface is meant to be deterministic: an unmap must match
	 * a previous mapping exactly or cover a superset of complete
	 * mappings; it may never bisect a mapping.  The checks below
	 * therefore require that the start and end of the requested range
	 * fall on vfio_dma boundaries, while the loop that follows removes
	 * every mapping fully contained within the range.
	 */
908 if (iommu->v2) {
909 dma = vfio_find_dma(iommu, unmap->iova, 1);
910 if (dma && dma->iova != unmap->iova) {
911 ret = -EINVAL;
912 goto unlock;
913 }
914 dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
915 if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
916 ret = -EINVAL;
917 goto unlock;
918 }
919 }
920
921 while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
922 if (!iommu->v2 && unmap->iova > dma->iova)
923 break;
924
		/*
		 * Only the task (address space) that created this mapping
		 * is allowed to unmap it.
		 */
928 if (dma->task->mm != current->mm)
929 break;
930
931 if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
932 struct vfio_iommu_type1_dma_unmap nb_unmap;
933
934 if (dma_last == dma) {
935 BUG_ON(++retries > 10);
936 } else {
937 dma_last = dma;
938 retries = 0;
939 }
940
941 nb_unmap.iova = dma->iova;
942 nb_unmap.size = dma->size;
			/*
			 * The pfn_list is only modified with iommu->lock
			 * held, but notifier callbacks (mdev vendor drivers)
			 * may need to take it in order to unpin pages.  Drop
			 * the lock, notify, and restart the walk from the
			 * top; retries are bounded by the BUG_ON above.
			 */
950 mutex_unlock(&iommu->lock);
951 blocking_notifier_call_chain(&iommu->notifier,
952 VFIO_IOMMU_NOTIFY_DMA_UNMAP,
953 &nb_unmap);
954 goto again;
955 }
956 unmapped += dma->size;
957 vfio_remove_dma(iommu, dma);
958 }
959
960unlock:
961 mutex_unlock(&iommu->lock);
962
	/* Report how much was unmapped */
964 unmap->size = unmapped;
965
966 return ret;
967}
968
969static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
970 unsigned long pfn, long npage, int prot)
971{
972 struct vfio_domain *d;
973 int ret;
974
975 list_for_each_entry(d, &iommu->domain_list, next) {
976 ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
977 npage << PAGE_SHIFT, prot | d->prot);
978 if (ret)
979 goto unwind;
980
981 cond_resched();
982 }
983
984 return 0;
985
986unwind:
987 list_for_each_entry_continue_reverse(d, &iommu->domain_list, next)
988 iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);
989
990 return ret;
991}
992
993static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
994 size_t map_size)
995{
996 dma_addr_t iova = dma->iova;
997 unsigned long vaddr = dma->vaddr;
998 size_t size = map_size;
999 long npage;
1000 unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1001 int ret = 0;
1002
1003 while (size) {
		/* Pin a contiguous chunk of memory */
1005 npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
1006 size >> PAGE_SHIFT, &pfn, limit);
1007 if (npage <= 0) {
1008 WARN_ON(!npage);
1009 ret = (int)npage;
1010 break;
1011 }
1012
		/* Map it! */
1014 ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
1015 dma->prot);
1016 if (ret) {
1017 vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
1018 npage, true);
1019 break;
1020 }
1021
1022 size -= npage << PAGE_SHIFT;
1023 dma->size += npage << PAGE_SHIFT;
1024 }
1025
1026 dma->iommu_mapped = true;
1027
1028 if (ret)
1029 vfio_remove_dma(iommu, dma);
1030
1031 return ret;
1032}
1033
1034static int vfio_dma_do_map(struct vfio_iommu *iommu,
1035 struct vfio_iommu_type1_dma_map *map)
1036{
1037 dma_addr_t iova = map->iova;
1038 unsigned long vaddr = map->vaddr;
1039 size_t size = map->size;
1040 int ret = 0, prot = 0;
1041 uint64_t mask;
1042 struct vfio_dma *dma;
1043
	/* Verify that none of our __u64 fields overflow */
1045 if (map->size != size || map->vaddr != vaddr || map->iova != iova)
1046 return -EINVAL;
1047
1048 mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
1049
1050 WARN_ON(mask & PAGE_MASK);
1051
1052
1053 if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
1054 prot |= IOMMU_WRITE;
1055 if (map->flags & VFIO_DMA_MAP_FLAG_READ)
1056 prot |= IOMMU_READ;
1057
1058 if (!prot || !size || (size | iova | vaddr) & mask)
1059 return -EINVAL;
1060
	/* Don't allow IOVA or virtual address wrap */
1062 if (iova + size - 1 < iova || vaddr + size - 1 < vaddr)
1063 return -EINVAL;
1064
1065 mutex_lock(&iommu->lock);
1066
1067 if (vfio_find_dma(iommu, iova, size)) {
1068 ret = -EEXIST;
1069 goto out_unlock;
1070 }
1071
1072 if (!iommu->dma_avail) {
1073 ret = -ENOSPC;
1074 goto out_unlock;
1075 }
1076
1077 dma = kzalloc(sizeof(*dma), GFP_KERNEL);
1078 if (!dma) {
1079 ret = -ENOMEM;
1080 goto out_unlock;
1081 }
1082
1083 iommu->dma_avail--;
1084 dma->iova = iova;
1085 dma->vaddr = vaddr;
1086 dma->prot = prot;
	/*
	 * We need to be able to both add to a task's locked memory and test
	 * against the locked memory limit, and we need to be able to do both
	 * outside of this call path, as pinning can be asynchronous via the
	 * external interfaces for mdev devices.  RLIMIT_MEMLOCK requires a
	 * task_struct and pinning requires an mm, but holding an indefinite
	 * mm reference is not recommended, therefore we only hold a task
	 * reference.  We use the group_leader so that the reference remains
	 * valid even if the mapping thread exits, since all threads in the
	 * group share the same mm.
	 *
	 * CAP_IPC_LOCK is also cached here rather than re-evaluated at pin
	 * time, so that a privilege held when the mapping was created keeps
	 * the pinning valid even if the capability is later dropped.
	 */
1113 get_task_struct(current->group_leader);
1114 dma->task = current->group_leader;
1115 dma->lock_cap = capable(CAP_IPC_LOCK);
1116
1117 dma->pfn_list = RB_ROOT;
1118
	/* Insert zero-sized and grow as we map chunks of it */
1120 vfio_link_dma(iommu, dma);
1121
	/* Don't pin and map if container doesn't contain IOMMU capable domain */
1123 if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu))
1124 dma->size = size;
1125 else
1126 ret = vfio_pin_map_dma(iommu, dma, size);
1127
1128out_unlock:
1129 mutex_unlock(&iommu->lock);
1130 return ret;
1131}
1132
1133static int vfio_bus_type(struct device *dev, void *data)
1134{
1135 struct bus_type **bus = data;
1136
1137 if (*bus && *bus != dev->bus)
1138 return -EINVAL;
1139
1140 *bus = dev->bus;
1141
1142 return 0;
1143}
1144
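/*
 * Replay every existing DMA mapping of the container into a newly attached
 * IOMMU domain, pinning the pages first if no other IOMMU-backed domain has
 * already done so.
 */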
1145static int vfio_iommu_replay(struct vfio_iommu *iommu,
1146 struct vfio_domain *domain)
1147{
1148 struct vfio_domain *d;
1149 struct rb_node *n;
1150 unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1151 int ret;
1152
	/* Arbitrarily pick the first domain in the list for lookups */
1154 d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
1155 n = rb_first(&iommu->dma_list);
1156
1157 for (; n; n = rb_next(n)) {
1158 struct vfio_dma *dma;
1159 dma_addr_t iova;
1160
1161 dma = rb_entry(n, struct vfio_dma, node);
1162 iova = dma->iova;
1163
1164 while (iova < dma->iova + dma->size) {
1165 phys_addr_t phys;
1166 size_t size;
1167
1168 if (dma->iommu_mapped) {
1169 phys_addr_t p;
1170 dma_addr_t i;
1171
1172 phys = iommu_iova_to_phys(d->domain, iova);
1173
1174 if (WARN_ON(!phys)) {
1175 iova += PAGE_SIZE;
1176 continue;
1177 }
1178
1179 size = PAGE_SIZE;
1180 p = phys + size;
1181 i = iova + size;
1182 while (i < dma->iova + dma->size &&
1183 p == iommu_iova_to_phys(d->domain, i)) {
1184 size += PAGE_SIZE;
1185 p += PAGE_SIZE;
1186 i += PAGE_SIZE;
1187 }
1188 } else {
1189 unsigned long pfn;
1190 unsigned long vaddr = dma->vaddr +
1191 (iova - dma->iova);
1192 size_t n = dma->iova + dma->size - iova;
1193 long npage;
1194
1195 npage = vfio_pin_pages_remote(dma, vaddr,
1196 n >> PAGE_SHIFT,
1197 &pfn, limit);
1198 if (npage <= 0) {
1199 WARN_ON(!npage);
1200 ret = (int)npage;
1201 return ret;
1202 }
1203
1204 phys = pfn << PAGE_SHIFT;
1205 size = npage << PAGE_SHIFT;
1206 }
1207
1208 ret = iommu_map(domain->domain, iova, phys,
1209 size, dma->prot | domain->prot);
1210 if (ret)
1211 return ret;
1212
1213 iova += size;
1214 }
1215 dma->iommu_mapped = true;
1216 }
1217 return 0;
1218}
1219
/*
 * Probe whether the IOMMU driver transparently promotes physically
 * contiguous, page-sized mappings to superpages.  Map two contiguous pages,
 * then unmap only the first: if more than PAGE_SIZE is reported unmapped,
 * the driver built (and tore down) a larger mapping.  In that case
 * (domain->fgsp) vfio_unmap_unpin() can issue page-sized unmap requests and
 * let the driver handle the superpages, rather than searching for the
 * largest physically contiguous extent itself.
 */
1230static void vfio_test_domain_fgsp(struct vfio_domain *domain)
1231{
1232 struct page *pages;
1233 int ret, order = get_order(PAGE_SIZE * 2);
1234
1235 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
1236 if (!pages)
1237 return;
1238
1239 ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
1240 IOMMU_READ | IOMMU_WRITE | domain->prot);
1241 if (!ret) {
1242 size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
1243
1244 if (unmapped == PAGE_SIZE)
1245 iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
1246 else
1247 domain->fgsp = true;
1248 }
1249
1250 __free_pages(pages, order);
1251}
1252
1253static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
1254 struct iommu_group *iommu_group)
1255{
1256 struct vfio_group *g;
1257
1258 list_for_each_entry(g, &domain->group_list, next) {
1259 if (g->iommu_group == iommu_group)
1260 return g;
1261 }
1262
1263 return NULL;
1264}
1265
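/*
 * Determine whether the group needs a software-managed MSI window and, if
 * so, report its base address so an MSI cookie can be installed on the
 * domain.
 */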
1266static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
1267{
1268 struct list_head group_resv_regions;
1269 struct iommu_resv_region *region, *next;
1270 bool ret = false;
1271
1272 INIT_LIST_HEAD(&group_resv_regions);
1273 iommu_get_group_resv_regions(group, &group_resv_regions);
1274 list_for_each_entry(region, &group_resv_regions, list) {
		/*
		 * The presence of any 'real' MSI regions should take
		 * precedence over the software-managed one if the
		 * IOMMU driver happens to advertise both types.
		 */
1280 if (region->type == IOMMU_RESV_MSI) {
1281 ret = false;
1282 break;
1283 }
1284
1285 if (region->type == IOMMU_RESV_SW_MSI) {
1286 *base = region->start;
1287 ret = true;
1288 }
1289 }
1290 list_for_each_entry_safe(region, next, &group_resv_regions, list)
1291 kfree(region);
1292 return ret;
1293}
1294
1295static struct device *vfio_mdev_get_iommu_device(struct device *dev)
1296{
1297 struct device *(*fn)(struct device *dev);
1298 struct device *iommu_device;
1299
1300 fn = symbol_get(mdev_get_iommu_device);
1301 if (fn) {
1302 iommu_device = fn(dev);
1303 symbol_put(mdev_get_iommu_device);
1304
1305 return iommu_device;
1306 }
1307
1308 return NULL;
1309}
1310
1311static int vfio_mdev_attach_domain(struct device *dev, void *data)
1312{
1313 struct iommu_domain *domain = data;
1314 struct device *iommu_device;
1315
1316 iommu_device = vfio_mdev_get_iommu_device(dev);
1317 if (iommu_device) {
1318 if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
1319 return iommu_aux_attach_device(domain, iommu_device);
1320 else
1321 return iommu_attach_device(domain, iommu_device);
1322 }
1323
1324 return -EINVAL;
1325}
1326
1327static int vfio_mdev_detach_domain(struct device *dev, void *data)
1328{
1329 struct iommu_domain *domain = data;
1330 struct device *iommu_device;
1331
1332 iommu_device = vfio_mdev_get_iommu_device(dev);
1333 if (iommu_device) {
1334 if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
1335 iommu_aux_detach_device(domain, iommu_device);
1336 else
1337 iommu_detach_device(domain, iommu_device);
1338 }
1339
1340 return 0;
1341}
1342
1343static int vfio_iommu_attach_group(struct vfio_domain *domain,
1344 struct vfio_group *group)
1345{
1346 if (group->mdev_group)
1347 return iommu_group_for_each_dev(group->iommu_group,
1348 domain->domain,
1349 vfio_mdev_attach_domain);
1350 else
1351 return iommu_attach_group(domain->domain, group->iommu_group);
1352}
1353
1354static void vfio_iommu_detach_group(struct vfio_domain *domain,
1355 struct vfio_group *group)
1356{
1357 if (group->mdev_group)
1358 iommu_group_for_each_dev(group->iommu_group, domain->domain,
1359 vfio_mdev_detach_domain);
1360 else
1361 iommu_detach_group(domain->domain, group->iommu_group);
1362}
1363
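/*
 * Detect whether @bus is the mediated device bus without creating a hard
 * module dependency on mdev (hence the symbol_get()/symbol_put() dance).
 */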
1364static bool vfio_bus_is_mdev(struct bus_type *bus)
1365{
1366 struct bus_type *mdev_bus;
1367 bool ret = false;
1368
1369 mdev_bus = symbol_get(mdev_bus_type);
1370 if (mdev_bus) {
1371 ret = (bus == mdev_bus);
1372 symbol_put(mdev_bus_type);
1373 }
1374
1375 return ret;
1376}
1377
1378static int vfio_mdev_iommu_device(struct device *dev, void *data)
1379{
1380 struct device **old = data, *new;
1381
1382 new = vfio_mdev_get_iommu_device(dev);
1383 if (!new || (*old && *old != new))
1384 return -EINVAL;
1385
1386 *old = new;
1387
1388 return 0;
1389}
1390
1391static int vfio_iommu_type1_attach_group(void *iommu_data,
1392 struct iommu_group *iommu_group)
1393{
1394 struct vfio_iommu *iommu = iommu_data;
1395 struct vfio_group *group;
1396 struct vfio_domain *domain, *d;
1397 struct bus_type *bus = NULL;
1398 int ret;
1399 bool resv_msi, msi_remap;
1400 phys_addr_t resv_msi_base;
1401
1402 mutex_lock(&iommu->lock);
1403
1404 list_for_each_entry(d, &iommu->domain_list, next) {
1405 if (find_iommu_group(d, iommu_group)) {
1406 mutex_unlock(&iommu->lock);
1407 return -EINVAL;
1408 }
1409 }
1410
1411 if (iommu->external_domain) {
1412 if (find_iommu_group(iommu->external_domain, iommu_group)) {
1413 mutex_unlock(&iommu->lock);
1414 return -EINVAL;
1415 }
1416 }
1417
1418 group = kzalloc(sizeof(*group), GFP_KERNEL);
1419 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1420 if (!group || !domain) {
1421 ret = -ENOMEM;
1422 goto out_free;
1423 }
1424
1425 group->iommu_group = iommu_group;
1426
	/* Determine bus_type in order to allocate a vfio_domain */
1428 ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
1429 if (ret)
1430 goto out_free;
1431
1432 if (vfio_bus_is_mdev(bus)) {
1433 struct device *iommu_device = NULL;
1434
1435 group->mdev_group = true;
1436
		/* Determine the isolation type */
1438 ret = iommu_group_for_each_dev(iommu_group, &iommu_device,
1439 vfio_mdev_iommu_device);
1440 if (ret || !iommu_device) {
1441 if (!iommu->external_domain) {
1442 INIT_LIST_HEAD(&domain->group_list);
1443 iommu->external_domain = domain;
1444 } else {
1445 kfree(domain);
1446 }
1447
1448 list_add(&group->next,
1449 &iommu->external_domain->group_list);
1450 mutex_unlock(&iommu->lock);
1451
1452 return 0;
1453 }
1454
1455 bus = iommu_device->bus;
1456 }
1457
1458 domain->domain = iommu_domain_alloc(bus);
1459 if (!domain->domain) {
1460 ret = -EIO;
1461 goto out_free;
1462 }
1463
1464 if (iommu->nesting) {
1465 int attr = 1;
1466
1467 ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
1468 &attr);
1469 if (ret)
1470 goto out_domain;
1471 }
1472
1473 ret = vfio_iommu_attach_group(domain, group);
1474 if (ret)
1475 goto out_domain;
1476
1477 resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
1478
1479 INIT_LIST_HEAD(&domain->group_list);
1480 list_add(&group->next, &domain->group_list);
1481
1482 msi_remap = irq_domain_check_msi_remap() ||
1483 iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
1484
1485 if (!allow_unsafe_interrupts && !msi_remap) {
1486 pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
1487 __func__);
1488 ret = -EPERM;
1489 goto out_detach;
1490 }
1491
1492 if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
1493 domain->prot |= IOMMU_CACHE;
1494
	/*
	 * Try to match an existing compatible domain.  We don't want to
	 * preclude an IOMMU driver supporting multiple bus_types and being
	 * able to include different bus_types in the same IOMMU domain, so
	 * we test whether the domains use the same iommu_ops rather than
	 * testing if they're on the same bus_type.
	 */
1502 list_for_each_entry(d, &iommu->domain_list, next) {
1503 if (d->domain->ops == domain->domain->ops &&
1504 d->prot == domain->prot) {
1505 vfio_iommu_detach_group(domain, group);
1506 if (!vfio_iommu_attach_group(d, group)) {
1507 list_add(&group->next, &d->group_list);
1508 iommu_domain_free(domain->domain);
1509 kfree(domain);
1510 mutex_unlock(&iommu->lock);
1511 return 0;
1512 }
1513
1514 ret = vfio_iommu_attach_group(domain, group);
1515 if (ret)
1516 goto out_domain;
1517 }
1518 }
1519
1520 vfio_test_domain_fgsp(domain);
1521
	/* replay mappings on new domains */
1523 ret = vfio_iommu_replay(iommu, domain);
1524 if (ret)
1525 goto out_detach;
1526
1527 if (resv_msi) {
1528 ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
1529 if (ret)
1530 goto out_detach;
1531 }
1532
1533 list_add(&domain->next, &iommu->domain_list);
1534
1535 mutex_unlock(&iommu->lock);
1536
1537 return 0;
1538
1539out_detach:
1540 vfio_iommu_detach_group(domain, group);
1541out_domain:
1542 iommu_domain_free(domain->domain);
1543out_free:
1544 kfree(domain);
1545 kfree(group);
1546 mutex_unlock(&iommu->lock);
1547 return ret;
1548}
1549
1550static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
1551{
1552 struct rb_node *node;
1553
1554 while ((node = rb_first(&iommu->dma_list)))
1555 vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
1556}
1557
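/*
 * Called when the last IOMMU-backed domain is removed while an external
 * (mdev) domain remains: unmap everything and re-account so that only the
 * externally pinned pages stay charged to the task.
 */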
1558static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
1559{
1560 struct rb_node *n, *p;
1561
1562 n = rb_first(&iommu->dma_list);
1563 for (; n; n = rb_next(n)) {
1564 struct vfio_dma *dma;
1565 long locked = 0, unlocked = 0;
1566
1567 dma = rb_entry(n, struct vfio_dma, node);
1568 unlocked += vfio_unmap_unpin(iommu, dma, false);
1569 p = rb_first(&dma->pfn_list);
1570 for (; p; p = rb_next(p)) {
1571 struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
1572 node);
1573
1574 if (!is_invalid_reserved_pfn(vpfn->pfn))
1575 locked++;
1576 }
1577 vfio_lock_acct(dma, locked - unlocked, true);
1578 }
1579}
1580
1581static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu)
1582{
1583 struct rb_node *n;
1584
1585 n = rb_first(&iommu->dma_list);
1586 for (; n; n = rb_next(n)) {
1587 struct vfio_dma *dma;
1588
1589 dma = rb_entry(n, struct vfio_dma, node);
1590
1591 if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
1592 break;
1593 }
1594
1595 WARN_ON(iommu->notifier.head);
1596}
1597
1598static void vfio_iommu_type1_detach_group(void *iommu_data,
1599 struct iommu_group *iommu_group)
1600{
1601 struct vfio_iommu *iommu = iommu_data;
1602 struct vfio_domain *domain;
1603 struct vfio_group *group;
1604
1605 mutex_lock(&iommu->lock);
1606
1607 if (iommu->external_domain) {
1608 group = find_iommu_group(iommu->external_domain, iommu_group);
1609 if (group) {
1610 list_del(&group->next);
1611 kfree(group);
1612
1613 if (list_empty(&iommu->external_domain->group_list)) {
1614 vfio_sanity_check_pfn_list(iommu);
1615
1616 if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu))
1617 vfio_iommu_unmap_unpin_all(iommu);
1618
1619 kfree(iommu->external_domain);
1620 iommu->external_domain = NULL;
1621 }
1622 goto detach_group_done;
1623 }
1624 }
1625
1626 list_for_each_entry(domain, &iommu->domain_list, next) {
1627 group = find_iommu_group(domain, iommu_group);
1628 if (!group)
1629 continue;
1630
1631 vfio_iommu_detach_group(domain, group);
1632 list_del(&group->next);
1633 kfree(group);
1634
		/*
		 * Group ownership provides privilege, if the group list is
		 * empty, the domain goes away.  If it's the last domain with
		 * iommu and no external (mdev) domain exists, all mappings go
		 * away too; if an external domain remains, re-account so only
		 * externally pinned pages stay charged.
		 */
1641 if (list_empty(&domain->group_list)) {
1642 if (list_is_singular(&iommu->domain_list)) {
1643 if (!iommu->external_domain)
1644 vfio_iommu_unmap_unpin_all(iommu);
1645 else
1646 vfio_iommu_unmap_unpin_reaccount(iommu);
1647 }
1648 iommu_domain_free(domain->domain);
1649 list_del(&domain->next);
1650 kfree(domain);
1651 }
1652 break;
1653 }
1654
1655detach_group_done:
1656 mutex_unlock(&iommu->lock);
1657}
1658
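/*
 * Allocate container state.  The requested extension (type1, type1v2 or
 * nesting) selects the unmap semantics and whether nested translation is
 * requested from the IOMMU driver.
 */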
1659static void *vfio_iommu_type1_open(unsigned long arg)
1660{
1661 struct vfio_iommu *iommu;
1662
1663 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1664 if (!iommu)
1665 return ERR_PTR(-ENOMEM);
1666
1667 switch (arg) {
1668 case VFIO_TYPE1_IOMMU:
1669 break;
1670 case VFIO_TYPE1_NESTING_IOMMU:
1671 iommu->nesting = true;
		/* fall through */
1673 case VFIO_TYPE1v2_IOMMU:
1674 iommu->v2 = true;
1675 break;
1676 default:
1677 kfree(iommu);
1678 return ERR_PTR(-EINVAL);
1679 }
1680
1681 INIT_LIST_HEAD(&iommu->domain_list);
1682 iommu->dma_list = RB_ROOT;
1683 iommu->dma_avail = dma_entry_limit;
1684 mutex_init(&iommu->lock);
1685 BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
1686
1687 return iommu;
1688}
1689
1690static void vfio_release_domain(struct vfio_domain *domain, bool external)
1691{
1692 struct vfio_group *group, *group_tmp;
1693
1694 list_for_each_entry_safe(group, group_tmp,
1695 &domain->group_list, next) {
1696 if (!external)
1697 vfio_iommu_detach_group(domain, group);
1698 list_del(&group->next);
1699 kfree(group);
1700 }
1701
1702 if (!external)
1703 iommu_domain_free(domain->domain);
1704}
1705
1706static void vfio_iommu_type1_release(void *iommu_data)
1707{
1708 struct vfio_iommu *iommu = iommu_data;
1709 struct vfio_domain *domain, *domain_tmp;
1710
1711 if (iommu->external_domain) {
1712 vfio_release_domain(iommu->external_domain, true);
1713 vfio_sanity_check_pfn_list(iommu);
1714 kfree(iommu->external_domain);
1715 }
1716
1717 vfio_iommu_unmap_unpin_all(iommu);
1718
1719 list_for_each_entry_safe(domain, domain_tmp,
1720 &iommu->domain_list, next) {
1721 vfio_release_domain(domain, false);
1722 list_del(&domain->next);
1723 kfree(domain);
1724 }
1725 kfree(iommu);
1726}
1727
1728static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
1729{
1730 struct vfio_domain *domain;
1731 int ret = 1;
1732
1733 mutex_lock(&iommu->lock);
1734 list_for_each_entry(domain, &iommu->domain_list, next) {
1735 if (!(domain->prot & IOMMU_CACHE)) {
1736 ret = 0;
1737 break;
1738 }
1739 }
1740 mutex_unlock(&iommu->lock);
1741
1742 return ret;
1743}
1744
1745static long vfio_iommu_type1_ioctl(void *iommu_data,
1746 unsigned int cmd, unsigned long arg)
1747{
1748 struct vfio_iommu *iommu = iommu_data;
1749 unsigned long minsz;
1750
1751 if (cmd == VFIO_CHECK_EXTENSION) {
1752 switch (arg) {
1753 case VFIO_TYPE1_IOMMU:
1754 case VFIO_TYPE1v2_IOMMU:
1755 case VFIO_TYPE1_NESTING_IOMMU:
1756 return 1;
1757 case VFIO_DMA_CC_IOMMU:
1758 if (!iommu)
1759 return 0;
1760 return vfio_domains_have_iommu_cache(iommu);
1761 default:
1762 return 0;
1763 }
1764 } else if (cmd == VFIO_IOMMU_GET_INFO) {
1765 struct vfio_iommu_type1_info info;
1766
1767 minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
1768
1769 if (copy_from_user(&info, (void __user *)arg, minsz))
1770 return -EFAULT;
1771
1772 if (info.argsz < minsz)
1773 return -EINVAL;
1774
1775 info.flags = VFIO_IOMMU_INFO_PGSIZES;
1776
1777 info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
1778
1779 return copy_to_user((void __user *)arg, &info, minsz) ?
1780 -EFAULT : 0;
1781
1782 } else if (cmd == VFIO_IOMMU_MAP_DMA) {
1783 struct vfio_iommu_type1_dma_map map;
1784 uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
1785 VFIO_DMA_MAP_FLAG_WRITE;
1786
1787 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
1788
1789 if (copy_from_user(&map, (void __user *)arg, minsz))
1790 return -EFAULT;
1791
1792 if (map.argsz < minsz || map.flags & ~mask)
1793 return -EINVAL;
1794
1795 return vfio_dma_do_map(iommu, &map);
1796
1797 } else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
1798 struct vfio_iommu_type1_dma_unmap unmap;
1799 long ret;
1800
1801 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
1802
1803 if (copy_from_user(&unmap, (void __user *)arg, minsz))
1804 return -EFAULT;
1805
1806 if (unmap.argsz < minsz || unmap.flags)
1807 return -EINVAL;
1808
1809 ret = vfio_dma_do_unmap(iommu, &unmap);
1810 if (ret)
1811 return ret;
1812
1813 return copy_to_user((void __user *)arg, &unmap, minsz) ?
1814 -EFAULT : 0;
1815 }
1816
1817 return -ENOTTY;
1818}
1819
1820static int vfio_iommu_type1_register_notifier(void *iommu_data,
1821 unsigned long *events,
1822 struct notifier_block *nb)
1823{
1824 struct vfio_iommu *iommu = iommu_data;
1825
	/* clear known events */
1827 *events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1828
	/* refuse to register if still events remaining */
1830 if (*events)
1831 return -EINVAL;
1832
1833 return blocking_notifier_chain_register(&iommu->notifier, nb);
1834}
1835
1836static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
1837 struct notifier_block *nb)
1838{
1839 struct vfio_iommu *iommu = iommu_data;
1840
1841 return blocking_notifier_chain_unregister(&iommu->notifier, nb);
1842}
1843
1844static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
1845 .name = "vfio-iommu-type1",
1846 .owner = THIS_MODULE,
1847 .open = vfio_iommu_type1_open,
1848 .release = vfio_iommu_type1_release,
1849 .ioctl = vfio_iommu_type1_ioctl,
1850 .attach_group = vfio_iommu_type1_attach_group,
1851 .detach_group = vfio_iommu_type1_detach_group,
1852 .pin_pages = vfio_iommu_type1_pin_pages,
1853 .unpin_pages = vfio_iommu_type1_unpin_pages,
1854 .register_notifier = vfio_iommu_type1_register_notifier,
1855 .unregister_notifier = vfio_iommu_type1_unregister_notifier,
1856};
1857
1858static int __init vfio_iommu_type1_init(void)
1859{
1860 return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
1861}
1862
1863static void __exit vfio_iommu_type1_cleanup(void)
1864{
1865 vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
1866}
1867
1868module_init(vfio_iommu_type1_init);
1869module_exit(vfio_iommu_type1_cleanup);
1870
1871MODULE_VERSION(DRIVER_VERSION);
1872MODULE_LICENSE("GPL v2");
1873MODULE_AUTHOR(DRIVER_AUTHOR);
1874MODULE_DESCRIPTION(DRIVER_DESC);
1875