/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 *     Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

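/*
 * Helpers to account pinned pages against the mm's RLIMIT_MEMLOCK:
 * the increment fails with -ENOMEM once the limit would be exceeded
 * (unless the task has CAP_IPC_LOCK); decrement_locked_vm() below is
 * its pair.
 */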
static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}

static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the container release.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

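/*
 * Binds the container to the mm of the first task that uses it and
 * fails any later attempt to use the container from a different mm.
 * The mm reference is dropped in tce_iommu_release().
 */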
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

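/*
 * Handles VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: finds the preregistered
 * region matching the given vaddr/size and drops this container's
 * reference to it; fails with -ENOENT if no matching region was
 * registered through this container.
 */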
static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	return tce_iommu_prereg_free(container, tcemem);
}

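/*
 * Handles VFIO_IOMMU_SPAPR_REGISTER_MEMORY: pins the userspace range
 * via mm_iommu_get() and remembers it in the container so TCE requests
 * can later be translated without pinning again; registering the same
 * region twice in one container fails with -EBUSY.
 */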
static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem)
				return -EBUSY;
		}
	}

	ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
	if (ret)
		return ret;

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		mm_iommu_put(container->mm, mem);
		return -ENOMEM;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;
}

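/*
 * The userspace view is a kernel array with one entry per TCE which
 * keeps the userspace address of every mapped TCE so it can be found
 * again at unmap time (v2 only). Its pages are accounted against
 * RLIMIT_MEMLOCK as well.
 */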
static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);
	unsigned long *uas;
	long ret;

	BUG_ON(tbl->it_userspace);

	ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
	if (ret)
		return ret;

	uas = vzalloc(cb);
	if (!uas) {
		decrement_locked_vm(mm, cb >> PAGE_SHIFT);
		return -ENOMEM;
	}
	tbl->it_userspace = uas;

	return 0;
}

static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);

	if (!tbl->it_userspace)
		return;

	vfree(tbl->it_userspace);
	tbl->it_userspace = NULL;
	decrement_locked_vm(mm, cb >> PAGE_SHIFT);
}

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap.  For powerpc, the map unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult to impossible
	 * to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled.  The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
	 * that would effectively kill the guest at random points, much better
	 * enforcing the limit based on the max that the guest can map.
	 *
	 * Unfortunately at the moment it counts whole tables, no matter how
	 * much memory the guest has mapped already.  Since the worst case is
	 * taken from the 32-bit DMA window of the first attached group, a
	 * group must already be attached here; enabling a container without
	 * a group is therefore not allowed.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed
	 * by tce_iommu_detach_group() so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	while (!list_empty(&container->prereg_list)) {
		struct tce_iommu_prereg *tcemem;

		tcemem = list_first_entry(&container->prereg_list,
				struct tce_iommu_prereg, next);
		WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem));
	}

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

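/*
 * Translates a userspace address to a host physical address via the
 * preregistered memory list; fails if the address has not been
 * preregistered with VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
 */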
static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long size,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, size);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl),
			&hpa, &mem);
	if (ret)
		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
				__func__, *pua, entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = 0;
}

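/*
 * Clears a range of TCE table entries, releasing the referenced pages.
 * Entries which were already empty (DMA_NONE) are skipped.
 */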
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;

	for ( ; pages; --pages, ++entry) {
		cond_resched();

		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

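/*
 * Pins a single userspace page for a v1 mapping with
 * get_user_pages_fast() and returns its host physical address.
 */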
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

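/*
 * Maps a range of userspace memory into the TCE table (v1 flow):
 * each page is pinned, checked against the IOMMU page size and
 * programmed with iommu_tce_xchg(); on failure the partially built
 * range is torn down again.
 */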
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

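/*
 * The v2 counterpart of tce_iommu_build(): pages must have been
 * preregistered, so instead of pinning here we look the address up,
 * take a "mapped" reference on the region and remember the userspace
 * address in tbl->it_userspace for the unmap path.
 */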
static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	if (!tbl->it_userspace) {
		ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
		if (ret)
			return ret;
	}

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
				entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = tce;

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

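/*
 * Allocates a TCE table via the platform ops, charging the table
 * memory against RLIMIT_MEMLOCK first.
 */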
static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	tce_iommu_userspace_view_free(tbl, container->mm);
	iommu_tce_table_put(tbl);
	decrement_locked_vm(container->mm, pages);
}

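/*
 * Creates a new DMA window: allocates a table for it and programs the
 * window into every attached group. All attached groups share the same
 * tables, which is checked at attach time.
 */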
static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not so
		 * here we check for the platform capability.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

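/*
 * For v2 containers the default 32-bit DMA window is not created at
 * attach time but deferred until the container is actually used
 * (map/unmap or explicit window creation); this gives userspace a
 * chance to remove the pending default window first, see the
 * VFIO_IOMMU_SPAPR_TCE_REMOVE handling of def_window_pending.
 */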
static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

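/*
 * Undoes tce_iommu_take_ownership(): clears and returns the tables
 * which were borrowed from the platform when the group was attached
 * (non-DDW, v1 flow).
 */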
static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_userspace_view_free(tbl, container->mm);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

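/*
 * Takes the platform-provided tables away from the kernel for
 * exclusive userspace use (non-DDW, v1 flow); on failure, ownership
 * of the tables taken so far is rolled back.
 */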
static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

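/*
 * DDW (v2) flow: hands ownership of the group to VFIO and programs
 * the windows the container has already created into this group.
 */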
static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

	/* Set all windows to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* Check if new group has the same iommu ops (i.e. compatible) */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);