/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/mm.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION "0.1"
#define DRIVER_AUTHOR "aik@ozlabs.ru"
#define DRIVER_DESC "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

/*
 * VFIO IOMMU fd for the SPAPR_TCE (powerpc) IOMMU implementation.
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU.
 */
struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

/*
 * A container needs to remember which preregistered regions it has
 * referenced in order to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

/*
 * The container descriptor. It keeps the list of attached IOMMU groups,
 * the list of preregistered memory regions and the DMA windows (tables)
 * owned by this container.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

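/*
 * Bind the container to the mm of the process that first uses it and refuse
 * any further use from a different mm.
 */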
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;
	long ret;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		ret = -ENOENT;
	else
		ret = tce_iommu_prereg_free(container, tcemem);

	mm_iommu_put(container->mm, mem);

	return ret;
}

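/*
 * Preregister a chunk of userspace memory with mm_iommu so that its pages
 * stay pinned; an already known chunk gets an extra reference unless this
 * container has referenced it before, in which case -EBUSY is returned.
 */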
static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem) {
				ret = -EBUSY;
				goto put_exit;
			}
		}
	} else {
		ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
		if (ret)
			return ret;
	}

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		ret = -ENOMEM;
		goto put_exit;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;

put_exit:
	mm_iommu_put(container->mm, mem);
	return ret;
}

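/*
 * Return true if the host page backing @hpa is at least as large as the
 * IOMMU page, so a single TCE cannot grant access beyond its backing page.
 */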
static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
		unsigned int page_shift)
{
	struct page *page;
	unsigned long size = 0;

	if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
		return size == (1UL << page_shift);

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * the page we just found. Otherwise the hardware could get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so the locked page accounting would normally have to
	 * be updated on every map and unmap.  On powerpc those paths are very
	 * hot, so instead of accounting each page precisely, a worst case is
	 * accounted once here: the size of the whole 32-bit DMA window, which
	 * is usually small compared to total memory.
	 *
	 * This is also why enabling a container without a group attached is
	 * refused: without a group there is no window size to charge against
	 * the locked_vm limit.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = account_locked_vm(container->mm, locked, true);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	account_locked_vm(container->mm, container->locked_pages, false);
}

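/*
 * open() callback of the VFIO IOMMU driver: allocate a container and record
 * whether the v1 or v2 sPAPR TCE API was requested.
 */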
static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

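/*
 * release() callback: detach any remaining groups, dispose of tables and
 * preregistered memory, then drop the mm reference taken at first use.
 */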
static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed of by
	 * tce_iommu_detach_group(), so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	while (!list_empty(&container->prereg_list)) {
		struct tce_iommu_prereg *tcemem;

		tcemem = list_first_entry(&container->prereg_list,
				struct tce_iommu_prereg, next);
		WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem));
	}

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

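/*
 * Translate a userspace address into a host physical address using the
 * preregistered memory list; also return the region backing it.
 */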
static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long shift,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
			tbl->it_page_shift, &hpa, &mem);
	if (ret)
		pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
				__func__, be64_to_cpu(*pua), entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = cpu_to_be64(0);
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;
	unsigned long lastentry = entry + pages;

	for ( ; entry < lastentry; ++entry) {
		if (tbl->it_indirect_levels && tbl->it_userspace) {
			/*
			 * For multilevel tables we can take a shortcut here
			 * and skip some TCEs: the userspace address cache is
			 * a mirror of the real TCE table, so if it is missing
			 * some indirect levels, the hardware table does not
			 * have them allocated either and needs no update.
			 */
			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
					entry);
			if (!pua) {
				/* align to level_size which is power of two */
				entry |= tbl->it_level_size - 1;
				continue;
			}
		}

		cond_resched();

		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa,
				&direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

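/* Pin a single userspace page and return its host physical address (v1 path) */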
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
			&page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

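/*
 * Same as tce_iommu_build() but for v2 containers: the pages come from
 * preregistered (already pinned) memory and the userspace address of every
 * TCE is cached in it_userspace for later unmapping.
 */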
static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, tbl->it_page_shift, &hpa, &mem);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = cpu_to_be64(tce);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	iommu_tce_table_put(tbl);
	account_locked_vm(container->mm, pages, false);
}

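/*
 * Create a DMA window in the first free table slot and program it into
 * every attached group; the memory used by the table itself is charged to
 * locked_vm in tce_iommu_create_table().
 */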
static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not, so
		 * here we check for the platform capability.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

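/*
 * Return the tables previously taken over with tce_iommu_take_ownership()
 * to the platform: clear them and release ownership (non-DDW, v1 path).
 */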
static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

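/*
 * DDW-capable (v2) path: take exclusive ownership of the group and program
 * any windows the container already owns into it.
 */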
static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

	/* Set all windows to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

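/*
 * Attach an IOMMU group to the container. Groups without dynamic DMA window
 * ops can only be attached to v1 containers, DDW-capable groups only to v2
 * containers, and all attached groups must share the same create_table op.
 */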
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret = 0;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* Check if the new group has the same iommu ops (i.e. is compatible) */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto free_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto free_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

free_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

unlock_exit:
	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);