// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/mm.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);
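/*
 * A tce_iommu_group tracks one IOMMU group attached to the container;
 * the container keeps these on group_list.
 */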
struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};
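/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the userspace process exit.
 */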
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};
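/*
 * The container descriptor holds per-container state: attached IOMMU
 * groups, created TCE tables and the list of memory regions
 * preregistered for the v2 (dynamic DMA windows) flow.
 */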
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;
	long ret;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		ret = -ENOENT;
	else
		ret = tce_iommu_prereg_free(container, tcemem);

	mm_iommu_put(container->mm, mem);

	return ret;
}

static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem) {
				ret = -EBUSY;
				goto put_exit;
			}
		}
	} else {
		ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
		if (ret)
			return ret;
	}

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		ret = -ENOMEM;
		goto put_exit;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;

put_exit:
	mm_iommu_put(container->mm, mem);
	return ret;
}

static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
		unsigned int page_shift)
{
	struct page *page;
	unsigned long size = 0;

	if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
		return size == (1UL << page_shift);

	page = pfn_to_page(hpa >> PAGE_SHIFT);
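	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk that it should.
	 */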
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

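	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, the map/unmap
	 * paths can be very hot and the accounting would kill performance,
	 * especially since it would be difficult to impossible to handle the
	 * accounting in real mode only. To address that, rather than
	 * precisely accounting every page, we instead account for a worst
	 * case on locked memory when the IOMMU is enabled and disabled. The
	 * worst case upper bound on locked memory is the size of the whole
	 * IOMMU window, which is usually relatively small (compared to total
	 * memory sizes) on POWER hardware.
	 *
	 * We do not allow enabling a container without a group attached as
	 * there is no way to know how much we should increment the locked_vm
	 * counter otherwise.
	 */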
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = account_locked_vm(container->mm, locked, true);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	account_locked_vm(container->mm, container->locked_pages, false);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	struct tce_iommu_prereg *tcemem, *tmtmp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}
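	/*
	 * If VFIO created a table, it was not disposed
	 * by tce_iommu_detach_group() so do it now.
	 */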
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
		WARN_ON(tce_iommu_prereg_free(container, tcemem));

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long shift,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
			tbl->it_page_shift, &hpa, &mem);
	if (ret)
		pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
				__func__, be64_to_cpu(*pua), entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = cpu_to_be64(0);
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;
	unsigned long lastentry = entry + pages;

	for ( ; entry < lastentry; ++entry) {
		if (tbl->it_indirect_levels && tbl->it_userspace) {
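			/*
			 * For multilevel tables, we can take a shortcut here
			 * and skip some TCEs as we know that the userspace
			 * addresses cache is a mirror of the real TCE table
			 * and if it is missing some indirect levels, then
			 * the hardware table does not have them allocated
			 * either and therefore does not require updating.
			 */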
			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
					entry);
			if (!pua) {
				/* align to level_size which is power of two */
				entry |= tbl->it_level_size - 1;
				continue;
			}
		}

		cond_resched();

		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa,
				&direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
			&page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, tbl->it_page_shift, &hpa, &mem);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = cpu_to_be64(tce);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	iommu_tce_table_put(tbl);
	account_locked_vm(container->mm, pages, false);
}

static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

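	/* All attached groups are compatible, so take ops from the first one */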
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

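	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */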
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

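		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not so
		 * depending on the platform we have to allow or deny it.
		 */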
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

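	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */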
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

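		/* iova is checked by the IOMMU API */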
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		if (container->tables[i])
			table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

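	/* Set all windows to the new group */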
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

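	/* Check if new group has the same iommu_ops (i.e. compatible) */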
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);