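/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * The original file header was lost; this summary is reconstructed from the
 * module macros below (DRIVER_DESC, DRIVER_AUTHOR, MODULE_LICENSE).
 *
 * Author: aik@ozlabs.ru
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */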
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}

static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}

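/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU.
 */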
struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

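/*
 * A container needs to remember which preregistered regions it has
 * referenced so it can do proper cleanup at the userspace process exit.
 */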
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

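/*
 * The container descriptor: tracks the owner mm, the IOMMU tables shared
 * by all attached groups, and the list of preregistered memory regions.
 */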
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	return tce_iommu_prereg_free(container, tcemem);
}

static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem)
				return -EBUSY;
		}
	}

	ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
	if (ret)
		return ret;

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		mm_iommu_put(container->mm, mem);
		return -ENOMEM;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;
}

static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);
	unsigned long *uas;
	long ret;

	BUG_ON(tbl->it_userspace);

	ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
	if (ret)
		return ret;

	uas = vzalloc(cb);
	if (!uas) {
		decrement_locked_vm(mm, cb >> PAGE_SHIFT);
		return -ENOMEM;
	}
	tbl->it_userspace = uas;

	return 0;
}

static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);

	if (!tbl->it_userspace)
		return;

	vfree(tbl->it_userspace);
	tbl->it_userspace = NULL;
	decrement_locked_vm(mm, cb >> PAGE_SHIFT);
}

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
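	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */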
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

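	/*
	 * Pages mapped into the IOMMU are effectively locked memory, but
	 * accounting each page on the hot map/unmap paths would be too
	 * expensive. Instead, account a worst case up front when the
	 * container is enabled: the size of the whole 32-bit DMA window,
	 * which is usually small compared to total memory on POWER.
	 * That amount is charged against RLIMIT_MEMLOCK below.
	 */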
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

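	/*
	 * If a table was created by this container, it has not been
	 * disposed yet; do it now.
	 */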
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	while (!list_empty(&container->prereg_list)) {
		struct tce_iommu_prereg *tcemem;

		tcemem = list_first_entry(&container->prereg_list,
				struct tce_iommu_prereg, next);
		WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem));
	}

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long size,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, size);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl),
			&hpa, &mem);
	if (ret)
		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
				__func__, *pua, entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = 0;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;

	for ( ; pages; --pages, ++entry) {
		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	if (!tbl->it_userspace) {
		ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
		if (ret)
			return ret;
	}

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
				entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

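		/* Preserve offset within IOMMU page */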
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

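		/* This fails if the registered region is being torn down */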
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
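			/* dirtmp cannot be DMA_NONE here */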
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = tce;

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	tce_iommu_userspace_view_free(tbl, container->mm);
	iommu_tce_table_put(tbl);
	decrement_locked_vm(container->mm, pages);
}

static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

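	/* Get the first group for ops::create_table */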
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

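	/* Create the TCE table */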
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

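	/*
	 * Program the table into every attached group; the groups have
	 * already been checked for compatibility at attach time.
	 */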
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

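	/* Return the start address assigned by the platform */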
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

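	/* Detach groups from IOMMUs */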
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

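		/*
		 * The default DMA window is exposed to the guest via
		 * dma32_window_start/size of VFIO_IOMMU_SPAPR_TCE_GET_INFO.
		 * Some platforms allow userspace to remove this window,
		 * some do not, so check for the platform capability here.
		 */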
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

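	/* Free the table */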
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

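	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */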
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

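		/* iova is checked by the IOMMU API */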
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

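		/* No flags are supported yet */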
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

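		/* No flags are supported yet */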
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

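		/* No flags are supported yet */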
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_userspace_view_free(tbl, container->mm);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

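	/* Set all windows to the new group */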
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

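	/* Check if the new group is compatible with the attached ones */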
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);