/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

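/*
 * DMA-mapped pages are effectively pinned in memory, so every map has to
 * be charged against the owning mm's RLIMIT_MEMLOCK.  The two helpers
 * below adjust mm->locked_vm under mmap_sem; the increment fails with
 * -ENOMEM once the limit would be exceeded, unless the caller has
 * CAP_IPC_LOCK.
 */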
static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}

static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

/*
 * The container descriptor keeps the state shared by all groups attached
 * to one VFIO container: the owning mm, locked-page accounting, the TCE
 * tables and the list of preregistered memory regions.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

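/*
 * A container is bound to a single mm: the first ioctl that needs memory
 * accounting latches current->mm here and takes a reference which is
 * dropped in tce_iommu_release().  Any later caller with a different mm
 * gets -EPERM.
 */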
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	return tce_iommu_prereg_free(container, tcemem);
}

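/*
 * Example userspace flow feeding tce_iommu_register_pages() below (a
 * sketch only; "fd" is a hypothetical container file descriptor, the
 * struct comes from the uapi header <linux/vfio.h>, and buf/len must be
 * page aligned):
 *
 *	struct vfio_iommu_spapr_register_memory reg = {
 *		.argsz = sizeof(reg),
 *		.flags = 0,
 *		.vaddr = (__u64)(unsigned long)buf,
 *		.size = len,
 *	};
 *	ioctl(fd, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
 */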
static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_find(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem)
				return -EBUSY;
		}
	}

	ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
	if (ret)
		return ret;

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		mm_iommu_put(container->mm, mem);
		return -ENOMEM;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;
}

static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);
	unsigned long *uas;
	long ret;

	BUG_ON(tbl->it_userspace);

	ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
	if (ret)
		return ret;

	uas = vzalloc(cb);
	if (!uas) {
		decrement_locked_vm(mm, cb >> PAGE_SHIFT);
		return -ENOMEM;
	}
	tbl->it_userspace = uas;

	return 0;
}

static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
		struct mm_struct *mm)
{
	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
			tbl->it_size, PAGE_SIZE);

	if (!tbl->it_userspace)
		return;

	vfree(tbl->it_userspace);
	tbl->it_userspace = NULL;
	decrement_locked_vm(mm, cb >> PAGE_SHIFT);
}

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
	/*
	 * Check that the TCE table granularity is not bigger than the
	 * size of a page we just found. Otherwise the hardware can get
	 * access to a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

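/*
 * Look up the TCE table covering a given bus address (ioba): each table
 * spans entries [it_offset, it_offset + it_size) in units of its own
 * IOMMU page size, i.e. the DMA window
 * [it_offset << it_page_shift, (it_offset + it_size) << it_page_shift).
 */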
static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap.  For powerpc, the map unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult to impossible
	 * to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled.  The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
	 * that would effectively kill the guest at random points, much better
	 * enforcing the limit based on the max that the guest can map.
	 *
	 * Unfortunately at the moment it counts whole tables, no matter how
	 * much memory the guest has mapped/unmapped explicitly.
	 * Incoming request to enable the container for DMA also counts the
	 * whole guest window which may be smaller.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed
	 * by tce_iommu_detach_group() so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	while (!list_empty(&container->prereg_list)) {
		struct tce_iommu_prereg *tcemem;

		tcemem = list_first_entry(&container->prereg_list,
				struct tce_iommu_prereg, next);
		WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem));
	}

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long size,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, size);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

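/*
 * The translation above succeeds only for userspace addresses that fall
 * inside a region registered via VFIO_IOMMU_SPAPR_REGISTER_MEMORY, which
 * guarantees the backing pages are already pinned, so the v2 map path
 * never has to call get_user_pages() itself.
 */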
static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl),
			&hpa, &mem);
	if (ret)
		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
				__func__, *pua, entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = 0;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;

	for ( ; pages; --pages, ++entry) {
		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

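/*
 * Pin a single page of the userspace address in @tce and return its host
 * physical address.  The page reference is dropped later via
 * tce_iommu_unuse_page() once the TCE is cleared or replaced.
 */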
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

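/*
 * Program @pages consecutive TCEs starting at @entry (v1 flow).  On any
 * failure, the entries programmed so far are cleared again so the table
 * is never left partially populated.
 */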
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve the offset within the IOMMU page */
		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

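/*
 * The v2 flow differs from tce_iommu_build() in two ways: pages must come
 * from a preregistered region (no pinning happens here), and the original
 * userspace address of every entry is cached in tbl->it_userspace so it
 * can be found again at unmap time.
 */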
static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	if (!tbl->it_userspace) {
		ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
		if (ret)
			return ret;
	}

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
				entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = tce;

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	tce_iommu_userspace_view_free(tbl, container->mm);
	tbl->it_ops->free(tbl);
	decrement_locked_vm(container->mm, pages);
}

static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not so
		 * here we check for the platform capability.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

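/*
 * Example userspace flow for the v1 interface handled by
 * tce_iommu_ioctl() below (a sketch only; "fd" is a hypothetical
 * container file descriptor, the structs come from the uapi header
 * <linux/vfio.h>, and "info" was filled by VFIO_IOMMU_SPAPR_TCE_GET_INFO):
 *
 *	ioctl(fd, VFIO_IOMMU_ENABLE);
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(unsigned long)buf,
 *		.iova = info.dma32_window_start,
 *		.size = len,
 *	};
 *	ioctl(fd, VFIO_IOMMU_MAP_DMA, &map);
 */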
static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_userspace_view_free(tbl, container->mm);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

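/*
 * On hosts with dynamic DMA windows, ownership is transferred through the
 * table_group callbacks instead of flipping it_map on individual tables:
 * take_ownership() detaches the platform's default tables and set_window()
 * re-programs whatever tables the container has already created.
 */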
static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

	/* Set all windows of the container to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL, *tcegrp_tmp;

	mutex_lock(&container->lock);

	table_group = iommu_group_get_iommudata(iommu_group);

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* Check if new group has the same iommu_ops (i.e. compatible) */
	list_for_each_entry(tcegrp_tmp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp_tmp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp_tmp->grp);
		if (table_group_tmp->ops != table_group->ops) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp_tmp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);