/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

/*
 * Charge @npages against the mm's RLIMIT_MEMLOCK accounting; fails with
 * -ENOMEM when the new total would exceed the limit, unless the task
 * has CAP_IPC_LOCK.
 */
static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}

static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}
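
/*
 * A worked example of the accounting above, with illustrative numbers
 * (not taken from this file): under 4K pages, RLIMIT_MEMLOCK = 16 MiB
 * gives lock_limit = (16 << 20) >> 12 = 4096 pages. A request for
 * npages = 512 while locked_vm = 3700 computes locked = 4212 > 4096,
 * so the call fails with -ENOMEM unless the task has CAP_IPC_LOCK.
 */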

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

/*
 * A container needs to remember which preregistered regions it has
 * referenced to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU
 * group at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

/* Bind the container to the calling process's mm on first use */
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;
	long ret;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		ret = -ENOENT;
	else
		ret = tce_iommu_prereg_free(container, tcemem);

	mm_iommu_put(container->mm, mem);

	return ret;
}

static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem) {
				ret = -EBUSY;
				goto put_exit;
			}
		}
	} else {
		ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
		if (ret)
			return ret;
	}

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		ret = -ENOMEM;
		goto put_exit;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;

put_exit:
	mm_iommu_put(container->mm, mem);
	return ret;
}
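
/*
 * Hedged userspace sketch of reaching tce_iommu_register_pages() via the
 * VFIO_IOMMU_SPAPR_REGISTER_MEMORY ioctl on a v2 container. "buf" and
 * "len" are hypothetical; both must be page-aligned or the handler
 * returns -EINVAL:
 *
 *	struct vfio_iommu_spapr_register_memory reg = {
 *		.argsz = sizeof(reg),
 *		.flags = 0,
 *		.vaddr = (__u64)(unsigned long)buf,
 *		.size = len,
 *	};
 *	if (ioctl(container_fd, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg))
 *		perror("VFIO_IOMMU_SPAPR_REGISTER_MEMORY");
 */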

static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
		unsigned int page_shift)
{
	struct page *page;
	unsigned long size = 0;

	if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
		return size == (1UL << page_shift);

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}
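
/*
 * Illustrative numbers for the lookup above (not from this file): with
 * it_page_shift = 16 (64K IOMMU pages), it_offset = 0 and it_size =
 * 0x8000 entries, the window spans ioba 0 .. (0x8000 << 16) = 2 GiB;
 * ioba 0x40000000 yields entry 0x40000000 >> 16 = 0x4000, which falls
 * inside [0, 0x8000) and so matches that table.
 */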

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * Mapped pages are effectively locked memory, but accounting every
	 * map/unmap on the hot DMA paths would be too expensive (and hard
	 * to do in real mode). Instead, charge the worst case once at
	 * enable time: the size of the entire 32-bit DMA window. That
	 * bound also avoids failing H_PUT_TCE at random points due to
	 * ulimits, which would effectively kill the guest; the limit is
	 * enforced on the maximum the guest can map.
	 *
	 * Note this counts whole tables per group, no matter how much
	 * memory the guest actually has: VFIO is KVM-agnostic and cannot
	 * tell how much RAM the guest uses. It follows that enabling a
	 * container without a group attached is not allowed, as there is
	 * no way to know how much locked_vm should be incremented.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}
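
/*
 * Hedged sketch of how userspace reaches tce_iommu_open(): the third
 * argument of VFIO_SET_IOMMU becomes "arg" above. Note the VFIO core
 * only accepts VFIO_SET_IOMMU once at least one group has been added
 * to the container (see tce_iommu_attach_group() below); error handling
 * is elided:
 *
 *	int container_fd = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU))
 *		ioctl(container_fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_v2_IOMMU);
 */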

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	struct tce_iommu_prereg *tcemem, *tmtmp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed
	 * by tce_iommu_detach_group() so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
		WARN_ON(tce_iommu_prereg_free(container, tcemem));

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long shift,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}
455
456static void tce_iommu_unuse_page_v2(struct tce_container *container,
457 struct iommu_table *tbl, unsigned long entry)
458{
459 struct mm_iommu_table_group_mem_t *mem = NULL;
460 int ret;
461 unsigned long hpa = 0;
462 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
463
464 if (!pua)
465 return;
466
467 ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
468 tbl->it_page_shift, &hpa, &mem);
469 if (ret)
470 pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
471 __func__, be64_to_cpu(*pua), entry, ret);
472 if (mem)
473 mm_iommu_mapped_dec(mem);
474
475 *pua = cpu_to_be64(0);
476}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;
	unsigned long lastentry = entry + pages;

	for ( ; entry < lastentry; ++entry) {
		if (tbl->it_indirect_levels && tbl->it_userspace) {
			/*
			 * For multilevel tables, the userspace address cache
			 * mirrors the hardware table: if an indirect level is
			 * missing from the cache, the hardware table does not
			 * have it allocated either, so the whole level can be
			 * skipped.
			 */
			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
					entry);
			if (!pua) {
				/* align to level_size which is power of two */
				entry |= tbl->it_level_size - 1;
				continue;
			}
		}

		cond_resched();

		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa,
				&direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, tbl->it_page_shift, &hpa, &mem);
		if (ret)
			break;

		if (!tce_page_is_contained(container->mm, hpa,
				tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
				&dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = cpu_to_be64(tce);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	iommu_tce_table_put(tbl);
	decrement_locked_vm(container->mm, pages);
}

static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}
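
/*
 * Hedged userspace sketch of creating a dynamic DMA window through
 * VFIO_IOMMU_SPAPR_TCE_CREATE, which ends up in tce_iommu_create_window()
 * above. The 64K page shift, 1 GiB size and single level are illustrative
 * values only; the supported geometries are advertised by
 * VFIO_IOMMU_SPAPR_TCE_GET_INFO:
 *
 *	struct vfio_iommu_spapr_tce_create create = {
 *		.argsz = sizeof(create),
 *		.page_shift = 16,
 *		.window_size = 1ULL << 30,
 *		.levels = 1,
 *	};
 *	if (!ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create))
 *		printf("new window at ioba 0x%llx\n", create.start_addr);
 */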

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not, so
		 * check for the capability before trying to unset it.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}
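
/*
 * Hedged sketch of the v1 map path through tce_iommu_ioctl() above:
 * enable the container, then map a buffer into the default 32-bit
 * window. "buf" is hypothetical and must be aligned to the IOMMU page
 * size; the window geometry comes from VFIO_IOMMU_SPAPR_TCE_GET_INFO:
 *
 *	ioctl(container_fd, VFIO_IOMMU_ENABLE);
 *
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(unsigned long)buf,
 *		.iova = 0,
 *		.size = 1 << 16,
 *	};
 *	ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
 */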

static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

	/* Set all windows to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* Check if new group has the same iommu_ops (i.e. compatible) */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}
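
/*
 * Hedged sketch of the userspace step that lands in
 * tce_iommu_attach_group() above; the group number 42 is hypothetical
 * and comes from the device's /sys/.../iommu_group symlink:
 *
 *	int group_fd = open("/dev/vfio/42", O_RDWR);
 *	ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd);
 */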

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);