1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/cdev.h>
14#include <linux/compat.h>
15#include <linux/device.h>
16#include <linux/file.h>
17#include <linux/anon_inodes.h>
18#include <linux/fs.h>
19#include <linux/idr.h>
20#include <linux/iommu.h>
21#include <linux/list.h>
22#include <linux/miscdevice.h>
23#include <linux/module.h>
24#include <linux/mutex.h>
25#include <linux/pci.h>
26#include <linux/rwsem.h>
27#include <linux/sched.h>
28#include <linux/slab.h>
29#include <linux/stat.h>
30#include <linux/string.h>
31#include <linux/uaccess.h>
32#include <linux/vfio.h>
33#include <linux/wait.h>
34#include <linux/sched/signal.h>
35
36#define DRIVER_VERSION "0.3"
37#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
38#define DRIVER_DESC "VFIO - User Level meta-driver"
39
/*
 * Global state of the VFIO core.  Exactly one instance, "vfio", exists.
 */
static struct vfio {
	/* Device class passed to device_create()/device_destroy() for groups. */
	struct class *class;
	/* Registered IOMMU backends (struct vfio_iommu_driver, via vfio_next). */
	struct list_head iommu_drivers_list;
	/* Held around every walk or update of iommu_drivers_list. */
	struct mutex iommu_drivers_lock;
	/* All published groups (struct vfio_group, via vfio_next). */
	struct list_head group_list;
	/* Maps a group's minor number to its struct vfio_group. */
	struct idr group_idr;
	/* Held around group_list walks/updates and group_idr lookups. */
	struct mutex group_lock;
	/* NOTE(review): not referenced in this part of the file. */
	struct cdev group_cdev;
	/* Its major number is combined with a group's minor to form a dev_t. */
	dev_t group_devt;
	/* Woken when a vfio_device is released; vfio_del_group_dev() waits here. */
	wait_queue_head_t release_q;
} vfio;
51
/*
 * One registered IOMMU backend: the ops table supplied to
 * vfio_register_iommu_driver() plus its link on vfio.iommu_drivers_list.
 */
struct vfio_iommu_driver {
	/* Callback table of the backend; also used to identify duplicates. */
	const struct vfio_iommu_driver_ops *ops;
	/* Link on vfio.iommu_drivers_list. */
	struct list_head vfio_next;
};
56
/*
 * State behind an open container file: the set of attached groups and the
 * IOMMU backend (if one has been selected) that serves them.
 */
struct vfio_container {
	/* Reference count; the structure is freed when it drops to zero. */
	struct kref kref;
	/* Attached groups (struct vfio_group, via container_next). */
	struct list_head group_list;
	/* Protects group_list and the iommu_driver/iommu_data pair. */
	struct rw_semaphore group_lock;
	/* Backend chosen by VFIO_SET_IOMMU, NULL until then. */
	struct vfio_iommu_driver *iommu_driver;
	/* Opaque value returned by the backend's open() callback. */
	void *iommu_data;
	/* Copied from the noiommu flag of the group being attached. */
	bool noiommu;
};
65
/*
 * Entry on a group's unbound_list.  It is added by vfio_del_group_dev() and
 * removed when the IOMMU notifier reports the device as unbound;
 * vfio_dev_viable() treats a device found here as viable.
 */
struct vfio_unbound_dev {
	/* The device being removed from the group. */
	struct device *dev;
	/* Link on vfio_group.unbound_list. */
	struct list_head unbound_next;
};
70
/*
 * VFIO's view of one IOMMU group: the devices registered with VFIO, the
 * container the group is attached to, and bookkeeping for its device node.
 */
struct vfio_group {
	/* Reference count; the final put runs vfio_group_release(). */
	struct kref kref;
	/* Minor number allocated from vfio.group_idr. */
	int minor;
	/*
	 * Zero while no container is set; becomes 1 when a container is set
	 * and is incremented for every device fd handed out.
	 */
	atomic_t container_users;
	/* The underlying IOMMU group. */
	struct iommu_group *iommu_group;
	/* Container this group is attached to, or NULL. */
	struct vfio_container *container;
	/* Devices registered on this group (struct vfio_device, via group_next). */
	struct list_head device_list;
	/* Protects device_list and dev_counter. */
	struct mutex device_lock;
	/* Device returned by device_create() for the group node. */
	struct device *dev;
	/* Notifier block registered on iommu_group. */
	struct notifier_block nb;
	/* Link on vfio.group_list. */
	struct list_head vfio_next;
	/* Link on vfio_container.group_list. */
	struct list_head container_next;
	/* Devices in the middle of removal (struct vfio_unbound_dev). */
	struct list_head unbound_list;
	/* Protects unbound_list. */
	struct mutex unbound_lock;
	/* Initialised to 0 at creation; NOTE(review): users are outside this view. */
	atomic_t opened;
	/* Woken when the container pointer is cleared. */
	wait_queue_head_t container_q;
	/* True when the IOMMU group carries the &noiommu iommudata tag. */
	bool noiommu;
	/* Number of entries on device_list. */
	unsigned int dev_counter;
	/* NOTE(review): not referenced in this part of the file. */
	struct kvm *kvm;
	/* Blocking notifier chain; expected to be empty when the group is released. */
	struct blocking_notifier_head notifier;
};
92
/*
 * A device registered with VFIO through vfio_add_group_dev().
 */
struct vfio_device {
	/* Reference count; the final put runs vfio_device_release(). */
	struct kref kref;
	/* The underlying device; its drvdata points back to this structure. */
	struct device *dev;
	/* Callbacks supplied by the registering bus driver. */
	const struct vfio_device_ops *ops;
	/* Owning group; every device reference also holds a group reference. */
	struct vfio_group *group;
	/* Link on vfio_group.device_list. */
	struct list_head group_next;
	/* Opaque pointer supplied at registration and passed to the ops. */
	void *device_data;
};
101
#ifdef CONFIG_VFIO_NOIOMMU
/*
 * Run-time switch for no-IOMMU mode, exposed as the module parameter
 * "enable_unsafe_noiommu_mode".  The address of this variable doubles as the
 * iommudata tag that identifies IOMMU groups created by
 * vfio_iommu_group_get().
 */
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif
108
109
110
111
112
113
114
115
116
117struct iommu_group *vfio_iommu_group_get(struct device *dev)
118{
119 struct iommu_group *group;
120 int __maybe_unused ret;
121
122 group = iommu_group_get(dev);
123
124#ifdef CONFIG_VFIO_NOIOMMU
125
126
127
128
129
130
131 if (group || !noiommu || iommu_present(dev->bus))
132 return group;
133
134 group = iommu_group_alloc();
135 if (IS_ERR(group))
136 return NULL;
137
138 iommu_group_set_name(group, "vfio-noiommu");
139 iommu_group_set_iommudata(group, &noiommu, NULL);
140 ret = iommu_group_add_device(group, dev);
141 if (ret) {
142 iommu_group_put(group);
143 return NULL;
144 }
145
146
147
148
149
150
151
152
153
154 add_taint(TAINT_USER, LOCKDEP_STILL_OK);
155 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
156#endif
157
158 return group;
159}
160EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
161
/*
 * Counterpart of vfio_iommu_group_get(): drop the reference on @group.  If
 * the group carries the &noiommu tag, @dev is first removed from it.
 */
void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	void *tag = iommu_group_get_iommudata(group);

	if (tag == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
172
173#ifdef CONFIG_VFIO_NOIOMMU
174static void *vfio_noiommu_open(unsigned long arg)
175{
176 if (arg != VFIO_NOIOMMU_IOMMU)
177 return ERR_PTR(-EINVAL);
178 if (!capable(CAP_SYS_RAWIO))
179 return ERR_PTR(-EPERM);
180
181 return NULL;
182}
183
/*
 * release() callback of the no-IOMMU backend.  vfio_noiommu_open() returns
 * no data, so there is nothing to free.
 */
static void vfio_noiommu_release(void *iommu_data)
{
}
187
188static long vfio_noiommu_ioctl(void *iommu_data,
189 unsigned int cmd, unsigned long arg)
190{
191 if (cmd == VFIO_CHECK_EXTENSION)
192 return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
193
194 return -ENOTTY;
195}
196
197static int vfio_noiommu_attach_group(void *iommu_data,
198 struct iommu_group *iommu_group)
199{
200 return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
201}
202
/*
 * detach_group() callback of the no-IOMMU backend.  Attaching records no
 * state, so detaching has nothing to undo.
 */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
207
/*
 * Ops table of the built-in no-IOMMU backend.  Its address is compared
 * against driver->ops elsewhere in this file to tell this backend apart
 * from real IOMMU backends.
 */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
217#endif
218
219
220
221
222
223int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
224{
225 struct vfio_iommu_driver *driver, *tmp;
226
227 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
228 if (!driver)
229 return -ENOMEM;
230
231 driver->ops = ops;
232
233 mutex_lock(&vfio.iommu_drivers_lock);
234
235
236 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
237 if (tmp->ops == ops) {
238 mutex_unlock(&vfio.iommu_drivers_lock);
239 kfree(driver);
240 return -EINVAL;
241 }
242 }
243
244 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
245
246 mutex_unlock(&vfio.iommu_drivers_lock);
247
248 return 0;
249}
250EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
251
252void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
253{
254 struct vfio_iommu_driver *driver;
255
256 mutex_lock(&vfio.iommu_drivers_lock);
257 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
258 if (driver->ops == ops) {
259 list_del(&driver->vfio_next);
260 mutex_unlock(&vfio.iommu_drivers_lock);
261 kfree(driver);
262 return;
263 }
264 }
265 mutex_unlock(&vfio.iommu_drivers_lock);
266}
267EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
268
269
270
271
/*
 * Allocate a minor number in [0, MINORMASK] for @group and record the
 * mapping in vfio.group_idr.  Returns the minor, or the negative value
 * reported by idr_alloc().  The visible caller holds vfio.group_lock.
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}
276
/*
 * Return @minor to vfio.group_idr, removing its minor-to-group mapping.
 */
static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}
281
/*
 * Forward declarations: vfio_create_group() installs the notifier callback
 * and takes group references before either function is defined.
 */
static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);
285
286
287
288
289
290
291
/*
 * Take an additional reference on @container.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}
296
297static void vfio_container_release(struct kref *kref)
298{
299 struct vfio_container *container;
300 container = container_of(kref, struct vfio_container, kref);
301
302 kfree(container);
303}
304
/*
 * Drop a reference on @container; the last put frees it through
 * vfio_container_release().
 */
static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}
309
/*
 * Release vfio.group_lock, which the caller must hold, then unregister the
 * group's IOMMU notifier and free @group.
 *
 * The lock is dropped before the notifier is unregistered.
 * NOTE(review): presumably to avoid a lock-order inversion between
 * vfio.group_lock and the notifier chain's own lock - confirm.
 */
static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}
320
321
322
323
324static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
325{
326 struct vfio_group *group, *tmp;
327 struct device *dev;
328 int ret, minor;
329
330 group = kzalloc(sizeof(*group), GFP_KERNEL);
331 if (!group)
332 return ERR_PTR(-ENOMEM);
333
334 kref_init(&group->kref);
335 INIT_LIST_HEAD(&group->device_list);
336 mutex_init(&group->device_lock);
337 INIT_LIST_HEAD(&group->unbound_list);
338 mutex_init(&group->unbound_lock);
339 atomic_set(&group->container_users, 0);
340 atomic_set(&group->opened, 0);
341 init_waitqueue_head(&group->container_q);
342 group->iommu_group = iommu_group;
343#ifdef CONFIG_VFIO_NOIOMMU
344 group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
345#endif
346 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
347
348 group->nb.notifier_call = vfio_iommu_group_notifier;
349
350
351
352
353
354
355
356
357 ret = iommu_group_register_notifier(iommu_group, &group->nb);
358 if (ret) {
359 kfree(group);
360 return ERR_PTR(ret);
361 }
362
363 mutex_lock(&vfio.group_lock);
364
365
366 list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
367 if (tmp->iommu_group == iommu_group) {
368 vfio_group_get(tmp);
369 vfio_group_unlock_and_free(group);
370 return tmp;
371 }
372 }
373
374 minor = vfio_alloc_group_minor(group);
375 if (minor < 0) {
376 vfio_group_unlock_and_free(group);
377 return ERR_PTR(minor);
378 }
379
380 dev = device_create(vfio.class, NULL,
381 MKDEV(MAJOR(vfio.group_devt), minor),
382 group, "%s%d", group->noiommu ? "noiommu-" : "",
383 iommu_group_id(iommu_group));
384 if (IS_ERR(dev)) {
385 vfio_free_group_minor(minor);
386 vfio_group_unlock_and_free(group);
387 return ERR_CAST(dev);
388 }
389
390 group->minor = minor;
391 group->dev = dev;
392
393 list_add(&group->vfio_next, &vfio.group_list);
394
395 mutex_unlock(&vfio.group_lock);
396
397 return group;
398}
399
400
/*
 * kref release callback for a vfio_group.  It is reached through
 * kref_put_mutex() in vfio_group_put(), so vfio.group_lock is held on
 * entry; vfio_group_unlock_and_free() releases it.
 */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	/* Saved because @group is freed before the iommu_group is put. */
	struct iommu_group *iommu_group = group->iommu_group;

	/* No device or notifier user should remain at this point. */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->notifier.head);

	/* Discard any unbound-device entries that were never reaped. */
	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
	/* Drop the iommu_group reference the group was holding. */
	iommu_group_put(iommu_group);
}
422
/*
 * Drop a reference on @group.  The final put acquires vfio.group_lock and
 * runs vfio_group_release() with it held.
 */
static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}
427
/*
 * Work item used by vfio_group_schedule_put() to drop a group reference
 * from a workqueue instead of the calling context.
 */
struct vfio_group_put_work {
	/* Work structure queued with schedule_work(). */
	struct work_struct work;
	/* Group whose reference the work item drops. */
	struct vfio_group *group;
};
432
433static void vfio_group_put_bg(struct work_struct *work)
434{
435 struct vfio_group_put_work *do_work;
436
437 do_work = container_of(work, struct vfio_group_put_work, work);
438
439 vfio_group_put(do_work->group);
440 kfree(do_work);
441}
442
443static void vfio_group_schedule_put(struct vfio_group *group)
444{
445 struct vfio_group_put_work *do_work;
446
447 do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
448 if (WARN_ON(!do_work))
449 return;
450
451 INIT_WORK(&do_work->work, vfio_group_put_bg);
452 do_work->group = group;
453 schedule_work(&do_work->work);
454}
455
456
/*
 * Take an additional reference on @group.  The caller must already know the
 * group is alive, e.g. by holding a reference or vfio.group_lock.
 */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}
461
462
463
464
465
466static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
467{
468 struct vfio_group *target = group;
469
470 mutex_lock(&vfio.group_lock);
471 list_for_each_entry(group, &vfio.group_list, vfio_next) {
472 if (group == target) {
473 vfio_group_get(group);
474 mutex_unlock(&vfio.group_lock);
475 return group;
476 }
477 }
478 mutex_unlock(&vfio.group_lock);
479
480 return NULL;
481}
482
483static
484struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
485{
486 struct vfio_group *group;
487
488 mutex_lock(&vfio.group_lock);
489 list_for_each_entry(group, &vfio.group_list, vfio_next) {
490 if (group->iommu_group == iommu_group) {
491 vfio_group_get(group);
492 mutex_unlock(&vfio.group_lock);
493 return group;
494 }
495 }
496 mutex_unlock(&vfio.group_lock);
497
498 return NULL;
499}
500
501static struct vfio_group *vfio_group_get_from_minor(int minor)
502{
503 struct vfio_group *group;
504
505 mutex_lock(&vfio.group_lock);
506 group = idr_find(&vfio.group_idr, minor);
507 if (!group) {
508 mutex_unlock(&vfio.group_lock);
509 return NULL;
510 }
511 vfio_group_get(group);
512 mutex_unlock(&vfio.group_lock);
513
514 return group;
515}
516
517static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
518{
519 struct iommu_group *iommu_group;
520 struct vfio_group *group;
521
522 iommu_group = iommu_group_get(dev);
523 if (!iommu_group)
524 return NULL;
525
526 group = vfio_group_get_from_iommu(iommu_group);
527 iommu_group_put(iommu_group);
528
529 return group;
530}
531
532
533
534
/*
 * Allocate a vfio_device for @dev and add it to @group's device list.
 *
 * The new device starts with one reference and takes a reference on @group.
 * @dev's drvdata is pointed at the new structure.
 *
 * Returns the device, or ERR_PTR(-ENOMEM) if allocation fails.
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* The device holds a group reference for as long as it exists. */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	group->dev_counter++;
	mutex_unlock(&group->device_lock);

	return device;
}
564
/*
 * kref release callback for a vfio_device.  It is reached through
 * kref_put_mutex() in vfio_device_put(), so group->device_lock is held on
 * entry and released here once the device is off the list.
 */
static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev() may be sleeping on this queue. */
	wake_up(&vfio.release_q);
}
582
583
/*
 * Drop a device reference together with the group reference paired with it
 * by vfio_device_get().  The group pointer is read up front because the
 * device may be freed by the kref_put_mutex() call.
 */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);
591
/*
 * Take a reference on @device and a matching reference on its group; both
 * are dropped again by vfio_device_put().
 */
static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}
597
598static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
599 struct device *dev)
600{
601 struct vfio_device *device;
602
603 mutex_lock(&group->device_lock);
604 list_for_each_entry(device, &group->device_list, group_next) {
605 if (device->dev == dev) {
606 vfio_device_get(device);
607 mutex_unlock(&group->device_lock);
608 return device;
609 }
610 }
611 mutex_unlock(&group->device_lock);
612 return NULL;
613}
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
/*
 * Names of non-VFIO drivers that vfio_dev_driver_allowed() accepts on a
 * device without making its group non-viable.
 */
static const char * const vfio_driver_allowed[] = { "pci-stub" };
631
632static bool vfio_dev_driver_allowed(struct device *dev,
633 struct device_driver *drv)
634{
635 if (dev_is_pci(dev)) {
636 struct pci_dev *pdev = to_pci_dev(dev);
637
638 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
639 return true;
640 }
641
642 return match_string(vfio_driver_allowed,
643 ARRAY_SIZE(vfio_driver_allowed),
644 drv->name) >= 0;
645}
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661static int vfio_dev_viable(struct device *dev, void *data)
662{
663 struct vfio_group *group = data;
664 struct vfio_device *device;
665 struct device_driver *drv = READ_ONCE(dev->driver);
666 struct vfio_unbound_dev *unbound;
667 int ret = -EINVAL;
668
669 mutex_lock(&group->unbound_lock);
670 list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
671 if (dev == unbound->dev) {
672 ret = 0;
673 break;
674 }
675 }
676 mutex_unlock(&group->unbound_lock);
677
678 if (!ret || !drv || vfio_dev_driver_allowed(dev, drv))
679 return 0;
680
681 device = vfio_group_get_device(group, dev);
682 if (device) {
683 vfio_device_put(device);
684 return 0;
685 }
686
687 return ret;
688}
689
690
691
692
693static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
694{
695 struct vfio_device *device;
696
697
698 device = vfio_group_get_device(group, dev);
699 if (WARN_ON_ONCE(device)) {
700 vfio_device_put(device);
701 return 0;
702 }
703
704
705 if (!atomic_read(&group->container_users))
706 return 0;
707
708
709 dev_WARN(dev, "Device added to live group %d!\n",
710 iommu_group_id(group->iommu_group));
711
712 return 0;
713}
714
715static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
716{
717
718 if (!atomic_read(&group->container_users))
719 return 0;
720
721 return vfio_dev_viable(dev, group);
722}
723
/*
 * Notifier callback registered on a group's iommu_group through group->nb.
 * @data is the struct device the event refers to.  Always returns NOTIFY_OK.
 */
static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Only act on groups that are still published on vfio.group_list.
	 * The lookup also takes the reference that is dropped at the end;
	 * notifications for unknown groups are ignored.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/* Nothing to do when a device leaves the IOMMU group. */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		dev_dbg(dev, "%s: group %d binding to driver\n", __func__,
			iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__,
			iommu_group_id(group->iommu_group), dev->driver->name);
		/* A driver that makes a live group non-viable is fatal. */
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		dev_dbg(dev, "%s: group %d unbinding from driver %s\n",
			__func__, iommu_group_id(group->iommu_group),
			dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		dev_dbg(dev, "%s: group %d unbound from driver\n", __func__,
			iommu_group_id(group->iommu_group));
		/*
		 * The unbind has completed, so the entry that
		 * vfio_del_group_dev() placed on the unbound list for this
		 * device is no longer needed.
		 */
		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	/*
	 * The reference is dropped from a work item instead of directly.
	 * NOTE(review): presumably because a final put here would unregister
	 * this notifier from within its own callback - confirm.
	 */
	vfio_group_schedule_put(group);
	return NOTIFY_OK;
}
800
801
802
803
/*
 * Register @dev with VFIO: find or create the vfio_group for its IOMMU
 * group and create a vfio_device on it using @ops and @device_data.
 *
 * Returns 0 on success, -EINVAL if @dev has no IOMMU group, -EBUSY if the
 * device is already registered on the group, or the error from group or
 * device creation.
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A group that was found already has its own iommu_group
		 * reference, so ours is dropped.  In the other branch the
		 * reference is handed over to vfio_create_group().
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		dev_WARN(dev, "Device already exists on group %d\n",
			 iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop the lookup/creation reference on the group.  The vfio_device
	 * holds its own group reference, which keeps the group alive.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
855
856
857
858
859
860
861
862
863struct vfio_device *vfio_device_get_from_dev(struct device *dev)
864{
865 struct vfio_group *group;
866 struct vfio_device *device;
867
868 group = vfio_group_get_from_dev(dev);
869 if (!group)
870 return NULL;
871
872 device = vfio_group_get_device(group, dev);
873 vfio_group_put(group);
874
875 return device;
876}
877EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
878
879static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
880 char *buf)
881{
882 struct vfio_device *it, *device = ERR_PTR(-ENODEV);
883
884 mutex_lock(&group->device_lock);
885 list_for_each_entry(it, &group->device_list, group_next) {
886 int ret;
887
888 if (it->ops->match) {
889 ret = it->ops->match(it->device_data, buf);
890 if (ret < 0) {
891 device = ERR_PTR(ret);
892 break;
893 }
894 } else {
895 ret = !strcmp(dev_name(it->dev), buf);
896 }
897
898 if (ret) {
899 device = it;
900 vfio_device_get(device);
901 break;
902 }
903 }
904 mutex_unlock(&group->device_lock);
905
906 return device;
907}
908
909
910
911
/*
 * Return the opaque device_data pointer that was supplied when @device was
 * registered.
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);
917
918
919
920
/*
 * Unregister @dev from VFIO and wait until every reference to its
 * vfio_device is gone.  @dev's drvdata must point at its vfio_device.
 *
 * Returns the device_data pointer that was supplied at registration.
 */
void *vfio_del_group_dev(struct device *dev)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	bool interrupted = false;

	/*
	 * Hold our own group reference: the device, and with it the group
	 * reference it carries, may go away below.
	 */
	vfio_group_get(group);

	/*
	 * Record the device on the unbound list so that vfio_dev_viable()
	 * keeps treating it as viable after it leaves the device list.  The
	 * entry is removed by the notifier once the device is unbound.  If
	 * the allocation fails only a warning is issued.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	/* Drop the reference held since the device was created. */
	vfio_device_put(device);

	/*
	 * If the device is still on the group after that put, someone else
	 * holds a reference.  Keep asking the bus driver, through its
	 * optional request() callback and an increasing counter, to give the
	 * device up, sleeping up to ten seconds between attempts.  The queue
	 * is woken by vfio_device_release().
	 */
	add_wait_queue(&vfio.release_q, &wait);

	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		/*
		 * Sleep interruptibly until a signal arrives; after that, warn
		 * once and sleep uninterruptibly from then on.
		 */
		if (interrupted) {
			wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
		} else {
			wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
			if (signal_pending(current)) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}

	} while (1);

	remove_wait_queue(&vfio.release_q, &wait);

	/*
	 * If this was the last device on the group, also wait until the
	 * group is no longer attached to a container.
	 */
	if (list_empty(&group->device_list))
		wait_event(group->container_q, !group->container);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
1016
1017
1018
1019
/*
 * Handle VFIO_CHECK_EXTENSION on a container.
 *
 * With a backend already selected the question goes to that backend.
 * Otherwise every registered backend is asked in turn and the first
 * positive answer is returned.  Returns 0 when no backend is registered.
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* The core handles no extension itself; all go to backends. */
	default:
		/*
		 * Without a selected backend, probe the registered ones.  The
		 * callback is invoked with NULL data, and a module reference
		 * is held only for the duration of the call.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				/*
				 * Once groups are attached, skip backends
				 * whose no-IOMMU nature differs from the
				 * container's.
				 */
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
1071
1072
/*
 * Attach every group on @container to @driver, passing @data to the
 * backend.  The visible caller holds container->group_lock for writing.
 *
 * On the first failure the groups attached so far are detached again and
 * the error is returned.  Returns 0 on success, or the initial -ENODEV if
 * the container has no groups.
 */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	/* Walk back from the entry before the failing group. */
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
1096
/*
 * Handle VFIO_SET_IOMMU: select the IOMMU backend for @container.
 *
 * Registered backends are tried in list order.  The first one that reports
 * support for @arg, opens successfully and accepts every attached group
 * becomes the container's backend, and its module reference is kept.
 *
 * Returns 0 on success, -EINVAL if the container has no groups or already
 * has a backend, otherwise the error of the last backend tried (-ENODEV if
 * none got as far as open()).
 */
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * A backend can only be chosen once, and only after at least one
	 * group has been attached.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * A no-IOMMU container may only use the no-IOMMU backend,
		 * and a regular container may never use it.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * Ask the backend whether it supports the requested type
		 * before opening it; the check is made with NULL data.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		/* Success: the module reference stays with the container. */
		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}
1170
1171static long vfio_fops_unl_ioctl(struct file *filep,
1172 unsigned int cmd, unsigned long arg)
1173{
1174 struct vfio_container *container = filep->private_data;
1175 struct vfio_iommu_driver *driver;
1176 void *data;
1177 long ret = -EINVAL;
1178
1179 if (!container)
1180 return ret;
1181
1182 switch (cmd) {
1183 case VFIO_GET_API_VERSION:
1184 ret = VFIO_API_VERSION;
1185 break;
1186 case VFIO_CHECK_EXTENSION:
1187 ret = vfio_ioctl_check_extension(container, arg);
1188 break;
1189 case VFIO_SET_IOMMU:
1190 ret = vfio_ioctl_set_iommu(container, arg);
1191 break;
1192 default:
1193 driver = container->iommu_driver;
1194 data = container->iommu_data;
1195
1196 if (driver)
1197 ret = driver->ops->ioctl(data, cmd, arg);
1198 }
1199
1200 return ret;
1201}
1202
1203static int vfio_fops_open(struct inode *inode, struct file *filep)
1204{
1205 struct vfio_container *container;
1206
1207 container = kzalloc(sizeof(*container), GFP_KERNEL);
1208 if (!container)
1209 return -ENOMEM;
1210
1211 INIT_LIST_HEAD(&container->group_list);
1212 init_rwsem(&container->group_lock);
1213 kref_init(&container->kref);
1214
1215 filep->private_data = container;
1216
1217 return 0;
1218}
1219
/*
 * release() of the container file: detach the container from the file and
 * drop the reference taken in vfio_fops_open().  The container is freed
 * only once attached groups have dropped their references as well.
 */
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}
1230
1231
1232
1233
1234
1235static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
1236 size_t count, loff_t *ppos)
1237{
1238 struct vfio_container *container = filep->private_data;
1239 struct vfio_iommu_driver *driver;
1240 ssize_t ret = -EINVAL;
1241
1242 driver = container->iommu_driver;
1243 if (likely(driver && driver->ops->read))
1244 ret = driver->ops->read(container->iommu_data,
1245 buf, count, ppos);
1246
1247 return ret;
1248}
1249
1250static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
1251 size_t count, loff_t *ppos)
1252{
1253 struct vfio_container *container = filep->private_data;
1254 struct vfio_iommu_driver *driver;
1255 ssize_t ret = -EINVAL;
1256
1257 driver = container->iommu_driver;
1258 if (likely(driver && driver->ops->write))
1259 ret = driver->ops->write(container->iommu_data,
1260 buf, count, ppos);
1261
1262 return ret;
1263}
1264
1265static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1266{
1267 struct vfio_container *container = filep->private_data;
1268 struct vfio_iommu_driver *driver;
1269 int ret = -EINVAL;
1270
1271 driver = container->iommu_driver;
1272 if (likely(driver && driver->ops->mmap))
1273 ret = driver->ops->mmap(container->iommu_data, vma);
1274
1275 return ret;
1276}
1277
/*
 * File operations of the container file.  The address of this table is
 * also how vfio_group_set_container() recognises a container fd.
 */
static const struct file_operations vfio_fops = {
	.owner = THIS_MODULE,
	.open = vfio_fops_open,
	.release = vfio_fops_release,
	.read = vfio_fops_read,
	.write = vfio_fops_write,
	.unlocked_ioctl = vfio_fops_unl_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.mmap = vfio_fops_mmap,
};
1288
1289
1290
1291
/*
 * Detach @group from its container.  Callers have already brought
 * group->container_users down to zero.
 *
 * The group is detached from the backend (if any), removed from the
 * container's list, and waiters on container_q are woken.  When the last
 * group leaves, the backend is released and its module reference dropped.
 * Finally the group's reference on the container is put.
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	/* vfio_del_group_dev() may be waiting for the container to clear. */
	wake_up(&group->container_q);
	list_del(&group->container_next);

	/* With no groups left the container gives up its backend. */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}
1320
1321
1322
1323
1324
1325
1326
1327static int vfio_group_unset_container(struct vfio_group *group)
1328{
1329 int users = atomic_cmpxchg(&group->container_users, 1, 0);
1330
1331 if (!users)
1332 return -EINVAL;
1333 if (users != 1)
1334 return -EBUSY;
1335
1336 __vfio_group_unset_container(group);
1337
1338 return 0;
1339}
1340
1341
1342
1343
1344
1345
1346
/*
 * Drop one container user from @group if there is any, and detach the
 * group from its container when that was the last user.  A count that is
 * already zero is left untouched.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}
1352
/*
 * Attach @group to the container behind file descriptor @container_fd.
 *
 * Returns 0 on success; -EINVAL if the group already has a container or the
 * fd is not a VFIO container; -EPERM if a no-IOMMU group is used without
 * CAP_SYS_RAWIO or the container holds groups of the other kind; -EBADF for
 * an invalid fd; or the error from the backend's attach_group().
 */
static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Make sure the fd really refers to a VFIO container. */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	/* Not expected to trigger while the file is pinned by fdget(). */
	WARN_ON(!container);

	down_write(&container->group_lock);

	/* No-IOMMU groups and regular groups cannot share a container. */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	/* If a backend is already selected the group must be attached to it. */
	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* The group pins the container and counts as its first user. */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}
1409
1410static bool vfio_group_viable(struct vfio_group *group)
1411{
1412 return (iommu_group_for_each_dev(group->iommu_group,
1413 group, vfio_dev_viable) == 0);
1414}
1415
1416static int vfio_group_add_container_user(struct vfio_group *group)
1417{
1418 if (!atomic_inc_not_zero(&group->container_users))
1419 return -EINVAL;
1420
1421 if (group->noiommu) {
1422 atomic_dec(&group->container_users);
1423 return -EPERM;
1424 }
1425 if (!group->container->iommu_driver || !vfio_group_viable(group)) {
1426 atomic_dec(&group->container_users);
1427 return -EINVAL;
1428 }
1429
1430 return 0;
1431}
1432
/* Forward declaration; vfio_group_get_device_fd() needs its address. */
static const struct file_operations vfio_device_fops;
1434
/*
 * Open the device named @buf on @group and return a new file descriptor
 * for it.
 *
 * The group must have a container with a backend selected and must be
 * viable (-EINVAL otherwise); no-IOMMU groups additionally need
 * CAP_SYS_RAWIO (-EPERM).  On success the device reference obtained from
 * the lookup is stored as the file's private data and one more container
 * user is counted.
 *
 * Returns the new fd, or a negative error from the checks above, the
 * lookup, the device's open() callback, or fd/file allocation.
 */
static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * The fd and the file are created in separate steps so that f_mode
	 * can be adjusted before the file becomes visible through
	 * fd_install().
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/* Allow lseek, pread and pwrite on the device file. */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	/* Every open device fd counts as a container user. */
	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}
1496
1497static long vfio_group_fops_unl_ioctl(struct file *filep,
1498 unsigned int cmd, unsigned long arg)
1499{
1500 struct vfio_group *group = filep->private_data;
1501 long ret = -ENOTTY;
1502
1503 switch (cmd) {
1504 case VFIO_GROUP_GET_STATUS:
1505 {
1506 struct vfio_group_status status;
1507 unsigned long minsz;
1508
1509 minsz = offsetofend(struct vfio_group_status, flags);
1510
1511 if (copy_from_user(&status, (void __user *)arg, minsz))
1512 return -EFAULT;
1513
1514 if (status.argsz < minsz)
1515 return -EINVAL;
1516
1517 status.flags = 0;
1518
1519 if (vfio_group_viable(group))
1520 status.flags |= VFIO_GROUP_FLAGS_VIABLE;
1521
1522 if (group->container)
1523 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
1524
1525 if (copy_to_user((void __user *)arg, &status, minsz))
1526 return -EFAULT;
1527
1528 ret = 0;
1529 break;
1530 }
1531 case VFIO_GROUP_SET_CONTAINER:
1532 {
1533 int fd;
1534
1535 if (get_user(fd, (int __user *)arg))
1536 return -EFAULT;
1537
1538 if (fd < 0)
1539 return -EINVAL;
1540
1541 ret = vfio_group_set_container(group, fd);
1542 break;
1543 }
1544 case VFIO_GROUP_UNSET_CONTAINER:
1545 ret = vfio_group_unset_container(group);
1546 break;
1547 case VFIO_GROUP_GET_DEVICE_FD:
1548 {
1549 char *buf;
1550
1551 buf = strndup_user((const char __user *)arg, PAGE_SIZE);
1552 if (IS_ERR(buf))
1553 return PTR_ERR(buf);
1554
1555 ret = vfio_group_get_device_fd(group, buf);
1556 kfree(buf);
1557 break;
1558 }
1559 }
1560
1561 return ret;
1562}
1563
1564static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1565{
1566 struct vfio_group *group;
1567 int opened;
1568
1569 group = vfio_group_get_from_minor(iminor(inode));
1570 if (!group)
1571 return -ENODEV;
1572
1573 if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
1574 vfio_group_put(group);
1575 return -EPERM;
1576 }
1577
1578
1579 opened = atomic_cmpxchg(&group->opened, 0, 1);
1580 if (opened) {
1581 vfio_group_put(group);
1582 return -EBUSY;
1583 }
1584
1585
1586 if (group->container) {
1587 atomic_dec(&group->opened);
1588 vfio_group_put(group);
1589 return -EBUSY;
1590 }
1591
1592
1593 if (WARN_ON(group->notifier.head))
1594 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
1595
1596 filep->private_data = group;
1597
1598 return 0;
1599}
1600
1601static int vfio_group_fops_release(struct inode *inode, struct file *filep)
1602{
1603 struct vfio_group *group = filep->private_data;
1604
1605 filep->private_data = NULL;
1606
1607 vfio_group_try_dissolve_container(group);
1608
1609 atomic_dec(&group->opened);
1610
1611 vfio_group_put(group);
1612
1613 return 0;
1614}
1615
1616static const struct file_operations vfio_group_fops = {
1617 .owner = THIS_MODULE,
1618 .unlocked_ioctl = vfio_group_fops_unl_ioctl,
1619 .compat_ioctl = compat_ptr_ioctl,
1620 .open = vfio_group_fops_open,
1621 .release = vfio_group_fops_release,
1622};
1623
1624
1625
1626
1627static int vfio_device_fops_release(struct inode *inode, struct file *filep)
1628{
1629 struct vfio_device *device = filep->private_data;
1630
1631 device->ops->release(device->device_data);
1632
1633 vfio_group_try_dissolve_container(device->group);
1634
1635 vfio_device_put(device);
1636
1637 return 0;
1638}
1639
1640static long vfio_device_fops_unl_ioctl(struct file *filep,
1641 unsigned int cmd, unsigned long arg)
1642{
1643 struct vfio_device *device = filep->private_data;
1644
1645 if (unlikely(!device->ops->ioctl))
1646 return -EINVAL;
1647
1648 return device->ops->ioctl(device->device_data, cmd, arg);
1649}
1650
1651static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1652 size_t count, loff_t *ppos)
1653{
1654 struct vfio_device *device = filep->private_data;
1655
1656 if (unlikely(!device->ops->read))
1657 return -EINVAL;
1658
1659 return device->ops->read(device->device_data, buf, count, ppos);
1660}
1661
1662static ssize_t vfio_device_fops_write(struct file *filep,
1663 const char __user *buf,
1664 size_t count, loff_t *ppos)
1665{
1666 struct vfio_device *device = filep->private_data;
1667
1668 if (unlikely(!device->ops->write))
1669 return -EINVAL;
1670
1671 return device->ops->write(device->device_data, buf, count, ppos);
1672}
1673
1674static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1675{
1676 struct vfio_device *device = filep->private_data;
1677
1678 if (unlikely(!device->ops->mmap))
1679 return -EINVAL;
1680
1681 return device->ops->mmap(device->device_data, vma);
1682}
1683
1684static const struct file_operations vfio_device_fops = {
1685 .owner = THIS_MODULE,
1686 .release = vfio_device_fops_release,
1687 .read = vfio_device_fops_read,
1688 .write = vfio_device_fops_write,
1689 .unlocked_ioctl = vfio_device_fops_unl_ioctl,
1690 .compat_ioctl = compat_ptr_ioctl,
1691 .mmap = vfio_device_fops_mmap,
1692};
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721struct vfio_group *vfio_group_get_external_user(struct file *filep)
1722{
1723 struct vfio_group *group = filep->private_data;
1724 int ret;
1725
1726 if (filep->f_op != &vfio_group_fops)
1727 return ERR_PTR(-EINVAL);
1728
1729 ret = vfio_group_add_container_user(group);
1730 if (ret)
1731 return ERR_PTR(ret);
1732
1733 vfio_group_get(group);
1734
1735 return group;
1736}
1737EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev)
1759{
1760 struct vfio_group *group;
1761 int ret;
1762
1763 group = vfio_group_get_from_dev(dev);
1764 if (!group)
1765 return ERR_PTR(-ENODEV);
1766
1767 ret = vfio_group_add_container_user(group);
1768 if (ret) {
1769 vfio_group_put(group);
1770 return ERR_PTR(ret);
1771 }
1772
1773 return group;
1774}
1775EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev);
1776
/*
 * Release a group obtained through one of the
 * vfio_group_get_external_user*() helpers: calls
 * vfio_group_try_dissolve_container() and then drops the group reference.
 */
void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);

	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
1783
1784bool vfio_external_group_match_file(struct vfio_group *test_group,
1785 struct file *filep)
1786{
1787 struct vfio_group *group = filep->private_data;
1788
1789 return (filep->f_op == &vfio_group_fops) && (group == test_group);
1790}
1791EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
1792
1793int vfio_external_user_iommu_id(struct vfio_group *group)
1794{
1795 return iommu_group_id(group->iommu_group);
1796}
1797EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
1798
1799long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
1800{
1801 return vfio_ioctl_check_extension(group->container, arg);
1802}
1803EXPORT_SYMBOL_GPL(vfio_external_check_extension);
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
1818 size_t size, u16 id, u16 version)
1819{
1820 void *buf;
1821 struct vfio_info_cap_header *header, *tmp;
1822
1823 buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
1824 if (!buf) {
1825 kfree(caps->buf);
1826 caps->size = 0;
1827 return ERR_PTR(-ENOMEM);
1828 }
1829
1830 caps->buf = buf;
1831 header = buf + caps->size;
1832
1833
1834 memset(header, 0, size);
1835
1836 header->id = id;
1837 header->version = version;
1838
1839
1840 for (tmp = buf; tmp->next; tmp = buf + tmp->next)
1841 ;
1842
1843 tmp->next = caps->size;
1844 caps->size += size;
1845
1846 return header;
1847}
1848EXPORT_SYMBOL_GPL(vfio_info_cap_add);
1849
1850void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
1851{
1852 struct vfio_info_cap_header *tmp;
1853 void *buf = (void *)caps->buf;
1854
1855 for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
1856 tmp->next += offset;
1857}
1858EXPORT_SYMBOL(vfio_info_cap_shift);
1859
1860int vfio_info_add_capability(struct vfio_info_cap *caps,
1861 struct vfio_info_cap_header *cap, size_t size)
1862{
1863 struct vfio_info_cap_header *header;
1864
1865 header = vfio_info_cap_add(caps, size, cap->id, cap->version);
1866 if (IS_ERR(header))
1867 return PTR_ERR(header);
1868
1869 memcpy(header + 1, cap + 1, size - sizeof(*header));
1870
1871 return 0;
1872}
1873EXPORT_SYMBOL(vfio_info_add_capability);
1874
1875int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
1876 int max_irq_type, size_t *data_size)
1877{
1878 unsigned long minsz;
1879 size_t size;
1880
1881 minsz = offsetofend(struct vfio_irq_set, count);
1882
1883 if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
1884 (hdr->count >= (U32_MAX - hdr->start)) ||
1885 (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
1886 VFIO_IRQ_SET_ACTION_TYPE_MASK)))
1887 return -EINVAL;
1888
1889 if (data_size)
1890 *data_size = 0;
1891
1892 if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
1893 return -EINVAL;
1894
1895 switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
1896 case VFIO_IRQ_SET_DATA_NONE:
1897 size = 0;
1898 break;
1899 case VFIO_IRQ_SET_DATA_BOOL:
1900 size = sizeof(uint8_t);
1901 break;
1902 case VFIO_IRQ_SET_DATA_EVENTFD:
1903 size = sizeof(int32_t);
1904 break;
1905 default:
1906 return -EINVAL;
1907 }
1908
1909 if (size) {
1910 if (hdr->argsz - minsz < hdr->count * size)
1911 return -EINVAL;
1912
1913 if (!data_size)
1914 return -EINVAL;
1915
1916 *data_size = hdr->count * size;
1917 }
1918
1919 return 0;
1920}
1921EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
1935 int prot, unsigned long *phys_pfn)
1936{
1937 struct vfio_container *container;
1938 struct vfio_group *group;
1939 struct vfio_iommu_driver *driver;
1940 int ret;
1941
1942 if (!dev || !user_pfn || !phys_pfn || !npage)
1943 return -EINVAL;
1944
1945 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1946 return -E2BIG;
1947
1948 group = vfio_group_get_from_dev(dev);
1949 if (!group)
1950 return -ENODEV;
1951
1952 if (group->dev_counter > 1) {
1953 ret = -EINVAL;
1954 goto err_pin_pages;
1955 }
1956
1957 ret = vfio_group_add_container_user(group);
1958 if (ret)
1959 goto err_pin_pages;
1960
1961 container = group->container;
1962 driver = container->iommu_driver;
1963 if (likely(driver && driver->ops->pin_pages))
1964 ret = driver->ops->pin_pages(container->iommu_data,
1965 group->iommu_group, user_pfn,
1966 npage, prot, phys_pfn);
1967 else
1968 ret = -ENOTTY;
1969
1970 vfio_group_try_dissolve_container(group);
1971
1972err_pin_pages:
1973 vfio_group_put(group);
1974 return ret;
1975}
1976EXPORT_SYMBOL(vfio_pin_pages);
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
1988{
1989 struct vfio_container *container;
1990 struct vfio_group *group;
1991 struct vfio_iommu_driver *driver;
1992 int ret;
1993
1994 if (!dev || !user_pfn || !npage)
1995 return -EINVAL;
1996
1997 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1998 return -E2BIG;
1999
2000 group = vfio_group_get_from_dev(dev);
2001 if (!group)
2002 return -ENODEV;
2003
2004 ret = vfio_group_add_container_user(group);
2005 if (ret)
2006 goto err_unpin_pages;
2007
2008 container = group->container;
2009 driver = container->iommu_driver;
2010 if (likely(driver && driver->ops->unpin_pages))
2011 ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
2012 npage);
2013 else
2014 ret = -ENOTTY;
2015
2016 vfio_group_try_dissolve_container(group);
2017
2018err_unpin_pages:
2019 vfio_group_put(group);
2020 return ret;
2021}
2022EXPORT_SYMBOL(vfio_unpin_pages);
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045int vfio_group_pin_pages(struct vfio_group *group,
2046 unsigned long *user_iova_pfn, int npage,
2047 int prot, unsigned long *phys_pfn)
2048{
2049 struct vfio_container *container;
2050 struct vfio_iommu_driver *driver;
2051 int ret;
2052
2053 if (!group || !user_iova_pfn || !phys_pfn || !npage)
2054 return -EINVAL;
2055
2056 if (group->dev_counter > 1)
2057 return -EINVAL;
2058
2059 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
2060 return -E2BIG;
2061
2062 container = group->container;
2063 driver = container->iommu_driver;
2064 if (likely(driver && driver->ops->pin_pages))
2065 ret = driver->ops->pin_pages(container->iommu_data,
2066 group->iommu_group, user_iova_pfn,
2067 npage, prot, phys_pfn);
2068 else
2069 ret = -ENOTTY;
2070
2071 return ret;
2072}
2073EXPORT_SYMBOL(vfio_group_pin_pages);
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093int vfio_group_unpin_pages(struct vfio_group *group,
2094 unsigned long *user_iova_pfn, int npage)
2095{
2096 struct vfio_container *container;
2097 struct vfio_iommu_driver *driver;
2098 int ret;
2099
2100 if (!group || !user_iova_pfn || !npage)
2101 return -EINVAL;
2102
2103 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
2104 return -E2BIG;
2105
2106 container = group->container;
2107 driver = container->iommu_driver;
2108 if (likely(driver && driver->ops->unpin_pages))
2109 ret = driver->ops->unpin_pages(container->iommu_data,
2110 user_iova_pfn, npage);
2111 else
2112 ret = -ENOTTY;
2113
2114 return ret;
2115}
2116EXPORT_SYMBOL(vfio_group_unpin_pages);
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
2145 void *data, size_t len, bool write)
2146{
2147 struct vfio_container *container;
2148 struct vfio_iommu_driver *driver;
2149 int ret = 0;
2150
2151 if (!group || !data || len <= 0)
2152 return -EINVAL;
2153
2154 container = group->container;
2155 driver = container->iommu_driver;
2156
2157 if (likely(driver && driver->ops->dma_rw))
2158 ret = driver->ops->dma_rw(container->iommu_data,
2159 user_iova, data, len, write);
2160 else
2161 ret = -ENOTTY;
2162
2163 return ret;
2164}
2165EXPORT_SYMBOL(vfio_dma_rw);
2166
2167static int vfio_register_iommu_notifier(struct vfio_group *group,
2168 unsigned long *events,
2169 struct notifier_block *nb)
2170{
2171 struct vfio_container *container;
2172 struct vfio_iommu_driver *driver;
2173 int ret;
2174
2175 ret = vfio_group_add_container_user(group);
2176 if (ret)
2177 return -EINVAL;
2178
2179 container = group->container;
2180 driver = container->iommu_driver;
2181 if (likely(driver && driver->ops->register_notifier))
2182 ret = driver->ops->register_notifier(container->iommu_data,
2183 events, nb);
2184 else
2185 ret = -ENOTTY;
2186
2187 vfio_group_try_dissolve_container(group);
2188
2189 return ret;
2190}
2191
2192static int vfio_unregister_iommu_notifier(struct vfio_group *group,
2193 struct notifier_block *nb)
2194{
2195 struct vfio_container *container;
2196 struct vfio_iommu_driver *driver;
2197 int ret;
2198
2199 ret = vfio_group_add_container_user(group);
2200 if (ret)
2201 return -EINVAL;
2202
2203 container = group->container;
2204 driver = container->iommu_driver;
2205 if (likely(driver && driver->ops->unregister_notifier))
2206 ret = driver->ops->unregister_notifier(container->iommu_data,
2207 nb);
2208 else
2209 ret = -ENOTTY;
2210
2211 vfio_group_try_dissolve_container(group);
2212
2213 return ret;
2214}
2215
2216void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
2217{
2218 group->kvm = kvm;
2219 blocking_notifier_call_chain(&group->notifier,
2220 VFIO_GROUP_NOTIFY_SET_KVM, kvm);
2221}
2222EXPORT_SYMBOL_GPL(vfio_group_set_kvm);
2223
2224static int vfio_register_group_notifier(struct vfio_group *group,
2225 unsigned long *events,
2226 struct notifier_block *nb)
2227{
2228 int ret;
2229 bool set_kvm = false;
2230
2231 if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
2232 set_kvm = true;
2233
2234
2235 *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
2236
2237
2238 if (*events)
2239 return -EINVAL;
2240
2241 ret = vfio_group_add_container_user(group);
2242 if (ret)
2243 return -EINVAL;
2244
2245 ret = blocking_notifier_chain_register(&group->notifier, nb);
2246
2247
2248
2249
2250
2251 if (!ret && set_kvm && group->kvm)
2252 blocking_notifier_call_chain(&group->notifier,
2253 VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
2254
2255 vfio_group_try_dissolve_container(group);
2256
2257 return ret;
2258}
2259
2260static int vfio_unregister_group_notifier(struct vfio_group *group,
2261 struct notifier_block *nb)
2262{
2263 int ret;
2264
2265 ret = vfio_group_add_container_user(group);
2266 if (ret)
2267 return -EINVAL;
2268
2269 ret = blocking_notifier_chain_unregister(&group->notifier, nb);
2270
2271 vfio_group_try_dissolve_container(group);
2272
2273 return ret;
2274}
2275
2276int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
2277 unsigned long *events, struct notifier_block *nb)
2278{
2279 struct vfio_group *group;
2280 int ret;
2281
2282 if (!dev || !nb || !events || (*events == 0))
2283 return -EINVAL;
2284
2285 group = vfio_group_get_from_dev(dev);
2286 if (!group)
2287 return -ENODEV;
2288
2289 switch (type) {
2290 case VFIO_IOMMU_NOTIFY:
2291 ret = vfio_register_iommu_notifier(group, events, nb);
2292 break;
2293 case VFIO_GROUP_NOTIFY:
2294 ret = vfio_register_group_notifier(group, events, nb);
2295 break;
2296 default:
2297 ret = -EINVAL;
2298 }
2299
2300 vfio_group_put(group);
2301 return ret;
2302}
2303EXPORT_SYMBOL(vfio_register_notifier);
2304
2305int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
2306 struct notifier_block *nb)
2307{
2308 struct vfio_group *group;
2309 int ret;
2310
2311 if (!dev || !nb)
2312 return -EINVAL;
2313
2314 group = vfio_group_get_from_dev(dev);
2315 if (!group)
2316 return -ENODEV;
2317
2318 switch (type) {
2319 case VFIO_IOMMU_NOTIFY:
2320 ret = vfio_unregister_iommu_notifier(group, nb);
2321 break;
2322 case VFIO_GROUP_NOTIFY:
2323 ret = vfio_unregister_group_notifier(group, nb);
2324 break;
2325 default:
2326 ret = -EINVAL;
2327 }
2328
2329 vfio_group_put(group);
2330 return ret;
2331}
2332EXPORT_SYMBOL(vfio_unregister_notifier);
2333
2334
2335
2336
2337static char *vfio_devnode(struct device *dev, umode_t *mode)
2338{
2339 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
2340}
2341
2342static struct miscdevice vfio_dev = {
2343 .minor = VFIO_MINOR,
2344 .name = "vfio",
2345 .fops = &vfio_fops,
2346 .nodename = "vfio/vfio",
2347 .mode = S_IRUGO | S_IWUGO,
2348};
2349
/*
 * Module init: set up the global vfio state, register the misc device,
 * create the "vfio" class and the group character device region, and
 * (with CONFIG_VFIO_NOIOMMU) register the noiommu IOMMU driver.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * registered so far is undone in reverse order.
 */
static int __init vfio_init(void)
{
	int ret;

	/* Initialise the bookkeeping embedded in the global vfio struct. */
	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* Class under which group devices are created. */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	/* Place the class's device nodes under "vfio/". */
	vfio.class->devnode = vfio_devnode;

	/* Reserve MINORMASK + 1 minors for the group cdev. */
	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

#ifdef CONFIG_VFIO_NOIOMMU
	/* The return value of this registration is not checked. */
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

	/* Error unwinding: each label undoes the step before the failure. */
err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}
2401
/*
 * Module exit: tear down everything vfio_init() set up.  Warns if any
 * group is still on the global group list at this point.
 */
static void __exit vfio_cleanup(void)
{
	/* Groups still listed at module exit are unexpected. */
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	/* Clear the pointer so no stale class pointer is left behind. */
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}
2416
/* Module entry and exit points. */
module_init(vfio_init);
module_exit(vfio_cleanup);

/* Module metadata; values come from the DRIVER_* macros defined above. */
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
/* Aliases for the misc device minor and its "vfio/vfio" device node. */
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
/* Soft dependency: the listed IOMMU backend modules load after this one. */
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");