// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct idr			group_idr;
	struct mutex			group_lock;
	struct cdev			group_cdev;
	dev_t				group_devt;
	wait_queue_head_t		release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_unbound_dev {
	struct device			*dev;
	struct list_head		unbound_next;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct device			*dev;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
	struct list_head		unbound_list;
	struct mutex			unbound_lock;
	atomic_t			opened;
	wait_queue_head_t		container_q;
	bool				noiommu;
	struct kvm			*kvm;
	struct blocking_notifier_head	notifier;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
#endif

/*
 * Helpers for looking up the IOMMU group of a device.  These exist
 * primarily to support no-IOMMU mode, where a fake IOMMU group is
 * fabricated for devices on buses without IOMMU backing so they can
 * still be exposed through VFIO.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * Only fabricate a group if the device is not already grouped,
	 * noiommu mode is enabled, and the bus truly lacks IOMMU support.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	iommu_group_set_iommudata(group, &noiommu, NULL);
	ret = iommu_group_add_device(group, dev);
	if (ret) {
		iommu_group_put(group);
		return NULL;
	}

	/*
	 * The fake group has no IOMMU backing it; anything using it gets
	 * no isolation and no DMA translation.  Taint the kernel so the
	 * state is unmistakable in bug reports.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (iommu_group_get_iommudata(group) == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group)
{
	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
#endif

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
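
/*
 * Illustrative sketch (not part of this file): an IOMMU backend module,
 * e.g. vfio_iommu_type1, pairs these calls in its module init/exit.  The
 * ops structure and function names below are hypothetical.
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_iommu_open,
 *		.release	= my_iommu_release,
 *		.ioctl		= my_iommu_ioctl,
 *		.attach_group	= my_iommu_attach_group,
 *		.detach_group	= my_iommu_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_iommu_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 */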

/*
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/*
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	init_waitqueue_head(&group->container_q);
	group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback.  Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
			    iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return ERR_CAST(dev);
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}

/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	struct iommu_group *iommu_group = group->iommu_group;

	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->notifier.head);

	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
	iommu_group_put(iommu_group);
}

static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

struct vfio_group_put_work {
	struct work_struct work;
	struct vfio_group *group;
};

static void vfio_group_put_bg(struct work_struct *work)
{
	struct vfio_group_put_work *do_work;

	do_work = container_of(work, struct vfio_group_put_work, work);

	vfio_group_put(do_work->group);
	kfree(do_work);
}

/*
 * Defer the final group put to a workqueue; the IOMMU group notifier
 * runs with locks held that the release path may also need.
 */
static void vfio_group_schedule_put(struct vfio_group *group)
{
	struct vfio_group_put_work *do_work;

	do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
	if (WARN_ON(!do_work))
		return;

	INIT_WORK(&do_work->work, vfio_group_put_bg);
	do_work->group = group;
	schedule_work(&do_work->work);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a try as we will always get the group reference if the
 * group still exists; searching vfio.group_list confirms the group
 * has not already been released out from under us.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);

	return group;
}

/*
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only breaks the user owned devices downstream of the
 * bridge.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	return match_string(vfio_driver_whitelist,
			    ARRAY_SIZE(vfio_driver_whitelist),
			    drv->name) >= 0;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = READ_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

/*
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	dev_WARN(dev, "Device added to live group %d!\n",
		 iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed.  Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here.  If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused.  If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		dev_dbg(dev, "%s: group %d binding to driver\n", __func__,
			iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__,
			iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		dev_dbg(dev, "%s: group %d unbinding from driver %s\n",
			__func__, iommu_group_id(group->iommu_group),
			dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		dev_dbg(dev, "%s: group %d unbound from driver\n", __func__,
			iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */
		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	/*
	 * If we're the last reference to the group, the group will be
	 * released, which includes unregistering the iommu group notifier.
	 * We hold a read-lock on that notifier list, unregistering needs
	 * a write-lock... deadlock.  Release our reference asynchronously
	 * to avoid that situation.
	 */
	vfio_group_schedule_put(group);
	return NOTIFY_OK;
}

/*
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group.  A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		dev_WARN(dev, "Device already exists on group %d\n",
			 iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference.  The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
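
/*
 * Illustrative sketch (not part of this file): a VFIO bus driver such as
 * vfio-pci registers each device it probes and unregisters it on remove.
 * The names my_vfio_ops and my_device_state below are hypothetical.
 *
 *	static int my_probe(struct pci_dev *pdev,
 *			    const struct pci_device_id *id)
 *	{
 *		struct my_device_state *state;
 *		int ret;
 *
 *		state = kzalloc(sizeof(*state), GFP_KERNEL);
 *		if (!state)
 *			return -ENOMEM;
 *
 *		ret = vfio_add_group_dev(&pdev->dev, &my_vfio_ops, state);
 *		if (ret)
 *			kfree(state);
 *		return ret;
 *	}
 *
 *	static void my_remove(struct pci_dev *pdev)
 *	{
 *		// May block until userspace closes the device fd
 *		struct my_device_state *state = vfio_del_group_dev(&pdev->dev);
 *
 *		kfree(state);
 *	}
 */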

/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct vfio_group *group;
	struct vfio_device *device;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it->device_data, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);

/*
 * Unregister a device from VFIO and wait for all references to it to be
 * released, requesting it back from any user via the driver's .request
 * callback.  Returns the device_data originally passed to
 * vfio_add_group_dev().
 */
void *vfio_del_group_dev(struct device *dev)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference.  Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group.  This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference.  To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver.  The driver may in turn need to request the
	 * device from the user.  We send the request on an arbitrary
	 * interval with counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	add_wait_queue(&vfio.release_q, &wait);

	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			wait_woken(&wait, TASK_UNINTERRUPTIBLE, HZ * 10);
		} else {
			wait_woken(&wait, TASK_INTERRUPTIBLE, HZ * 10);
			if (signal_pending(current)) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}

	} while (1);

	remove_wait_queue(&vfio.release_q, &wait);

	/*
	 * In order to support multiple devices per group, devices can be
	 * plucked from the group while other devices in the group are still
	 * in use.  The container persists with this group and those remaining
	 * devices still attached.  If the user creates an isolation violation
	 * by binding this device to another driver while the group is still
	 * in use, that's their fault.  However, in the case of removing the
	 * last, or potentially the only, device in the group there can be no
	 * other in-use devices in the group.  In order to avoid racing a user
	 * process that may attempt to immediately bind this device to another
	 * driver, stall here until the group is no longer attached to a
	 * container.
	 */
	if (list_empty(&group->device_list))
		wait_event(group->container_q, !group->container);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);

/*
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				/* noiommu containers and drivers must match */
#ifdef CONFIG_VFIO_NOIOMMU
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, read/write/mmap on the container are
 * passed through to the driver, allowing management interfaces beyond
 * ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_fops_mmap,
};
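
/*
 * Illustrative userspace sketch (not part of this file): note that, per
 * vfio_ioctl_set_iommu() above, at least one group must be attached to
 * the container before VFIO_SET_IOMMU can succeed, while
 * VFIO_CHECK_EXTENSION works on an empty container by polling all
 * registered backends.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		// unknown API version, bail
 *
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		// type1 IOMMU backend not available
 *
 *	// ...VFIO_GROUP_SET_CONTAINER on a group fd goes here...
 *
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 */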

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	wake_up(&group->container_q);
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static int vfio_group_add_container_user(struct vfio_group *group)
{
	if (!atomic_inc_not_zero(&group->container_users))
		return -EINVAL;

	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return -EPERM;
	}
	if (!group->container->iommu_driver || !vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return -EINVAL;
	}

	return 0;
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Warn if previous user didn't cleanup and re-init to drop them */
	if (WARN_ON(group->notifier.head))
		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/*
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
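
/*
 * Illustrative userspace sketch (not part of this file), continuing the
 * container example above; group number 26 and device name "0000:06:0d.0"
 * are placeholders:
 *
 *	int group = open("/dev/vfio/26", O_RDWR);
 *
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		// not viable: some device in the group is bound elsewhere
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *
 *	int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */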

/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 *  1. do normal VFIO init operation:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for a container.
 * When IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- IOMMU is set for it.
 * If both checks passed, vfio_group_get_external_user()
 * increments the container user counter to prevent
 * the VFIO group from disposal before KVM exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to know an IOMMU ID.
 *
 * 4. When the external KVM finishes, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;
	int ret;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	ret = vfio_group_add_container_user(group);
	if (ret)
		return ERR_PTR(ret);

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
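
/*
 * Illustrative sketch (not part of this file): an external user such as
 * KVM, handed a group fd from userspace, takes and later drops its
 * reference roughly as follows ("fd" is the group file descriptor):
 *
 *	struct fd f = fdget(fd);
 *	struct vfio_group *grp;
 *
 *	if (!f.file)
 *		return -EBADF;
 *
 *	grp = vfio_group_get_external_user(f.file);
 *	fdput(f);
 *	if (IS_ERR(grp))
 *		return PTR_ERR(grp);
 *
 *	// ...use vfio_external_user_iommu_id(grp), etc....
 *
 *	vfio_group_put_external_user(grp);
 */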

/**
 * vfio_group_get_external_user_from_dev - Get the VFIO group for a device
 * @dev: device for which the group is wanted
 *
 * Verifies that a VFIO group is associated with the device and that an
 * IOMMU is set for the group.  On success, increments the container user
 * counter to prevent the group from being disposed of while the external
 * user holds it; the caller releases the group with
 * vfio_group_put_external_user().
 *
 * Return: the vfio_group pointer, or an ERR_PTR() on failure.
 */
struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev)
{
	struct vfio_group *group;
	int ret;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	ret = vfio_group_add_container_user(group);
	if (ret) {
		vfio_group_put(group);
		return ERR_PTR(ret);
	}

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

bool vfio_external_group_match_file(struct vfio_group *test_group,
				    struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	return (filep->f_op == &vfio_group_fops) && (group == test_group);
}
EXPORT_SYMBOL_GPL(vfio_external_group_match_file);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);

/*
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities: allocate or
 * reallocate the buffer with additional @size, fill in @id and @version
 * of the new capability, and link it to the tail of the chain.  Returns
 * a pointer to the new capability header, or ERR_PTR(-ENOMEM) after
 * freeing the existing buffer on allocation failure.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
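
/*
 * Illustrative sketch (not part of this file): a sub-driver builds a
 * capability chain while filling in a *_info ioctl, then shifts the
 * chained offsets by the size of the fixed info structure before copying
 * the chain out just past it.  "info" is a hypothetical local of the
 * surrounding ioctl handler; a real capability would usually carry a
 * payload after the bare header shown here.
 *
 *	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *	struct vfio_info_cap_header hdr = {
 *		.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP,	// example id
 *		.version = 1,
 *	};
 *
 *	vfio_info_add_capability(&caps, &hdr, sizeof(hdr));
 *
 *	if (caps.size) {
 *		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
 *		info.cap_offset = sizeof(info);
 *		vfio_info_cap_shift(&caps, sizeof(info));
 *		// copy_to_user() info, then caps.buf at info.cap_offset
 *		kfree(caps.buf);
 *	}
 */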

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);

int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
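
/*
 * Illustrative sketch (not part of this file): a device driver's
 * VFIO_DEVICE_SET_IRQS handler validates the header before copying the
 * variable-length data.  "NUM_IRQS" and "NUM_IRQ_TYPES" are hypothetical
 * device-specific limits.
 *
 *	struct vfio_irq_set hdr;
 *	size_t data_size = 0;
 *	u8 *data = NULL;
 *	int ret;
 *
 *	if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
 *		return -EFAULT;
 *
 *	ret = vfio_set_irqs_validate_and_prepare(&hdr, NUM_IRQS,
 *						 NUM_IRQ_TYPES, &data_size);
 *	if (ret)
 *		return ret;
 *
 *	if (data_size) {
 *		data = memdup_user((void __user *)(arg + sizeof(hdr)),
 *				   data_size);
 *		if (IS_ERR(data))
 *			return PTR_ERR(data);
 *	}
 */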

/*
 * Pin a set of guest PFNs and return their associated host PFNs for local
 * domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be pinned.
 * @npage [in]   : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @phys_pfn[out]: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
		   int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_pin_pages;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
					     npage, prot, phys_pfn);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

err_pin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin set of host PFNs for local domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be unpinned.
 * @npage [in]   : count of elements in user_pfn array.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_unpin_pages;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
					       npage);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

err_unpin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unpin_pages);
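
/*
 * Illustrative sketch (not part of this file): a mediated-device vendor
 * driver pins a guest page before programming DMA and unpins it when
 * done.  mdev_dev() returns the mdev's struct device; the gpa/gfn/hpfn
 * names are hypothetical.
 *
 *	unsigned long gfn = gpa >> PAGE_SHIFT, hpfn;
 *	int ret;
 *
 *	ret = vfio_pin_pages(mdev_dev(mdev), &gfn, 1,
 *			     IOMMU_READ | IOMMU_WRITE, &hpfn);
 *	if (ret != 1)
 *		return ret < 0 ? ret : -EFAULT;
 *
 *	// ...program hardware with hpfn << PAGE_SHIFT...
 *
 *	vfio_unpin_pages(mdev_dev(mdev), &gfn, 1);
 */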

/*
 * Pin a set of guest IOVA PFNs and return their associated host PFNs for a
 * VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * But it can keep the reference to the VFIO group for several calls into
 * this interface.
 * After finishing using of the VFIO group, the caller needs to release the
 * VFIO group by calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be pinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count should not be greater than
 *			  VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]		: protection flags
 * @phys_pfn [out]	: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_group_pin_pages(struct vfio_group *group,
			 unsigned long *user_iova_pfn, int npage,
			 int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!group || !user_iova_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data,
					     user_iova_pfn, npage,
					     prot, phys_pfn);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_group_pin_pages);

/*
 * Unpin a set of guest IOVA PFNs for a VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * But it can keep the reference to the VFIO group for several calls into
 * this interface.
 * After finishing using of the VFIO group, the caller needs to release the
 * VFIO group by calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be unpinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count should not be greater than
 *			  VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_group_unpin_pages(struct vfio_group *group,
			   unsigned long *user_iova_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!group || !user_iova_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data,
					       user_iova_pfn, npage);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_group_unpin_pages);

/*
 * This interface allows the CPUs to perform some sort of virtual DMA on
 * behalf of the device.
 *
 * CPUs read/write from/into a range of IOVAs pointing to user space memory
 * into/from a kernel buffer.
 *
 * As the read/write of user space memory is conducted via the CPUs and is
 * not a real device DMA, it is not necessary to pin the user space memory.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * But it can keep the reference to the VFIO group for several calls into
 * this interface.
 * After finishing using of the VFIO group, the caller needs to release the
 * VFIO group by calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova [in]	: base IOVA of a user space buffer
 * @data [in]		: pointer to kernel buffer
 * @len [in]		: kernel buffer length
 * @write		: indicate read or write
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
		void *data, size_t len, bool write)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (!group || !data || len <= 0)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;

	if (likely(driver && driver->ops->dma_rw))
		ret = driver->ops->dma_rw(container->iommu_data,
					  user_iova, data, len, write);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_dma_rw);
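
/*
 * Illustrative sketch (not part of this file): with an external-user
 * group reference held, a driver can read a guest-visible structure at
 * IOVA "iova" into a kernel buffer without pinning the page.  The
 * my_desc type is hypothetical.
 *
 *	struct my_desc desc;
 *	int ret;
 *
 *	ret = vfio_dma_rw(grp, iova, &desc, sizeof(desc), false);
 *	if (ret)
 *		return ret;
 */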

static int vfio_register_iommu_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->register_notifier))
		ret = driver->ops->register_notifier(container->iommu_data,
						     events, nb);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_iommu_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unregister_notifier))
		ret = driver->ops->unregister_notifier(container->iommu_data,
						       nb);
	else
		ret = -ENOTTY;

	vfio_group_try_dissolve_container(group);

	return ret;
}

void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
{
	group->kvm = kvm;
	blocking_notifier_call_chain(&group->notifier,
				VFIO_GROUP_NOTIFY_SET_KVM, kvm);
}
EXPORT_SYMBOL_GPL(vfio_group_set_kvm);

static int vfio_register_group_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
{
	int ret;
	bool set_kvm = false;

	if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
		set_kvm = true;

	/* clear known events */
	*events &= ~VFIO_GROUP_NOTIFY_SET_KVM;

	/* refuse to register if still events remaining */
	if (*events)
		return -EINVAL;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	ret = blocking_notifier_chain_register(&group->notifier, nb);

	/*
	 * The attaching of kvm and vfio_group might already happen, so
	 * here we replay once upon registration.
	 */
	if (!ret && set_kvm && group->kvm)
		blocking_notifier_call_chain(&group->notifier,
					VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);

	vfio_group_try_dissolve_container(group);

	return ret;
}

static int vfio_unregister_group_notifier(struct vfio_group *group,
					  struct notifier_block *nb)
{
	int ret;

	ret = vfio_group_add_container_user(group);
	if (ret)
		return -EINVAL;

	ret = blocking_notifier_chain_unregister(&group->notifier, nb);

	vfio_group_try_dissolve_container(group);

	return ret;
}

int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
			   unsigned long *events, struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb || !events || (*events == 0))
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_register_iommu_notifier(group, events, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_register_group_notifier(group, events, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_register_notifier);

int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
			     struct notifier_block *nb)
{
	struct vfio_group *group;
	int ret;

	if (!dev || !nb)
		return -EINVAL;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return -ENODEV;

	switch (type) {
	case VFIO_IOMMU_NOTIFY:
		ret = vfio_unregister_iommu_notifier(group, nb);
		break;
	case VFIO_GROUP_NOTIFY:
		ret = vfio_unregister_group_notifier(group, nb);
		break;
	default:
		ret = -EINVAL;
	}

	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unregister_notifier);
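
/*
 * Illustrative sketch (not part of this file): an mdev vendor driver
 * registers for IOMMU unmap notifications so it can invalidate pages it
 * has pinned.  "my_dma_unmap_cb" is a hypothetical notifier callback.
 *
 *	static int my_dma_unmap_cb(struct notifier_block *nb,
 *				   unsigned long action, void *data)
 *	{
 *		if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 *			struct vfio_iommu_type1_dma_unmap *unmap = data;
 *			// ...vfio_unpin_pages() anything in [iova, iova+size)...
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 *	struct notifier_block nb = { .notifier_call = my_dma_unmap_cb };
 *
 *	ret = vfio_register_notifier(dev, VFIO_IOMMU_NOTIFY, &events, &nb);
 */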

/*
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");