1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "qemu/osdep.h"
18#include "qapi/error.h"
19#include <sys/ioctl.h>
20#include <linux/vfio.h>
21
22#include "hw/vfio/vfio-platform.h"
23#include "qemu/error-report.h"
24#include "qemu/range.h"
25#include "sysemu/sysemu.h"
26#include "exec/memory.h"
27#include "qemu/queue.h"
28#include "hw/sysbus.h"
29#include "trace.h"
30#include "hw/platform-bus.h"
31#include "sysemu/kvm.h"
32
33
34
35
36
37static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
38{
39 return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
40}
41
42
43
44
45
46
47
48
49static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
50 struct vfio_irq_info info, Error **errp)
51{
52 int ret;
53 VFIOPlatformDevice *vdev =
54 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
55 SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
56 VFIOINTp *intp;
57
58 intp = g_malloc0(sizeof(*intp));
59 intp->vdev = vdev;
60 intp->pin = info.index;
61 intp->flags = info.flags;
62 intp->state = VFIO_IRQ_INACTIVE;
63 intp->kvm_accel = false;
64
65 sysbus_init_irq(sbdev, &intp->qemuirq);
66
67
68 intp->interrupt = g_malloc0(sizeof(EventNotifier));
69 ret = event_notifier_init(intp->interrupt, 0);
70 if (ret) {
71 g_free(intp->interrupt);
72 g_free(intp);
73 error_setg_errno(errp, -ret,
74 "failed to initialize trigger eventd notifier");
75 return NULL;
76 }
77 if (vfio_irq_is_automasked(intp)) {
78
79 intp->unmask = g_malloc0(sizeof(EventNotifier));
80 ret = event_notifier_init(intp->unmask, 0);
81 if (ret) {
82 g_free(intp->interrupt);
83 g_free(intp->unmask);
84 g_free(intp);
85 error_setg_errno(errp, -ret,
86 "failed to initialize resample eventd notifier");
87 return NULL;
88 }
89 }
90
91 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
92 return intp;
93}
94
95
96
97
98
99
100
101
102
103
104static int vfio_set_trigger_eventfd(VFIOINTp *intp,
105 eventfd_user_side_handler_t handler)
106{
107 VFIODevice *vbasedev = &intp->vdev->vbasedev;
108 struct vfio_irq_set *irq_set;
109 int argsz, ret;
110 int32_t *pfd;
111
112 argsz = sizeof(*irq_set) + sizeof(*pfd);
113 irq_set = g_malloc0(argsz);
114 irq_set->argsz = argsz;
115 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
116 irq_set->index = intp->pin;
117 irq_set->start = 0;
118 irq_set->count = 1;
119 pfd = (int32_t *)&irq_set->data;
120 *pfd = event_notifier_get_fd(intp->interrupt);
121 qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
122 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
123 if (ret < 0) {
124 error_report("vfio: Failed to set trigger eventfd: %m");
125 qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
126 }
127 g_free(irq_set);
128 return ret;
129}
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
147{
148 int i;
149
150 for (i = 0; i < vdev->vbasedev.num_regions; i++) {
151 vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
152 }
153}
154
155
156
157
158
159
160
161
162
163
164
165static void vfio_intp_mmap_enable(void *opaque)
166{
167 VFIOINTp *tmp;
168 VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
169
170 qemu_mutex_lock(&vdev->intp_mutex);
171 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
172 if (tmp->state == VFIO_IRQ_ACTIVE) {
173 trace_vfio_platform_intp_mmap_enable(tmp->pin);
174
175 timer_mod(vdev->mmap_timer,
176 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
177 vdev->mmap_timeout);
178 qemu_mutex_unlock(&vdev->intp_mutex);
179 return;
180 }
181 }
182 vfio_mmap_set_enabled(vdev, true);
183 qemu_mutex_unlock(&vdev->intp_mutex);
184}
185
186
187
188
189
190
191
192
193
194
195static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
196{
197 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
198 event_notifier_get_fd(intp->interrupt));
199
200 intp->state = VFIO_IRQ_ACTIVE;
201
202
203 qemu_set_irq(intp->qemuirq, 1);
204}
205
206
207
208
209
210
211
212
213
214static void vfio_intp_interrupt(VFIOINTp *intp)
215{
216 int ret;
217 VFIOINTp *tmp;
218 VFIOPlatformDevice *vdev = intp->vdev;
219 bool delay_handling = false;
220
221 qemu_mutex_lock(&vdev->intp_mutex);
222 if (intp->state == VFIO_IRQ_INACTIVE) {
223 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
224 if (tmp->state == VFIO_IRQ_ACTIVE ||
225 tmp->state == VFIO_IRQ_PENDING) {
226 delay_handling = true;
227 break;
228 }
229 }
230 }
231 if (delay_handling) {
232
233
234
235
236 intp->state = VFIO_IRQ_PENDING;
237 trace_vfio_intp_interrupt_set_pending(intp->pin);
238 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
239 intp, pqnext);
240 ret = event_notifier_test_and_clear(intp->interrupt);
241 qemu_mutex_unlock(&vdev->intp_mutex);
242 return;
243 }
244
245 trace_vfio_platform_intp_interrupt(intp->pin,
246 event_notifier_get_fd(intp->interrupt));
247
248 ret = event_notifier_test_and_clear(intp->interrupt);
249 if (!ret) {
250 error_report("Error when clearing fd=%d (ret = %d)",
251 event_notifier_get_fd(intp->interrupt), ret);
252 }
253
254 intp->state = VFIO_IRQ_ACTIVE;
255
256
257 vfio_mmap_set_enabled(vdev, false);
258
259
260 qemu_set_irq(intp->qemuirq, 1);
261
262
263
264
265
266 if (vdev->mmap_timeout) {
267 timer_mod(vdev->mmap_timer,
268 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
269 vdev->mmap_timeout);
270 }
271 qemu_mutex_unlock(&vdev->intp_mutex);
272}
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289static void vfio_platform_eoi(VFIODevice *vbasedev)
290{
291 VFIOINTp *intp;
292 VFIOPlatformDevice *vdev =
293 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
294
295 qemu_mutex_lock(&vdev->intp_mutex);
296 QLIST_FOREACH(intp, &vdev->intp_list, next) {
297 if (intp->state == VFIO_IRQ_ACTIVE) {
298 trace_vfio_platform_eoi(intp->pin,
299 event_notifier_get_fd(intp->interrupt));
300 intp->state = VFIO_IRQ_INACTIVE;
301
302
303 qemu_set_irq(intp->qemuirq, 0);
304
305 if (vfio_irq_is_automasked(intp)) {
306
307 vfio_unmask_single_irqindex(vbasedev, intp->pin);
308 }
309
310
311 break;
312 }
313 }
314
315 if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
316 intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
317 vfio_intp_inject_pending_lockheld(intp);
318 QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
319 }
320 qemu_mutex_unlock(&vdev->intp_mutex);
321}
322
323
324
325
326
327
328
329
330static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
331{
332 int ret;
333 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
334 VFIOINTp *intp;
335
336 QLIST_FOREACH(intp, &vdev->intp_list, next) {
337 if (intp->qemuirq == irq) {
338 break;
339 }
340 }
341 assert(intp);
342
343 ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
344 if (ret) {
345 error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
346 intp->pin);
347 abort();
348 }
349}
350
351
352
353
354
355
356
357
358
359
360
361static int vfio_set_resample_eventfd(VFIOINTp *intp)
362{
363 VFIODevice *vbasedev = &intp->vdev->vbasedev;
364 struct vfio_irq_set *irq_set;
365 int argsz, ret;
366 int32_t *pfd;
367
368 argsz = sizeof(*irq_set) + sizeof(*pfd);
369 irq_set = g_malloc0(argsz);
370 irq_set->argsz = argsz;
371 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
372 irq_set->index = intp->pin;
373 irq_set->start = 0;
374 irq_set->count = 1;
375 pfd = (int32_t *)&irq_set->data;
376 *pfd = event_notifier_get_fd(intp->unmask);
377 qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
378 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
379 g_free(irq_set);
380 if (ret < 0) {
381 error_report("vfio: Failed to set resample eventfd: %m");
382 }
383 return ret;
384}
385
386
387
388
389
390
391
392
393
394
395static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
396{
397 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
398 VFIOINTp *intp;
399
400 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
401 !vdev->irqfd_allowed) {
402 goto fail_irqfd;
403 }
404
405 QLIST_FOREACH(intp, &vdev->intp_list, next) {
406 if (intp->qemuirq == irq) {
407 break;
408 }
409 }
410 assert(intp);
411
412 if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
413 intp->unmask, irq) < 0) {
414 goto fail_irqfd;
415 }
416
417 if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
418 goto fail_vfio;
419 }
420 if (vfio_irq_is_automasked(intp)) {
421 if (vfio_set_resample_eventfd(intp) < 0) {
422 goto fail_vfio;
423 }
424 trace_vfio_platform_start_level_irqfd_injection(intp->pin,
425 event_notifier_get_fd(intp->interrupt),
426 event_notifier_get_fd(intp->unmask));
427 } else {
428 trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
429 event_notifier_get_fd(intp->interrupt));
430 }
431
432 intp->kvm_accel = true;
433
434 return;
435fail_vfio:
436 kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
437 error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
438 intp->pin);
439 abort();
440fail_irqfd:
441 vfio_start_eventfd_injection(sbdev, irq);
442 return;
443}
444
445
446
/*
 * VFIODeviceOps.vfio_compute_needs_reset callback: unconditionally flags
 * the device as needing a reset.
 */
static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
{
    vbasedev->needs_reset = true;
}
451
452
/*
 * VFIODeviceOps.vfio_hot_reset_multi callback: performs no reset and
 * always returns -1.
 */
static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
{
    return -1;
}
457
458
459
460
461
462
463
464
465static int vfio_populate_device(VFIODevice *vbasedev, Error **errp)
466{
467 VFIOINTp *intp, *tmp;
468 int i, ret = -1;
469 VFIOPlatformDevice *vdev =
470 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
471
472 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
473 error_setg(errp, "this isn't a platform device");
474 return ret;
475 }
476
477 vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
478
479 for (i = 0; i < vbasedev->num_regions; i++) {
480 char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
481
482 vdev->regions[i] = g_new0(VFIORegion, 1);
483 ret = vfio_region_setup(OBJECT(vdev), vbasedev,
484 vdev->regions[i], i, name);
485 g_free(name);
486 if (ret) {
487 error_setg_errno(errp, -ret, "failed to get region %d info", i);
488 goto reg_error;
489 }
490 }
491
492 vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
493 vfio_intp_mmap_enable, vdev);
494
495 QSIMPLEQ_INIT(&vdev->pending_intp_queue);
496
497 for (i = 0; i < vbasedev->num_irqs; i++) {
498 struct vfio_irq_info irq = { .argsz = sizeof(irq) };
499
500 irq.index = i;
501 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
502 if (ret) {
503 error_setg_errno(errp, -ret, "failed to get device irq info");
504 goto irq_err;
505 } else {
506 trace_vfio_platform_populate_interrupts(irq.index,
507 irq.count,
508 irq.flags);
509 intp = vfio_init_intp(vbasedev, irq, errp);
510 if (!intp) {
511 ret = -1;
512 goto irq_err;
513 }
514 }
515 }
516 return 0;
517irq_err:
518 timer_del(vdev->mmap_timer);
519 QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
520 QLIST_REMOVE(intp, next);
521 g_free(intp);
522 }
523reg_error:
524 for (i = 0; i < vbasedev->num_regions; i++) {
525 if (vdev->regions[i]) {
526 vfio_region_finalize(vdev->regions[i]);
527 }
528 g_free(vdev->regions[i]);
529 }
530 g_free(vdev->regions);
531 return ret;
532}
533
534
535static VFIODeviceOps vfio_platform_ops = {
536 .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
537 .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
538 .vfio_eoi = vfio_platform_eoi,
539};
540
541
542
543
544
545
546
547
548
549
550
551static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
552{
553 VFIOGroup *group;
554 VFIODevice *vbasedev_iter;
555 char *tmp, group_path[PATH_MAX], *group_name;
556 ssize_t len;
557 struct stat st;
558 int groupid;
559 int ret;
560
561
562 if (vbasedev->sysfsdev) {
563 g_free(vbasedev->name);
564 vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
565 } else {
566 if (!vbasedev->name || strchr(vbasedev->name, '/')) {
567 error_setg(errp, "wrong host device name");
568 return -EINVAL;
569 }
570
571 vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
572 vbasedev->name);
573 }
574
575 if (stat(vbasedev->sysfsdev, &st) < 0) {
576 error_setg_errno(errp, errno,
577 "failed to get the sysfs host device file status");
578 return -errno;
579 }
580
581 tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
582 len = readlink(tmp, group_path, sizeof(group_path));
583 g_free(tmp);
584
585 if (len < 0 || len >= sizeof(group_path)) {
586 ret = len < 0 ? -errno : -ENAMETOOLONG;
587 error_setg_errno(errp, -ret, "no iommu_group found");
588 return ret;
589 }
590
591 group_path[len] = 0;
592
593 group_name = basename(group_path);
594 if (sscanf(group_name, "%d", &groupid) != 1) {
595 error_setg_errno(errp, errno, "failed to read %s", group_path);
596 return -errno;
597 }
598
599 trace_vfio_platform_base_device_init(vbasedev->name, groupid);
600
601 group = vfio_get_group(groupid, &address_space_memory, errp);
602 if (!group) {
603 return -ENOENT;
604 }
605
606 QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
607 if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
608 error_setg(errp, "device is already attached");
609 vfio_put_group(group);
610 return -EBUSY;
611 }
612 }
613 ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
614 if (ret) {
615 vfio_put_group(group);
616 return ret;
617 }
618
619 ret = vfio_populate_device(vbasedev, errp);
620 if (ret) {
621 vfio_put_group(group);
622 }
623
624 return ret;
625}
626
627
628
629
630
631
632
633
634
635static void vfio_platform_realize(DeviceState *dev, Error **errp)
636{
637 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
638 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
639 VFIODevice *vbasedev = &vdev->vbasedev;
640 int i, ret;
641
642 vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
643 vbasedev->dev = dev;
644 vbasedev->ops = &vfio_platform_ops;
645
646 qemu_mutex_init(&vdev->intp_mutex);
647
648 trace_vfio_platform_realize(vbasedev->sysfsdev ?
649 vbasedev->sysfsdev : vbasedev->name,
650 vdev->compat);
651
652 ret = vfio_base_device_init(vbasedev, errp);
653 if (ret) {
654 goto out;
655 }
656
657 for (i = 0; i < vbasedev->num_regions; i++) {
658 if (vfio_region_mmap(vdev->regions[i])) {
659 error_report("%s mmap unsupported. Performance may be slow",
660 memory_region_name(vdev->regions[i]->mem));
661 }
662 sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
663 }
664out:
665 if (!ret) {
666 return;
667 }
668
669 if (vdev->vbasedev.name) {
670 error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
671 } else {
672 error_prepend(errp, "vfio error: ");
673 }
674}
675
676static const VMStateDescription vfio_platform_vmstate = {
677 .name = TYPE_VFIO_PLATFORM,
678 .unmigratable = 1,
679};
680
/* User-settable properties of the VFIO platform device. */
static Property vfio_platform_dev_properties[] = {
    /* host device name, stored in vbasedev.name */
    DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
    /* sysfs path of the host device, stored in vbasedev.sysfsdev */
    DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
    /* stored in vbasedev.no_mmap, declared with false */
    DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
    /* delay in ms used when arming mmap_timer, declared with 1100 */
    DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
                       mmap_timeout, 1100),
    /* when false, vfio_start_irqfd_injection() uses the eventfd path */
    DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
    DEFINE_PROP_END_OF_LIST(),
};
690
691static void vfio_platform_class_init(ObjectClass *klass, void *data)
692{
693 DeviceClass *dc = DEVICE_CLASS(klass);
694 SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
695
696 dc->realize = vfio_platform_realize;
697 dc->props = vfio_platform_dev_properties;
698 dc->vmsd = &vfio_platform_vmstate;
699 dc->desc = "VFIO-based platform device assignment";
700 sbc->connect_irq_notifier = vfio_start_irqfd_injection;
701 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
702}
703
704static const TypeInfo vfio_platform_dev_info = {
705 .name = TYPE_VFIO_PLATFORM,
706 .parent = TYPE_SYS_BUS_DEVICE,
707 .instance_size = sizeof(VFIOPlatformDevice),
708 .class_init = vfio_platform_class_init,
709 .class_size = sizeof(VFIOPlatformDeviceClass),
710 .abstract = true,
711};
712
/* Register vfio_platform_dev_info through type_register_static(). */
static void register_vfio_platform_dev_type(void)
{
    type_register_static(&vfio_platform_dev_info);
}

/* Pass the registration function above to the type_init() hook. */
type_init(register_vfio_platform_dev_type)
719