1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "qemu/osdep.h"
18#include "qapi/error.h"
19#include <sys/ioctl.h>
20#include <linux/vfio.h>
21
22#include "hw/vfio/vfio-platform.h"
23#include "migration/vmstate.h"
24#include "qemu/error-report.h"
25#include "qemu/main-loop.h"
26#include "qemu/module.h"
27#include "qemu/range.h"
28#include "exec/memory.h"
29#include "exec/address-spaces.h"
30#include "qemu/queue.h"
31#include "hw/sysbus.h"
32#include "trace.h"
33#include "hw/irq.h"
34#include "hw/platform-bus.h"
35#include "hw/qdev-properties.h"
36#include "sysemu/kvm.h"
37
38
39
40
41
42static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
43{
44 return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
45}
46
47
48
49
50
51
52
53
54static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
55 struct vfio_irq_info info, Error **errp)
56{
57 int ret;
58 VFIOPlatformDevice *vdev =
59 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
60 SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
61 VFIOINTp *intp;
62
63 intp = g_malloc0(sizeof(*intp));
64 intp->vdev = vdev;
65 intp->pin = info.index;
66 intp->flags = info.flags;
67 intp->state = VFIO_IRQ_INACTIVE;
68 intp->kvm_accel = false;
69
70 sysbus_init_irq(sbdev, &intp->qemuirq);
71
72
73 intp->interrupt = g_malloc0(sizeof(EventNotifier));
74 ret = event_notifier_init(intp->interrupt, 0);
75 if (ret) {
76 g_free(intp->interrupt);
77 g_free(intp);
78 error_setg_errno(errp, -ret,
79 "failed to initialize trigger eventfd notifier");
80 return NULL;
81 }
82 if (vfio_irq_is_automasked(intp)) {
83
84 intp->unmask = g_malloc0(sizeof(EventNotifier));
85 ret = event_notifier_init(intp->unmask, 0);
86 if (ret) {
87 g_free(intp->interrupt);
88 g_free(intp->unmask);
89 g_free(intp);
90 error_setg_errno(errp, -ret,
91 "failed to initialize resample eventfd notifier");
92 return NULL;
93 }
94 }
95
96 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
97 return intp;
98}
99
100
101
102
103
104
105
106
107
108
109static int vfio_set_trigger_eventfd(VFIOINTp *intp,
110 eventfd_user_side_handler_t handler)
111{
112 VFIODevice *vbasedev = &intp->vdev->vbasedev;
113 int32_t fd = event_notifier_get_fd(intp->interrupt);
114 Error *err = NULL;
115 int ret;
116
117 qemu_set_fd_handler(fd, (IOHandler *)handler, NULL, intp);
118
119 ret = vfio_set_irq_signaling(vbasedev, intp->pin, 0,
120 VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err);
121 if (ret) {
122 error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
123 qemu_set_fd_handler(fd, NULL, NULL, NULL);
124 }
125
126 return ret;
127}
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
145{
146 int i;
147
148 for (i = 0; i < vdev->vbasedev.num_regions; i++) {
149 vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
150 }
151}
152
153
154
155
156
157
158
159
160
161
162
163static void vfio_intp_mmap_enable(void *opaque)
164{
165 VFIOINTp *tmp;
166 VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
167
168 qemu_mutex_lock(&vdev->intp_mutex);
169 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
170 if (tmp->state == VFIO_IRQ_ACTIVE) {
171 trace_vfio_platform_intp_mmap_enable(tmp->pin);
172
173 timer_mod(vdev->mmap_timer,
174 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
175 vdev->mmap_timeout);
176 qemu_mutex_unlock(&vdev->intp_mutex);
177 return;
178 }
179 }
180 vfio_mmap_set_enabled(vdev, true);
181 qemu_mutex_unlock(&vdev->intp_mutex);
182}
183
184
185
186
187
188
189
190
191
192
193static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
194{
195 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
196 event_notifier_get_fd(intp->interrupt));
197
198 intp->state = VFIO_IRQ_ACTIVE;
199
200
201 qemu_set_irq(intp->qemuirq, 1);
202}
203
204
205
206
207
208
209
210
211
212static void vfio_intp_interrupt(VFIOINTp *intp)
213{
214 int ret;
215 VFIOINTp *tmp;
216 VFIOPlatformDevice *vdev = intp->vdev;
217 bool delay_handling = false;
218
219 qemu_mutex_lock(&vdev->intp_mutex);
220 if (intp->state == VFIO_IRQ_INACTIVE) {
221 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
222 if (tmp->state == VFIO_IRQ_ACTIVE ||
223 tmp->state == VFIO_IRQ_PENDING) {
224 delay_handling = true;
225 break;
226 }
227 }
228 }
229 if (delay_handling) {
230
231
232
233
234 intp->state = VFIO_IRQ_PENDING;
235 trace_vfio_intp_interrupt_set_pending(intp->pin);
236 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
237 intp, pqnext);
238 ret = event_notifier_test_and_clear(intp->interrupt);
239 qemu_mutex_unlock(&vdev->intp_mutex);
240 return;
241 }
242
243 trace_vfio_platform_intp_interrupt(intp->pin,
244 event_notifier_get_fd(intp->interrupt));
245
246 ret = event_notifier_test_and_clear(intp->interrupt);
247 if (!ret) {
248 error_report("Error when clearing fd=%d (ret = %d)",
249 event_notifier_get_fd(intp->interrupt), ret);
250 }
251
252 intp->state = VFIO_IRQ_ACTIVE;
253
254
255 vfio_mmap_set_enabled(vdev, false);
256
257
258 qemu_set_irq(intp->qemuirq, 1);
259
260
261
262
263
264 if (vdev->mmap_timeout) {
265 timer_mod(vdev->mmap_timer,
266 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
267 vdev->mmap_timeout);
268 }
269 qemu_mutex_unlock(&vdev->intp_mutex);
270}
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287static void vfio_platform_eoi(VFIODevice *vbasedev)
288{
289 VFIOINTp *intp;
290 VFIOPlatformDevice *vdev =
291 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
292
293 qemu_mutex_lock(&vdev->intp_mutex);
294 QLIST_FOREACH(intp, &vdev->intp_list, next) {
295 if (intp->state == VFIO_IRQ_ACTIVE) {
296 trace_vfio_platform_eoi(intp->pin,
297 event_notifier_get_fd(intp->interrupt));
298 intp->state = VFIO_IRQ_INACTIVE;
299
300
301 qemu_set_irq(intp->qemuirq, 0);
302
303 if (vfio_irq_is_automasked(intp)) {
304
305 vfio_unmask_single_irqindex(vbasedev, intp->pin);
306 }
307
308
309 break;
310 }
311 }
312
313 if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
314 intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
315 vfio_intp_inject_pending_lockheld(intp);
316 QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
317 }
318 qemu_mutex_unlock(&vdev->intp_mutex);
319}
320
321
322
323
324
325
326
327
328static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
329{
330 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
331 VFIOINTp *intp;
332
333 QLIST_FOREACH(intp, &vdev->intp_list, next) {
334 if (intp->qemuirq == irq) {
335 break;
336 }
337 }
338 assert(intp);
339
340 if (vfio_set_trigger_eventfd(intp, vfio_intp_interrupt)) {
341 abort();
342 }
343}
344
345
346
347
348
349
350
351
352
353
354
355static int vfio_set_resample_eventfd(VFIOINTp *intp)
356{
357 int32_t fd = event_notifier_get_fd(intp->unmask);
358 VFIODevice *vbasedev = &intp->vdev->vbasedev;
359 Error *err = NULL;
360 int ret;
361
362 qemu_set_fd_handler(fd, NULL, NULL, NULL);
363 ret = vfio_set_irq_signaling(vbasedev, intp->pin, 0,
364 VFIO_IRQ_SET_ACTION_UNMASK, fd, &err);
365 if (ret) {
366 error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
367 }
368 return ret;
369}
370
371
372
373
374
375
376
377
378
379
380static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
381{
382 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
383 VFIOINTp *intp;
384
385 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
386 !vdev->irqfd_allowed) {
387 goto fail_irqfd;
388 }
389
390 QLIST_FOREACH(intp, &vdev->intp_list, next) {
391 if (intp->qemuirq == irq) {
392 break;
393 }
394 }
395 assert(intp);
396
397 if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
398 intp->unmask, irq) < 0) {
399 goto fail_irqfd;
400 }
401
402 if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
403 goto fail_vfio;
404 }
405 if (vfio_irq_is_automasked(intp)) {
406 if (vfio_set_resample_eventfd(intp) < 0) {
407 goto fail_vfio;
408 }
409 trace_vfio_platform_start_level_irqfd_injection(intp->pin,
410 event_notifier_get_fd(intp->interrupt),
411 event_notifier_get_fd(intp->unmask));
412 } else {
413 trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
414 event_notifier_get_fd(intp->interrupt));
415 }
416
417 intp->kvm_accel = true;
418
419 return;
420fail_vfio:
421 kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
422 abort();
423fail_irqfd:
424 vfio_start_eventfd_injection(sbdev, irq);
425 return;
426}
427
428
429
430static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
431{
432 vbasedev->needs_reset = true;
433}
434
435
436static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
437{
438 return -1;
439}
440
441
442
443
444
445
446
447
448static int vfio_populate_device(VFIODevice *vbasedev, Error **errp)
449{
450 VFIOINTp *intp, *tmp;
451 int i, ret = -1;
452 VFIOPlatformDevice *vdev =
453 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
454
455 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
456 error_setg(errp, "this isn't a platform device");
457 return ret;
458 }
459
460 vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
461
462 for (i = 0; i < vbasedev->num_regions; i++) {
463 char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
464
465 vdev->regions[i] = g_new0(VFIORegion, 1);
466 ret = vfio_region_setup(OBJECT(vdev), vbasedev,
467 vdev->regions[i], i, name);
468 g_free(name);
469 if (ret) {
470 error_setg_errno(errp, -ret, "failed to get region %d info", i);
471 goto reg_error;
472 }
473 }
474
475 vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
476 vfio_intp_mmap_enable, vdev);
477
478 QSIMPLEQ_INIT(&vdev->pending_intp_queue);
479
480 for (i = 0; i < vbasedev->num_irqs; i++) {
481 struct vfio_irq_info irq = { .argsz = sizeof(irq) };
482
483 irq.index = i;
484 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
485 if (ret) {
486 error_setg_errno(errp, -ret, "failed to get device irq info");
487 goto irq_err;
488 } else {
489 trace_vfio_platform_populate_interrupts(irq.index,
490 irq.count,
491 irq.flags);
492 intp = vfio_init_intp(vbasedev, irq, errp);
493 if (!intp) {
494 ret = -1;
495 goto irq_err;
496 }
497 }
498 }
499 return 0;
500irq_err:
501 timer_del(vdev->mmap_timer);
502 QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
503 QLIST_REMOVE(intp, next);
504 g_free(intp);
505 }
506reg_error:
507 for (i = 0; i < vbasedev->num_regions; i++) {
508 if (vdev->regions[i]) {
509 vfio_region_finalize(vdev->regions[i]);
510 }
511 g_free(vdev->regions[i]);
512 }
513 g_free(vdev->regions);
514 return ret;
515}
516
517
518static VFIODeviceOps vfio_platform_ops = {
519 .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
520 .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
521 .vfio_eoi = vfio_platform_eoi,
522};
523
524
525
526
527
528
529
530
531
532
533
534static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
535{
536 VFIOGroup *group;
537 VFIODevice *vbasedev_iter;
538 char *tmp, group_path[PATH_MAX], *group_name;
539 ssize_t len;
540 struct stat st;
541 int groupid;
542 int ret;
543
544
545 if (vbasedev->sysfsdev) {
546 g_free(vbasedev->name);
547 vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
548 } else {
549 if (!vbasedev->name || strchr(vbasedev->name, '/')) {
550 error_setg(errp, "wrong host device name");
551 return -EINVAL;
552 }
553
554 vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
555 vbasedev->name);
556 }
557
558 if (stat(vbasedev->sysfsdev, &st) < 0) {
559 error_setg_errno(errp, errno,
560 "failed to get the sysfs host device file status");
561 return -errno;
562 }
563
564 tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
565 len = readlink(tmp, group_path, sizeof(group_path));
566 g_free(tmp);
567
568 if (len < 0 || len >= sizeof(group_path)) {
569 ret = len < 0 ? -errno : -ENAMETOOLONG;
570 error_setg_errno(errp, -ret, "no iommu_group found");
571 return ret;
572 }
573
574 group_path[len] = 0;
575
576 group_name = basename(group_path);
577 if (sscanf(group_name, "%d", &groupid) != 1) {
578 error_setg_errno(errp, errno, "failed to read %s", group_path);
579 return -errno;
580 }
581
582 trace_vfio_platform_base_device_init(vbasedev->name, groupid);
583
584 group = vfio_get_group(groupid, &address_space_memory, errp);
585 if (!group) {
586 return -ENOENT;
587 }
588
589 QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
590 if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
591 error_setg(errp, "device is already attached");
592 vfio_put_group(group);
593 return -EBUSY;
594 }
595 }
596 ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
597 if (ret) {
598 vfio_put_group(group);
599 return ret;
600 }
601
602 ret = vfio_populate_device(vbasedev, errp);
603 if (ret) {
604 vfio_put_group(group);
605 }
606
607 return ret;
608}
609
610
611
612
613
614
615
616
617
618static void vfio_platform_realize(DeviceState *dev, Error **errp)
619{
620 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
621 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
622 VFIODevice *vbasedev = &vdev->vbasedev;
623 int i, ret;
624
625 vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
626 vbasedev->dev = dev;
627 vbasedev->ops = &vfio_platform_ops;
628
629 qemu_mutex_init(&vdev->intp_mutex);
630
631 trace_vfio_platform_realize(vbasedev->sysfsdev ?
632 vbasedev->sysfsdev : vbasedev->name,
633 vdev->compat);
634
635 ret = vfio_base_device_init(vbasedev, errp);
636 if (ret) {
637 goto out;
638 }
639
640 if (!vdev->compat) {
641 GError *gerr = NULL;
642 gchar *contents;
643 gsize length;
644 char *path;
645
646 path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
647 if (!g_file_get_contents(path, &contents, &length, &gerr)) {
648 error_setg(errp, "%s", gerr->message);
649 g_error_free(gerr);
650 g_free(path);
651 return;
652 }
653 g_free(path);
654 vdev->compat = contents;
655 for (vdev->num_compat = 0; length; vdev->num_compat++) {
656 size_t skip = strlen(contents) + 1;
657 contents += skip;
658 length -= skip;
659 }
660 }
661
662 for (i = 0; i < vbasedev->num_regions; i++) {
663 if (vfio_region_mmap(vdev->regions[i])) {
664 warn_report("%s mmap unsupported, performance may be slow",
665 memory_region_name(vdev->regions[i]->mem));
666 }
667 sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
668 }
669out:
670 if (!ret) {
671 return;
672 }
673
674 if (vdev->vbasedev.name) {
675 error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
676 } else {
677 error_prepend(errp, "vfio error: ");
678 }
679}
680
681static const VMStateDescription vfio_platform_vmstate = {
682 .name = "vfio-platform",
683 .unmigratable = 1,
684};
685
686static Property vfio_platform_dev_properties[] = {
687 DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
688 DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
689 DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
690 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
691 mmap_timeout, 1100),
692 DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
693 DEFINE_PROP_END_OF_LIST(),
694};
695
696static void vfio_platform_class_init(ObjectClass *klass, void *data)
697{
698 DeviceClass *dc = DEVICE_CLASS(klass);
699 SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
700
701 dc->realize = vfio_platform_realize;
702 dc->props = vfio_platform_dev_properties;
703 dc->vmsd = &vfio_platform_vmstate;
704 dc->desc = "VFIO-based platform device assignment";
705 sbc->connect_irq_notifier = vfio_start_irqfd_injection;
706 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
707
708 dc->user_creatable = true;
709}
710
711static const TypeInfo vfio_platform_dev_info = {
712 .name = TYPE_VFIO_PLATFORM,
713 .parent = TYPE_SYS_BUS_DEVICE,
714 .instance_size = sizeof(VFIOPlatformDevice),
715 .class_init = vfio_platform_class_init,
716 .class_size = sizeof(VFIOPlatformDeviceClass),
717};
718
719static void register_vfio_platform_dev_type(void)
720{
721 type_register_static(&vfio_platform_dev_info);
722}
723
724type_init(register_vfio_platform_dev_type)
725