1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "qemu/osdep.h"
18#include "qapi/error.h"
19#include <sys/ioctl.h>
20#include <linux/vfio.h>
21
22#include "hw/vfio/vfio-platform.h"
23#include "qemu/error-report.h"
24#include "qemu/range.h"
25#include "sysemu/sysemu.h"
26#include "exec/memory.h"
27#include "exec/address-spaces.h"
28#include "qemu/queue.h"
29#include "hw/sysbus.h"
30#include "trace.h"
31#include "hw/platform-bus.h"
32#include "sysemu/kvm.h"
33
34
35
36
37
38static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
39{
40 return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
41}
42
43
44
45
46
47
48
49
50static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
51 struct vfio_irq_info info, Error **errp)
52{
53 int ret;
54 VFIOPlatformDevice *vdev =
55 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
56 SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
57 VFIOINTp *intp;
58
59 intp = g_malloc0(sizeof(*intp));
60 intp->vdev = vdev;
61 intp->pin = info.index;
62 intp->flags = info.flags;
63 intp->state = VFIO_IRQ_INACTIVE;
64 intp->kvm_accel = false;
65
66 sysbus_init_irq(sbdev, &intp->qemuirq);
67
68
69 intp->interrupt = g_malloc0(sizeof(EventNotifier));
70 ret = event_notifier_init(intp->interrupt, 0);
71 if (ret) {
72 g_free(intp->interrupt);
73 g_free(intp);
74 error_setg_errno(errp, -ret,
75 "failed to initialize trigger eventd notifier");
76 return NULL;
77 }
78 if (vfio_irq_is_automasked(intp)) {
79
80 intp->unmask = g_malloc0(sizeof(EventNotifier));
81 ret = event_notifier_init(intp->unmask, 0);
82 if (ret) {
83 g_free(intp->interrupt);
84 g_free(intp->unmask);
85 g_free(intp);
86 error_setg_errno(errp, -ret,
87 "failed to initialize resample eventd notifier");
88 return NULL;
89 }
90 }
91
92 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
93 return intp;
94}
95
96
97
98
99
100
101
102
103
104
105static int vfio_set_trigger_eventfd(VFIOINTp *intp,
106 eventfd_user_side_handler_t handler)
107{
108 VFIODevice *vbasedev = &intp->vdev->vbasedev;
109 struct vfio_irq_set *irq_set;
110 int argsz, ret;
111 int32_t *pfd;
112
113 argsz = sizeof(*irq_set) + sizeof(*pfd);
114 irq_set = g_malloc0(argsz);
115 irq_set->argsz = argsz;
116 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
117 irq_set->index = intp->pin;
118 irq_set->start = 0;
119 irq_set->count = 1;
120 pfd = (int32_t *)&irq_set->data;
121 *pfd = event_notifier_get_fd(intp->interrupt);
122 qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
123 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
124 if (ret < 0) {
125 error_report("vfio: Failed to set trigger eventfd: %m");
126 qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
127 }
128 g_free(irq_set);
129 return ret;
130}
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
148{
149 int i;
150
151 for (i = 0; i < vdev->vbasedev.num_regions; i++) {
152 vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
153 }
154}
155
156
157
158
159
160
161
162
163
164
165
166static void vfio_intp_mmap_enable(void *opaque)
167{
168 VFIOINTp *tmp;
169 VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
170
171 qemu_mutex_lock(&vdev->intp_mutex);
172 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
173 if (tmp->state == VFIO_IRQ_ACTIVE) {
174 trace_vfio_platform_intp_mmap_enable(tmp->pin);
175
176 timer_mod(vdev->mmap_timer,
177 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
178 vdev->mmap_timeout);
179 qemu_mutex_unlock(&vdev->intp_mutex);
180 return;
181 }
182 }
183 vfio_mmap_set_enabled(vdev, true);
184 qemu_mutex_unlock(&vdev->intp_mutex);
185}
186
187
188
189
190
191
192
193
194
195
196static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
197{
198 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
199 event_notifier_get_fd(intp->interrupt));
200
201 intp->state = VFIO_IRQ_ACTIVE;
202
203
204 qemu_set_irq(intp->qemuirq, 1);
205}
206
207
208
209
210
211
212
213
214
215static void vfio_intp_interrupt(VFIOINTp *intp)
216{
217 int ret;
218 VFIOINTp *tmp;
219 VFIOPlatformDevice *vdev = intp->vdev;
220 bool delay_handling = false;
221
222 qemu_mutex_lock(&vdev->intp_mutex);
223 if (intp->state == VFIO_IRQ_INACTIVE) {
224 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
225 if (tmp->state == VFIO_IRQ_ACTIVE ||
226 tmp->state == VFIO_IRQ_PENDING) {
227 delay_handling = true;
228 break;
229 }
230 }
231 }
232 if (delay_handling) {
233
234
235
236
237 intp->state = VFIO_IRQ_PENDING;
238 trace_vfio_intp_interrupt_set_pending(intp->pin);
239 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
240 intp, pqnext);
241 ret = event_notifier_test_and_clear(intp->interrupt);
242 qemu_mutex_unlock(&vdev->intp_mutex);
243 return;
244 }
245
246 trace_vfio_platform_intp_interrupt(intp->pin,
247 event_notifier_get_fd(intp->interrupt));
248
249 ret = event_notifier_test_and_clear(intp->interrupt);
250 if (!ret) {
251 error_report("Error when clearing fd=%d (ret = %d)",
252 event_notifier_get_fd(intp->interrupt), ret);
253 }
254
255 intp->state = VFIO_IRQ_ACTIVE;
256
257
258 vfio_mmap_set_enabled(vdev, false);
259
260
261 qemu_set_irq(intp->qemuirq, 1);
262
263
264
265
266
267 if (vdev->mmap_timeout) {
268 timer_mod(vdev->mmap_timer,
269 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
270 vdev->mmap_timeout);
271 }
272 qemu_mutex_unlock(&vdev->intp_mutex);
273}
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290static void vfio_platform_eoi(VFIODevice *vbasedev)
291{
292 VFIOINTp *intp;
293 VFIOPlatformDevice *vdev =
294 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
295
296 qemu_mutex_lock(&vdev->intp_mutex);
297 QLIST_FOREACH(intp, &vdev->intp_list, next) {
298 if (intp->state == VFIO_IRQ_ACTIVE) {
299 trace_vfio_platform_eoi(intp->pin,
300 event_notifier_get_fd(intp->interrupt));
301 intp->state = VFIO_IRQ_INACTIVE;
302
303
304 qemu_set_irq(intp->qemuirq, 0);
305
306 if (vfio_irq_is_automasked(intp)) {
307
308 vfio_unmask_single_irqindex(vbasedev, intp->pin);
309 }
310
311
312 break;
313 }
314 }
315
316 if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
317 intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
318 vfio_intp_inject_pending_lockheld(intp);
319 QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
320 }
321 qemu_mutex_unlock(&vdev->intp_mutex);
322}
323
324
325
326
327
328
329
330
331static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
332{
333 int ret;
334 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
335 VFIOINTp *intp;
336
337 QLIST_FOREACH(intp, &vdev->intp_list, next) {
338 if (intp->qemuirq == irq) {
339 break;
340 }
341 }
342 assert(intp);
343
344 ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
345 if (ret) {
346 error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
347 intp->pin);
348 abort();
349 }
350}
351
352
353
354
355
356
357
358
359
360
361
362static int vfio_set_resample_eventfd(VFIOINTp *intp)
363{
364 VFIODevice *vbasedev = &intp->vdev->vbasedev;
365 struct vfio_irq_set *irq_set;
366 int argsz, ret;
367 int32_t *pfd;
368
369 argsz = sizeof(*irq_set) + sizeof(*pfd);
370 irq_set = g_malloc0(argsz);
371 irq_set->argsz = argsz;
372 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
373 irq_set->index = intp->pin;
374 irq_set->start = 0;
375 irq_set->count = 1;
376 pfd = (int32_t *)&irq_set->data;
377 *pfd = event_notifier_get_fd(intp->unmask);
378 qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
379 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
380 g_free(irq_set);
381 if (ret < 0) {
382 error_report("vfio: Failed to set resample eventfd: %m");
383 }
384 return ret;
385}
386
387
388
389
390
391
392
393
394
395
396static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
397{
398 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
399 VFIOINTp *intp;
400
401 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
402 !vdev->irqfd_allowed) {
403 goto fail_irqfd;
404 }
405
406 QLIST_FOREACH(intp, &vdev->intp_list, next) {
407 if (intp->qemuirq == irq) {
408 break;
409 }
410 }
411 assert(intp);
412
413 if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
414 intp->unmask, irq) < 0) {
415 goto fail_irqfd;
416 }
417
418 if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
419 goto fail_vfio;
420 }
421 if (vfio_irq_is_automasked(intp)) {
422 if (vfio_set_resample_eventfd(intp) < 0) {
423 goto fail_vfio;
424 }
425 trace_vfio_platform_start_level_irqfd_injection(intp->pin,
426 event_notifier_get_fd(intp->interrupt),
427 event_notifier_get_fd(intp->unmask));
428 } else {
429 trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
430 event_notifier_get_fd(intp->interrupt));
431 }
432
433 intp->kvm_accel = true;
434
435 return;
436fail_vfio:
437 kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
438 error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
439 intp->pin);
440 abort();
441fail_irqfd:
442 vfio_start_eventfd_injection(sbdev, irq);
443 return;
444}
445
446
447
448static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
449{
450 vbasedev->needs_reset = true;
451}
452
453
454static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
455{
456 return -1;
457}
458
459
460
461
462
463
464
465
466static int vfio_populate_device(VFIODevice *vbasedev, Error **errp)
467{
468 VFIOINTp *intp, *tmp;
469 int i, ret = -1;
470 VFIOPlatformDevice *vdev =
471 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
472
473 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
474 error_setg(errp, "this isn't a platform device");
475 return ret;
476 }
477
478 vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
479
480 for (i = 0; i < vbasedev->num_regions; i++) {
481 char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
482
483 vdev->regions[i] = g_new0(VFIORegion, 1);
484 ret = vfio_region_setup(OBJECT(vdev), vbasedev,
485 vdev->regions[i], i, name);
486 g_free(name);
487 if (ret) {
488 error_setg_errno(errp, -ret, "failed to get region %d info", i);
489 goto reg_error;
490 }
491 }
492
493 vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
494 vfio_intp_mmap_enable, vdev);
495
496 QSIMPLEQ_INIT(&vdev->pending_intp_queue);
497
498 for (i = 0; i < vbasedev->num_irqs; i++) {
499 struct vfio_irq_info irq = { .argsz = sizeof(irq) };
500
501 irq.index = i;
502 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
503 if (ret) {
504 error_setg_errno(errp, -ret, "failed to get device irq info");
505 goto irq_err;
506 } else {
507 trace_vfio_platform_populate_interrupts(irq.index,
508 irq.count,
509 irq.flags);
510 intp = vfio_init_intp(vbasedev, irq, errp);
511 if (!intp) {
512 ret = -1;
513 goto irq_err;
514 }
515 }
516 }
517 return 0;
518irq_err:
519 timer_del(vdev->mmap_timer);
520 QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
521 QLIST_REMOVE(intp, next);
522 g_free(intp);
523 }
524reg_error:
525 for (i = 0; i < vbasedev->num_regions; i++) {
526 if (vdev->regions[i]) {
527 vfio_region_finalize(vdev->regions[i]);
528 }
529 g_free(vdev->regions[i]);
530 }
531 g_free(vdev->regions);
532 return ret;
533}
534
535
536static VFIODeviceOps vfio_platform_ops = {
537 .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
538 .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
539 .vfio_eoi = vfio_platform_eoi,
540};
541
542
543
544
545
546
547
548
549
550
551
552static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
553{
554 VFIOGroup *group;
555 VFIODevice *vbasedev_iter;
556 char *tmp, group_path[PATH_MAX], *group_name;
557 ssize_t len;
558 struct stat st;
559 int groupid;
560 int ret;
561
562
563 if (vbasedev->sysfsdev) {
564 g_free(vbasedev->name);
565 vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
566 } else {
567 if (!vbasedev->name || strchr(vbasedev->name, '/')) {
568 error_setg(errp, "wrong host device name");
569 return -EINVAL;
570 }
571
572 vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
573 vbasedev->name);
574 }
575
576 if (stat(vbasedev->sysfsdev, &st) < 0) {
577 error_setg_errno(errp, errno,
578 "failed to get the sysfs host device file status");
579 return -errno;
580 }
581
582 tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
583 len = readlink(tmp, group_path, sizeof(group_path));
584 g_free(tmp);
585
586 if (len < 0 || len >= sizeof(group_path)) {
587 ret = len < 0 ? -errno : -ENAMETOOLONG;
588 error_setg_errno(errp, -ret, "no iommu_group found");
589 return ret;
590 }
591
592 group_path[len] = 0;
593
594 group_name = basename(group_path);
595 if (sscanf(group_name, "%d", &groupid) != 1) {
596 error_setg_errno(errp, errno, "failed to read %s", group_path);
597 return -errno;
598 }
599
600 trace_vfio_platform_base_device_init(vbasedev->name, groupid);
601
602 group = vfio_get_group(groupid, &address_space_memory, errp);
603 if (!group) {
604 return -ENOENT;
605 }
606
607 QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
608 if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
609 error_setg(errp, "device is already attached");
610 vfio_put_group(group);
611 return -EBUSY;
612 }
613 }
614 ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
615 if (ret) {
616 vfio_put_group(group);
617 return ret;
618 }
619
620 ret = vfio_populate_device(vbasedev, errp);
621 if (ret) {
622 vfio_put_group(group);
623 }
624
625 return ret;
626}
627
628
629
630
631
632
633
634
635
636static void vfio_platform_realize(DeviceState *dev, Error **errp)
637{
638 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
639 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
640 VFIODevice *vbasedev = &vdev->vbasedev;
641 int i, ret;
642
643 vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
644 vbasedev->dev = dev;
645 vbasedev->ops = &vfio_platform_ops;
646
647 qemu_mutex_init(&vdev->intp_mutex);
648
649 trace_vfio_platform_realize(vbasedev->sysfsdev ?
650 vbasedev->sysfsdev : vbasedev->name,
651 vdev->compat);
652
653 ret = vfio_base_device_init(vbasedev, errp);
654 if (ret) {
655 goto out;
656 }
657
658 if (!vdev->compat) {
659 GError *gerr = NULL;
660 gchar *contents;
661 gsize length;
662 char *path;
663
664 path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
665 if (!g_file_get_contents(path, &contents, &length, &gerr)) {
666 error_setg(errp, "%s", gerr->message);
667 g_error_free(gerr);
668 g_free(path);
669 return;
670 }
671 g_free(path);
672 vdev->compat = contents;
673 for (vdev->num_compat = 0; length; vdev->num_compat++) {
674 size_t skip = strlen(contents) + 1;
675 contents += skip;
676 length -= skip;
677 }
678 }
679
680 for (i = 0; i < vbasedev->num_regions; i++) {
681 if (vfio_region_mmap(vdev->regions[i])) {
682 warn_report("%s mmap unsupported, performance may be slow",
683 memory_region_name(vdev->regions[i]->mem));
684 }
685 sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
686 }
687out:
688 if (!ret) {
689 return;
690 }
691
692 if (vdev->vbasedev.name) {
693 error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
694 } else {
695 error_prepend(errp, "vfio error: ");
696 }
697}
698
699static const VMStateDescription vfio_platform_vmstate = {
700 .name = TYPE_VFIO_PLATFORM,
701 .unmigratable = 1,
702};
703
704static Property vfio_platform_dev_properties[] = {
705 DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
706 DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
707 DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
708 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
709 mmap_timeout, 1100),
710 DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
711 DEFINE_PROP_END_OF_LIST(),
712};
713
714static void vfio_platform_class_init(ObjectClass *klass, void *data)
715{
716 DeviceClass *dc = DEVICE_CLASS(klass);
717 SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
718
719 dc->realize = vfio_platform_realize;
720 dc->props = vfio_platform_dev_properties;
721 dc->vmsd = &vfio_platform_vmstate;
722 dc->desc = "VFIO-based platform device assignment";
723 sbc->connect_irq_notifier = vfio_start_irqfd_injection;
724 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
725
726 dc->user_creatable = true;
727}
728
729static const TypeInfo vfio_platform_dev_info = {
730 .name = TYPE_VFIO_PLATFORM,
731 .parent = TYPE_SYS_BUS_DEVICE,
732 .instance_size = sizeof(VFIOPlatformDevice),
733 .class_init = vfio_platform_class_init,
734 .class_size = sizeof(VFIOPlatformDeviceClass),
735};
736
737static void register_vfio_platform_dev_type(void)
738{
739 type_register_static(&vfio_platform_dev_info);
740}
741
742type_init(register_vfio_platform_dev_type)
743