1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "qemu/osdep.h"
18#include "qapi/error.h"
19#include <sys/ioctl.h>
20#include <linux/vfio.h>
21
22#include "hw/vfio/vfio-platform.h"
23#include "qemu/error-report.h"
24#include "qemu/module.h"
25#include "qemu/range.h"
26#include "sysemu/sysemu.h"
27#include "exec/memory.h"
28#include "exec/address-spaces.h"
29#include "qemu/queue.h"
30#include "hw/sysbus.h"
31#include "trace.h"
32#include "hw/platform-bus.h"
33#include "sysemu/kvm.h"
34
35
36
37
38
39static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
40{
41 return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
42}
43
44
45
46
47
48
49
50
51static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
52 struct vfio_irq_info info, Error **errp)
53{
54 int ret;
55 VFIOPlatformDevice *vdev =
56 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
57 SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
58 VFIOINTp *intp;
59
60 intp = g_malloc0(sizeof(*intp));
61 intp->vdev = vdev;
62 intp->pin = info.index;
63 intp->flags = info.flags;
64 intp->state = VFIO_IRQ_INACTIVE;
65 intp->kvm_accel = false;
66
67 sysbus_init_irq(sbdev, &intp->qemuirq);
68
69
70 intp->interrupt = g_malloc0(sizeof(EventNotifier));
71 ret = event_notifier_init(intp->interrupt, 0);
72 if (ret) {
73 g_free(intp->interrupt);
74 g_free(intp);
75 error_setg_errno(errp, -ret,
76 "failed to initialize trigger eventfd notifier");
77 return NULL;
78 }
79 if (vfio_irq_is_automasked(intp)) {
80
81 intp->unmask = g_malloc0(sizeof(EventNotifier));
82 ret = event_notifier_init(intp->unmask, 0);
83 if (ret) {
84 g_free(intp->interrupt);
85 g_free(intp->unmask);
86 g_free(intp);
87 error_setg_errno(errp, -ret,
88 "failed to initialize resample eventfd notifier");
89 return NULL;
90 }
91 }
92
93 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
94 return intp;
95}
96
97
98
99
100
101
102
103
104
105
106static int vfio_set_trigger_eventfd(VFIOINTp *intp,
107 eventfd_user_side_handler_t handler)
108{
109 VFIODevice *vbasedev = &intp->vdev->vbasedev;
110 int32_t fd = event_notifier_get_fd(intp->interrupt);
111 Error *err = NULL;
112 int ret;
113
114 qemu_set_fd_handler(fd, (IOHandler *)handler, NULL, intp);
115
116 ret = vfio_set_irq_signaling(vbasedev, intp->pin, 0,
117 VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err);
118 if (ret) {
119 error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
120 qemu_set_fd_handler(fd, NULL, NULL, NULL);
121 }
122
123 return ret;
124}
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
142{
143 int i;
144
145 for (i = 0; i < vdev->vbasedev.num_regions; i++) {
146 vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
147 }
148}
149
150
151
152
153
154
155
156
157
158
159
160static void vfio_intp_mmap_enable(void *opaque)
161{
162 VFIOINTp *tmp;
163 VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
164
165 qemu_mutex_lock(&vdev->intp_mutex);
166 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
167 if (tmp->state == VFIO_IRQ_ACTIVE) {
168 trace_vfio_platform_intp_mmap_enable(tmp->pin);
169
170 timer_mod(vdev->mmap_timer,
171 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
172 vdev->mmap_timeout);
173 qemu_mutex_unlock(&vdev->intp_mutex);
174 return;
175 }
176 }
177 vfio_mmap_set_enabled(vdev, true);
178 qemu_mutex_unlock(&vdev->intp_mutex);
179}
180
181
182
183
184
185
186
187
188
189
190static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
191{
192 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
193 event_notifier_get_fd(intp->interrupt));
194
195 intp->state = VFIO_IRQ_ACTIVE;
196
197
198 qemu_set_irq(intp->qemuirq, 1);
199}
200
201
202
203
204
205
206
207
208
209static void vfio_intp_interrupt(VFIOINTp *intp)
210{
211 int ret;
212 VFIOINTp *tmp;
213 VFIOPlatformDevice *vdev = intp->vdev;
214 bool delay_handling = false;
215
216 qemu_mutex_lock(&vdev->intp_mutex);
217 if (intp->state == VFIO_IRQ_INACTIVE) {
218 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
219 if (tmp->state == VFIO_IRQ_ACTIVE ||
220 tmp->state == VFIO_IRQ_PENDING) {
221 delay_handling = true;
222 break;
223 }
224 }
225 }
226 if (delay_handling) {
227
228
229
230
231 intp->state = VFIO_IRQ_PENDING;
232 trace_vfio_intp_interrupt_set_pending(intp->pin);
233 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
234 intp, pqnext);
235 ret = event_notifier_test_and_clear(intp->interrupt);
236 qemu_mutex_unlock(&vdev->intp_mutex);
237 return;
238 }
239
240 trace_vfio_platform_intp_interrupt(intp->pin,
241 event_notifier_get_fd(intp->interrupt));
242
243 ret = event_notifier_test_and_clear(intp->interrupt);
244 if (!ret) {
245 error_report("Error when clearing fd=%d (ret = %d)",
246 event_notifier_get_fd(intp->interrupt), ret);
247 }
248
249 intp->state = VFIO_IRQ_ACTIVE;
250
251
252 vfio_mmap_set_enabled(vdev, false);
253
254
255 qemu_set_irq(intp->qemuirq, 1);
256
257
258
259
260
261 if (vdev->mmap_timeout) {
262 timer_mod(vdev->mmap_timer,
263 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
264 vdev->mmap_timeout);
265 }
266 qemu_mutex_unlock(&vdev->intp_mutex);
267}
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284static void vfio_platform_eoi(VFIODevice *vbasedev)
285{
286 VFIOINTp *intp;
287 VFIOPlatformDevice *vdev =
288 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
289
290 qemu_mutex_lock(&vdev->intp_mutex);
291 QLIST_FOREACH(intp, &vdev->intp_list, next) {
292 if (intp->state == VFIO_IRQ_ACTIVE) {
293 trace_vfio_platform_eoi(intp->pin,
294 event_notifier_get_fd(intp->interrupt));
295 intp->state = VFIO_IRQ_INACTIVE;
296
297
298 qemu_set_irq(intp->qemuirq, 0);
299
300 if (vfio_irq_is_automasked(intp)) {
301
302 vfio_unmask_single_irqindex(vbasedev, intp->pin);
303 }
304
305
306 break;
307 }
308 }
309
310 if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
311 intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
312 vfio_intp_inject_pending_lockheld(intp);
313 QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
314 }
315 qemu_mutex_unlock(&vdev->intp_mutex);
316}
317
318
319
320
321
322
323
324
325static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
326{
327 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
328 VFIOINTp *intp;
329
330 QLIST_FOREACH(intp, &vdev->intp_list, next) {
331 if (intp->qemuirq == irq) {
332 break;
333 }
334 }
335 assert(intp);
336
337 if (vfio_set_trigger_eventfd(intp, vfio_intp_interrupt)) {
338 abort();
339 }
340}
341
342
343
344
345
346
347
348
349
350
351
352static int vfio_set_resample_eventfd(VFIOINTp *intp)
353{
354 int32_t fd = event_notifier_get_fd(intp->unmask);
355 VFIODevice *vbasedev = &intp->vdev->vbasedev;
356 Error *err = NULL;
357 int ret;
358
359 qemu_set_fd_handler(fd, NULL, NULL, NULL);
360 ret = vfio_set_irq_signaling(vbasedev, intp->pin, 0,
361 VFIO_IRQ_SET_ACTION_UNMASK, fd, &err);
362 if (ret) {
363 error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
364 }
365 return ret;
366}
367
368
369
370
371
372
373
374
375
376
377static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
378{
379 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
380 VFIOINTp *intp;
381
382 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
383 !vdev->irqfd_allowed) {
384 goto fail_irqfd;
385 }
386
387 QLIST_FOREACH(intp, &vdev->intp_list, next) {
388 if (intp->qemuirq == irq) {
389 break;
390 }
391 }
392 assert(intp);
393
394 if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
395 intp->unmask, irq) < 0) {
396 goto fail_irqfd;
397 }
398
399 if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
400 goto fail_vfio;
401 }
402 if (vfio_irq_is_automasked(intp)) {
403 if (vfio_set_resample_eventfd(intp) < 0) {
404 goto fail_vfio;
405 }
406 trace_vfio_platform_start_level_irqfd_injection(intp->pin,
407 event_notifier_get_fd(intp->interrupt),
408 event_notifier_get_fd(intp->unmask));
409 } else {
410 trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
411 event_notifier_get_fd(intp->interrupt));
412 }
413
414 intp->kvm_accel = true;
415
416 return;
417fail_vfio:
418 kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
419 abort();
420fail_irqfd:
421 vfio_start_eventfd_injection(sbdev, irq);
422 return;
423}
424
425
426
427static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
428{
429 vbasedev->needs_reset = true;
430}
431
432
433static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
434{
435 return -1;
436}
437
438
439
440
441
442
443
444
445static int vfio_populate_device(VFIODevice *vbasedev, Error **errp)
446{
447 VFIOINTp *intp, *tmp;
448 int i, ret = -1;
449 VFIOPlatformDevice *vdev =
450 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
451
452 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
453 error_setg(errp, "this isn't a platform device");
454 return ret;
455 }
456
457 vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
458
459 for (i = 0; i < vbasedev->num_regions; i++) {
460 char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
461
462 vdev->regions[i] = g_new0(VFIORegion, 1);
463 ret = vfio_region_setup(OBJECT(vdev), vbasedev,
464 vdev->regions[i], i, name);
465 g_free(name);
466 if (ret) {
467 error_setg_errno(errp, -ret, "failed to get region %d info", i);
468 goto reg_error;
469 }
470 }
471
472 vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
473 vfio_intp_mmap_enable, vdev);
474
475 QSIMPLEQ_INIT(&vdev->pending_intp_queue);
476
477 for (i = 0; i < vbasedev->num_irqs; i++) {
478 struct vfio_irq_info irq = { .argsz = sizeof(irq) };
479
480 irq.index = i;
481 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
482 if (ret) {
483 error_setg_errno(errp, -ret, "failed to get device irq info");
484 goto irq_err;
485 } else {
486 trace_vfio_platform_populate_interrupts(irq.index,
487 irq.count,
488 irq.flags);
489 intp = vfio_init_intp(vbasedev, irq, errp);
490 if (!intp) {
491 ret = -1;
492 goto irq_err;
493 }
494 }
495 }
496 return 0;
497irq_err:
498 timer_del(vdev->mmap_timer);
499 QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
500 QLIST_REMOVE(intp, next);
501 g_free(intp);
502 }
503reg_error:
504 for (i = 0; i < vbasedev->num_regions; i++) {
505 if (vdev->regions[i]) {
506 vfio_region_finalize(vdev->regions[i]);
507 }
508 g_free(vdev->regions[i]);
509 }
510 g_free(vdev->regions);
511 return ret;
512}
513
514
515static VFIODeviceOps vfio_platform_ops = {
516 .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
517 .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
518 .vfio_eoi = vfio_platform_eoi,
519};
520
521
522
523
524
525
526
527
528
529
530
531static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
532{
533 VFIOGroup *group;
534 VFIODevice *vbasedev_iter;
535 char *tmp, group_path[PATH_MAX], *group_name;
536 ssize_t len;
537 struct stat st;
538 int groupid;
539 int ret;
540
541
542 if (vbasedev->sysfsdev) {
543 g_free(vbasedev->name);
544 vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
545 } else {
546 if (!vbasedev->name || strchr(vbasedev->name, '/')) {
547 error_setg(errp, "wrong host device name");
548 return -EINVAL;
549 }
550
551 vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
552 vbasedev->name);
553 }
554
555 if (stat(vbasedev->sysfsdev, &st) < 0) {
556 error_setg_errno(errp, errno,
557 "failed to get the sysfs host device file status");
558 return -errno;
559 }
560
561 tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
562 len = readlink(tmp, group_path, sizeof(group_path));
563 g_free(tmp);
564
565 if (len < 0 || len >= sizeof(group_path)) {
566 ret = len < 0 ? -errno : -ENAMETOOLONG;
567 error_setg_errno(errp, -ret, "no iommu_group found");
568 return ret;
569 }
570
571 group_path[len] = 0;
572
573 group_name = basename(group_path);
574 if (sscanf(group_name, "%d", &groupid) != 1) {
575 error_setg_errno(errp, errno, "failed to read %s", group_path);
576 return -errno;
577 }
578
579 trace_vfio_platform_base_device_init(vbasedev->name, groupid);
580
581 group = vfio_get_group(groupid, &address_space_memory, errp);
582 if (!group) {
583 return -ENOENT;
584 }
585
586 QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
587 if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
588 error_setg(errp, "device is already attached");
589 vfio_put_group(group);
590 return -EBUSY;
591 }
592 }
593 ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
594 if (ret) {
595 vfio_put_group(group);
596 return ret;
597 }
598
599 ret = vfio_populate_device(vbasedev, errp);
600 if (ret) {
601 vfio_put_group(group);
602 }
603
604 return ret;
605}
606
607
608
609
610
611
612
613
614
615static void vfio_platform_realize(DeviceState *dev, Error **errp)
616{
617 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
618 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
619 VFIODevice *vbasedev = &vdev->vbasedev;
620 int i, ret;
621
622 vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
623 vbasedev->dev = dev;
624 vbasedev->ops = &vfio_platform_ops;
625
626 qemu_mutex_init(&vdev->intp_mutex);
627
628 trace_vfio_platform_realize(vbasedev->sysfsdev ?
629 vbasedev->sysfsdev : vbasedev->name,
630 vdev->compat);
631
632 ret = vfio_base_device_init(vbasedev, errp);
633 if (ret) {
634 goto out;
635 }
636
637 if (!vdev->compat) {
638 GError *gerr = NULL;
639 gchar *contents;
640 gsize length;
641 char *path;
642
643 path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
644 if (!g_file_get_contents(path, &contents, &length, &gerr)) {
645 error_setg(errp, "%s", gerr->message);
646 g_error_free(gerr);
647 g_free(path);
648 return;
649 }
650 g_free(path);
651 vdev->compat = contents;
652 for (vdev->num_compat = 0; length; vdev->num_compat++) {
653 size_t skip = strlen(contents) + 1;
654 contents += skip;
655 length -= skip;
656 }
657 }
658
659 for (i = 0; i < vbasedev->num_regions; i++) {
660 if (vfio_region_mmap(vdev->regions[i])) {
661 warn_report("%s mmap unsupported, performance may be slow",
662 memory_region_name(vdev->regions[i]->mem));
663 }
664 sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
665 }
666out:
667 if (!ret) {
668 return;
669 }
670
671 if (vdev->vbasedev.name) {
672 error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
673 } else {
674 error_prepend(errp, "vfio error: ");
675 }
676}
677
678static const VMStateDescription vfio_platform_vmstate = {
679 .name = "vfio-platform",
680 .unmigratable = 1,
681};
682
683static Property vfio_platform_dev_properties[] = {
684 DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
685 DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
686 DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
687 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
688 mmap_timeout, 1100),
689 DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
690 DEFINE_PROP_END_OF_LIST(),
691};
692
693static void vfio_platform_class_init(ObjectClass *klass, void *data)
694{
695 DeviceClass *dc = DEVICE_CLASS(klass);
696 SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
697
698 dc->realize = vfio_platform_realize;
699 dc->props = vfio_platform_dev_properties;
700 dc->vmsd = &vfio_platform_vmstate;
701 dc->desc = "VFIO-based platform device assignment";
702 sbc->connect_irq_notifier = vfio_start_irqfd_injection;
703 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
704
705 dc->user_creatable = true;
706}
707
708static const TypeInfo vfio_platform_dev_info = {
709 .name = TYPE_VFIO_PLATFORM,
710 .parent = TYPE_SYS_BUS_DEVICE,
711 .instance_size = sizeof(VFIOPlatformDevice),
712 .class_init = vfio_platform_class_init,
713 .class_size = sizeof(VFIOPlatformDeviceClass),
714};
715
716static void register_vfio_platform_dev_type(void)
717{
718 type_register_static(&vfio_platform_dev_info);
719}
720
721type_init(register_vfio_platform_dev_type)
722