/*
 * generic functions used by VFIO devices
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#ifdef CONFIG_KVM
#include <linux/kvm.h>
#endif
#include <linux/vfio.h>

#include "hw/vfio/vfio-common.h"
#include "hw/vfio/vfio.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
#include "exec/ram_addr.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/range.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
#include "migration/misc.h"
#include "migration/blocker.h"
#include "migration/qemu-file.h"
#include "sysemu/tpm.h"

VFIOGroupList vfio_group_list =
    QLIST_HEAD_INITIALIZER(vfio_group_list);
static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);
#ifdef CONFIG_KVM
/*
 * We have a single VFIO pseudo device per KVM VM.  Once created it
 * persists until the VM is destroyed, thus we do not have to worry
 * about persistence or cleanup on the VFIO side: it lives in kernel
 * space and is torn down when the KVM fd is closed.
 */
static int vfio_kvm_device_fd = -1;
#endif

/*
 * Common VFIO interrupt disable
 */
void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = index,
        .start = 0,
        .count = 0,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

static inline const char *action_to_str(int action)
{
    switch (action) {
    case VFIO_IRQ_SET_ACTION_MASK:
        return "MASK";
    case VFIO_IRQ_SET_ACTION_UNMASK:
        return "UNMASK";
    case VFIO_IRQ_SET_ACTION_TRIGGER:
        return "TRIGGER";
    default:
        return "UNKNOWN ACTION";
    }
}

static const char *index_to_str(VFIODevice *vbasedev, int index)
{
    if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    switch (index) {
    case VFIO_PCI_INTX_IRQ_INDEX:
        return "INTX";
    case VFIO_PCI_MSI_IRQ_INDEX:
        return "MSI";
    case VFIO_PCI_MSIX_IRQ_INDEX:
        return "MSIX";
    case VFIO_PCI_ERR_IRQ_INDEX:
        return "ERR";
    case VFIO_PCI_REQ_IRQ_INDEX:
        return "REQ";
    default:
        return NULL;
    }
}

static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
{
    switch (container->iommu_type) {
    case VFIO_TYPE1v2_IOMMU:
    case VFIO_TYPE1_IOMMU:
        /*
         * We support coordinated discarding of RAM via the RamDiscardManager.
         */
        return ram_block_uncoordinated_discard_disable(state);
    default:
        /*
         * VFIO_SPAPR_TCE_IOMMU most probably works just fine with the
         * RamDiscardManager, however, it is completely untested.
         *
         * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
         * completely the opposite of managing mapping/pinning dynamically as
         * required by RamDiscardManager. We would have to special-case
         * sections with a RamDiscardManager.
         */
        return ram_block_discard_disable(state);
    }
}

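/*
 * Set up or tear down eventfd signaling for one device interrupt: a single
 * VFIO_DEVICE_SET_IRQS ioctl carrying one eventfd for (index, subindex).
 * Passing an fd of -1 tears the signaling down.
 */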
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
                           int action, int fd, Error **errp)
{
    struct vfio_irq_set *irq_set;
    int argsz, ret = 0;
    const char *name;
    int32_t *pfd;

    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action;
    irq_set->index = index;
    irq_set->start = subindex;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = fd;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
        ret = -errno;
    }
    g_free(irq_set);

    if (!ret) {
        return 0;
    }

    error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure");

    name = index_to_str(vbasedev, index);
    if (name) {
        error_prepend(errp, "%s-%d: ", name, subindex);
    } else {
        error_prepend(errp, "index %d-%d: ", index, subindex);
    }
    error_prepend(errp,
                  "Failed to %s %s eventfd signaling for interrupt ",
                  fd < 0 ? "tear down" : "set up", action_to_str(action));
    return ret;
}

/*
 * IO Port/MMIO - Beware of the endians, VFIO is always little endian
 */
void vfio_region_write(void *opaque, hwaddr addr,
                       uint64_t data, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    case 8:
        buf.qword = cpu_to_le64(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %u bytes", size);
        break;
    }

    if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                     ",%d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, data, size);
    }

    trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);

    /*
     * A read or write to a BAR always signals an INTx EOI.  This will
     * do nothing if not pending (including not in INTx mode).  We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt.  Unfortunately, we don't know
     * which BAR is signaling, so we signal EOI on any BAR access.
     */
    vbasedev->ops->vfio_eoi(vbasedev);
}

uint64_t vfio_region_read(void *opaque,
                          hwaddr addr, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, size);
        return (uint64_t)-1;
    }
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    case 8:
        data = le64_to_cpu(buf.qword);
        break;
    default:
        hw_error("vfio: unsupported read size, %u bytes", size);
        break;
    }

    trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data);

    /* Same as write above: a BAR access signals an INTx EOI. */
    vbasedev->ops->vfio_eoi(vbasedev);

    return data;
}

const MemoryRegionOps vfio_region_ops = {
    .read = vfio_region_read,
    .write = vfio_region_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};

/*
 * Device state interfaces
 */

typedef struct {
    unsigned long *bitmap;
    hwaddr size;
    hwaddr pages;
} VFIOBitmap;

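/*
 * Size a dirty bitmap covering @size bytes of guest memory: one bit per host
 * page, with the bitmap length rounded up to a multiple of 64 bits as the
 * VFIO uAPI expects.
 */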
static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
{
    vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
    vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) /
                                         BITS_PER_BYTE;
    vbmap->bitmap = g_try_malloc0(vbmap->size);
    if (!vbmap->bitmap) {
        return -ENOMEM;
    }

    return 0;
}

static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                 uint64_t size, ram_addr_t ram_addr);

bool vfio_mig_active(void)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    if (QLIST_EMPTY(&vfio_group_list)) {
        return false;
    }

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->migration_blocker) {
                return false;
            }
        }
    }
    return true;
}

static Error *multiple_devices_migration_blocker;

static unsigned int vfio_migratable_device_num(void)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;
    unsigned int device_num = 0;

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->migration) {
                device_num++;
            }
        }
    }

    return device_num;
}

int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
{
    int ret;

    if (multiple_devices_migration_blocker ||
        vfio_migratable_device_num() <= 1) {
        return 0;
    }

    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
        error_setg(errp, "Migration is currently not supported with multiple "
                         "VFIO devices");
        return -EINVAL;
    }

    error_setg(&multiple_devices_migration_blocker,
               "Migration is currently not supported with multiple "
               "VFIO devices");
    ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
    if (ret < 0) {
        error_free(multiple_devices_migration_blocker);
        multiple_devices_migration_blocker = NULL;
    }

    return ret;
}

void vfio_unblock_multiple_devices_migration(void)
{
    if (!multiple_devices_migration_blocker ||
        vfio_migratable_device_num() > 1) {
        return;
    }

    migrate_del_blocker(multiple_devices_migration_blocker);
    error_free(multiple_devices_migration_blocker);
    multiple_devices_migration_blocker = NULL;
}

bool vfio_viommu_preset(VFIODevice *vbasedev)
{
    return vbasedev->group->container->space->as != &address_space_memory;
}

static void vfio_set_migration_error(int err)
{
    MigrationState *ms = migrate_get_current();

    if (migration_is_setup_or_active(ms->state)) {
        WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
            if (ms->to_dst_file) {
                qemu_file_set_error(ms->to_dst_file, err);
            }
        }
    }
}

static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;
    MigrationState *ms = migrate_get_current();

    if (ms->state != MIGRATION_STATUS_ACTIVE &&
        ms->state != MIGRATION_STATUS_DEVICE) {
        return false;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            VFIOMigration *migration = vbasedev->migration;

            if (!migration) {
                return false;
            }

            if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF &&
                (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
                 migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) {
                return false;
            }
        }
    }
    return true;
}

static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (!vbasedev->dirty_pages_supported) {
                return false;
            }
        }
    }

    return true;
}

/*
 * Check if all VFIO devices are running and migration is active, which is
 * essentially equivalent to the migration being in pre-copy phase.
 */
static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    if (!migration_is_active(migrate_get_current())) {
        return false;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            VFIOMigration *migration = vbasedev->migration;

            if (!migration) {
                return false;
            }

            if (migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
                migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
                continue;
            } else {
                return false;
            }
        }
    }
    return true;
}

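/*
 * Unmap a DMA range and fetch its dirty bitmap in one VFIO_IOMMU_UNMAP_DMA
 * call, so that no page dirtied before the unmap completes is lost.
 */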
static int vfio_dma_unmap_bitmap(VFIOContainer *container,
                                 hwaddr iova, ram_addr_t size,
                                 IOMMUTLBEntry *iotlb)
{
    struct vfio_iommu_type1_dma_unmap *unmap;
    struct vfio_bitmap *bitmap;
    VFIOBitmap vbmap;
    int ret;

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        return ret;
    }

    unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));

    unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
    unmap->iova = iova;
    unmap->size = size;
    unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
    bitmap = (struct vfio_bitmap *)&unmap->data;

    /*
     * cap_mig.pgsize_bitmap is the supported page sizes for the dirty
     * bitmap, and qemu_real_host_page_size is guaranteed to be among them,
     * so use it to mark dirty pages.
     */
    bitmap->pgsize = qemu_real_host_page_size();
    bitmap->size = vbmap.size;
    bitmap->data = (__u64 *)vbmap.bitmap;

    if (vbmap.size > container->max_dirty_bitmap_size) {
        error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
        ret = -E2BIG;
        goto unmap_exit;
    }

    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
    if (!ret) {
        cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap,
                iotlb->translated_addr, vbmap.pages);
    } else {
        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
    }

unmap_exit:
    g_free(unmap);
    g_free(vbmap.bitmap);

    return ret;
}

/*
 * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
 */
static int vfio_dma_unmap(VFIOContainer *container,
                          hwaddr iova, ram_addr_t size,
                          IOMMUTLBEntry *iotlb)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = iova,
        .size = size,
    };
    bool need_dirty_sync = false;
    int ret;

    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
        if (!vfio_devices_all_device_dirty_tracking(container) &&
            container->dirty_pages_supported) {
            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
        }

        need_dirty_sync = true;
    }

    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        /*
         * The type1 backend has an off-by-one bug in the kernel (September
         * 2020): an unmap whose range ends exactly at the top of the address
         * space, i.e. where iova + size wraps to 0, fails with EINVAL.  A
         * 64-bit vIOMMU address space hits exactly this case, so retry with
         * the range trimmed by one minimum-sized IOMMU page.
         */
        if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
            trace_vfio_dma_unmap_overflow_workaround();
            unmap.size -= 1ULL << ctz64(container->pgsizes);
            continue;
        }
        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
        return -errno;
    }

    if (need_dirty_sync) {
        ret = vfio_get_dirty_bitmap(container, iova, size,
                                    iotlb->translated_addr);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
                        ram_addr_t size, void *vaddr, bool readonly)
{
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_READ,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    if (!readonly) {
        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

    /*
     * Try the mapping, if it fails with EBUSY, unmap the region and try
     * again.  This shouldn't be necessary, but we sometimes see it in
     * the VGA ROM space.
     */
    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
        return 0;
    }

    error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
    return -errno;
}

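/*
 * Host DMA windows describe the IOVA ranges the host IOMMU can actually map
 * and the page sizes it supports within them.
 */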
static void vfio_host_win_add(VFIOContainer *container,
                              hwaddr min_iova, hwaddr max_iova,
                              uint64_t iova_pgsizes)
{
    VFIOHostDMAWindow *hostwin;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        if (ranges_overlap(hostwin->min_iova,
                           hostwin->max_iova - hostwin->min_iova + 1,
                           min_iova,
                           max_iova - min_iova + 1)) {
            hw_error("%s: Overlapped IOMMU are not enabled", __func__);
        }
    }

    hostwin = g_malloc0(sizeof(*hostwin));

    hostwin->min_iova = min_iova;
    hostwin->max_iova = max_iova;
    hostwin->iova_pgsizes = iova_pgsizes;
    QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
}

static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova,
                             hwaddr max_iova)
{
    VFIOHostDMAWindow *hostwin;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
            QLIST_REMOVE(hostwin, hostwin_next);
            g_free(hostwin);
            return 0;
        }
    }

    return -1;
}

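/*
 * Skip sections that are neither guest RAM nor vIOMMU-managed, as well as
 * protected memory and spurious mappings in the upper half of the 64-bit
 * address space (see the comment inline below).
 */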
static bool vfio_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           memory_region_is_protected(section->mr) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VFIO should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

/* Called with rcu_read_lock held.  */
static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
                               ram_addr_t *ram_addr, bool *read_only)
{
    bool ret, mr_has_discard_manager;

    ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
                               &mr_has_discard_manager);
    if (ret && mr_has_discard_manager) {
        /*
         * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
         * pages will remain pinned inside vfio until unmapped, resulting in a
         * higher memory consumption than expected. If memory would get
         * populated again later, there would be an inconsistency between
         * pages pinned by vfio and pages seen by QEMU. This is the case until
         * unmapped from the IOMMU (e.g., during device reset).
         *
         * With malicious guests, we really only care about pinning more
         * memory than expected. RLIMIT_MEMLOCK set for the user/process can
         * never be exceeded and can be used to mitigate this problem.
         */
        warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
                         " RAM (e.g., virtio-mem) works, however, malicious"
                         " guests can trigger pinning of more memory than"
                         " intended via an IOMMU. It's possible to mitigate by"
                         " setting/adjusting RLIMIT_MEMLOCK.");
    }
    return ret;
}

static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
    VFIOContainer *container = giommu->container;
    hwaddr iova = iotlb->iova + giommu->iommu_offset;
    void *vaddr;
    int ret;

    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
                                iova, iova + iotlb->addr_mask);

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        vfio_set_migration_error(-EINVAL);
        return;
    }

    rcu_read_lock();

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        bool read_only;

        if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
            goto out;
        }
        /*
         * vaddr is only valid until rcu_read_unlock(). But after
         * vfio_dma_map has set up the mapping the pages will be
         * pinned by the kernel. This makes them safe to access
         * elsewhere as long as the mapping is valid.
         */
        ret = vfio_dma_map(container, iova,
                           iotlb->addr_mask + 1, vaddr,
                           read_only);
        if (ret) {
            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx", %p) = %d (%s)",
                         container, iova,
                         iotlb->addr_mask + 1, vaddr, ret, strerror(-ret));
        }
    } else {
        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
        if (ret) {
            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%s)",
                         container, iova,
                         iotlb->addr_mask + 1, ret, strerror(-ret));
            vfio_set_migration_error(ret);
        }
    }
out:
    rcu_read_unlock();
}

static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                listener);
    const hwaddr size = int128_get64(section->size);
    const hwaddr iova = section->offset_within_address_space;
    int ret;

    /* Unmap with a single call. */
    ret = vfio_dma_unmap(vrdl->container, iova, size, NULL);
    if (ret) {
        error_report("%s: vfio_dma_unmap() failed: %s", __func__,
                     strerror(-ret));
    }
}

static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                listener);
    const hwaddr end = section->offset_within_region +
                       int128_get64(section->size);
    hwaddr start, next, iova;
    void *vaddr;
    int ret;

    /*
     * Map in (aligned within memory region) minimum granularity, so we can
     * unmap in minimum granularity later.
     */
    for (start = section->offset_within_region; start < end; start = next) {
        next = ROUND_UP(start + 1, vrdl->granularity);
        next = MIN(next, end);

        iova = start - section->offset_within_region +
               section->offset_within_address_space;
        vaddr = memory_region_get_ram_ptr(section->mr) + start;

        ret = vfio_dma_map(vrdl->container, iova, next - start,
                           vaddr, section->readonly);
        if (ret) {
            /* Rollback */
            vfio_ram_discard_notify_discard(rdl, section);
            return ret;
        }
    }
    return 0;
}

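/*
 * Register a RamDiscardListener for a RAM section managed by a
 * RamDiscardManager (e.g. virtio-mem), so that only the currently populated
 * parts are DMA-mapped and the mappings follow populate/discard events.
 */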
static void vfio_register_ram_discard_listener(VFIOContainer *container,
                                               MemoryRegionSection *section)
{
    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
    VFIORamDiscardListener *vrdl;

    /* Ignore some corner cases not relevant in practice. */
    g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
    g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
                             TARGET_PAGE_SIZE));
    g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));

    vrdl = g_new0(VFIORamDiscardListener, 1);
    vrdl->container = container;
    vrdl->mr = section->mr;
    vrdl->offset_within_address_space = section->offset_within_address_space;
    vrdl->size = int128_get64(section->size);
    vrdl->granularity = ram_discard_manager_get_min_granularity(rdm,
                                                                section->mr);

    g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
    g_assert(container->pgsizes &&
             vrdl->granularity >= 1ULL << ctz64(container->pgsizes));

    ram_discard_listener_init(&vrdl->listener,
                              vfio_ram_discard_notify_populate,
                              vfio_ram_discard_notify_discard, true);
    ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
    QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next);

    /*
     * Sanity-check if we have a theoretically problematic setup where we
     * could exceed the maximum number of possible DMA mappings over time.
     * Each RamDiscardManager section can consume one mapping per granularity
     * chunk, and we assume each remaining section in the address space
     * consumes one DMA mapping and one memslot.  We take the number of KVM
     * memory slots as a best guess for the maximum number of such sections
     * we could have over time.
     */
    if (container->dma_max_mappings) {
        unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512;

#ifdef CONFIG_KVM
        if (kvm_enabled()) {
            max_memslots = kvm_get_max_memslots();
        }
#endif

        QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
            hwaddr start, end;

            start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space,
                                    vrdl->granularity);
            end = ROUND_UP(vrdl->offset_within_address_space + vrdl->size,
                           vrdl->granularity);
            vrdl_mappings += (end - start) / vrdl->granularity;
            vrdl_count++;
        }

        if (vrdl_mappings + max_memslots - vrdl_count >
            container->dma_max_mappings) {
            warn_report("%s: possibly running out of DMA mappings. E.g., try"
                        " increasing the 'block-size' of virtio-mem devices."
                        " Maximum possible DMA mappings: %d, Maximum possible"
                        " memslots: %d", __func__, container->dma_max_mappings,
                        max_memslots);
        }
    }
}

static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
                                                 MemoryRegionSection *section)
{
    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
    VFIORamDiscardListener *vrdl = NULL;

    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
        if (vrdl->mr == section->mr &&
            vrdl->offset_within_address_space ==
            section->offset_within_address_space) {
            break;
        }
    }

    if (!vrdl) {
        hw_error("vfio: Trying to unregister missing RAM discard listener");
    }

    ram_discard_manager_unregister_listener(rdm, &vrdl->listener);
    QLIST_REMOVE(vrdl, next);
    g_free(vrdl);
}

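/*
 * Return the host DMA window that fully contains [iova, end], or NULL when
 * no registered window covers the range.
 */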
static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
                                            hwaddr iova, hwaddr end)
{
    VFIOHostDMAWindow *hostwin;
    bool hostwin_found = false;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
            hostwin_found = true;
            break;
        }
    }

    return hostwin_found ? hostwin : NULL;
}

static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!TPM_IS_CRB(mr->owner)) {
        return false;
    }

    /* This is a known safe misaligned region, just trace for debug purpose. */
    trace_vfio_known_safe_misalignment(memory_region_name(mr),
                                       section->offset_within_address_space,
                                       section->offset_within_region,
                                       qemu_real_host_page_size());
    return true;
}

static bool vfio_listener_valid_section(MemoryRegionSection *section,
                                        const char *name)
{
    if (vfio_listener_skipped_section(section)) {
        trace_vfio_listener_region_skip(name,
                section->offset_within_address_space,
                section->offset_within_address_space +
                int128_get64(int128_sub(section->size, int128_one())));
        return false;
    }

    if (unlikely((section->offset_within_address_space &
                  ~qemu_real_host_page_mask()) !=
                 (section->offset_within_region &
                  ~qemu_real_host_page_mask()))) {
        if (!vfio_known_safe_misalignment(section)) {
            error_report("%s received unaligned region %s iova=0x%"PRIx64
                         " offset_within_region=0x%"PRIx64
                         " qemu_real_host_page_size=0x%"PRIxPTR,
                         __func__, memory_region_name(section->mr),
                         section->offset_within_address_space,
                         section->offset_within_region,
                         qemu_real_host_page_size());
        }
        return false;
    }

    return true;
}

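/*
 * Align a section to host page boundaries and compute the IOVA range it
 * covers; returns false if the aligned section turns out to be empty.
 */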
static bool vfio_get_section_iova_range(VFIOContainer *container,
                                        MemoryRegionSection *section,
                                        hwaddr *out_iova, hwaddr *out_end,
                                        Int128 *out_llend)
{
    Int128 llend;
    hwaddr iova;

    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask()));

    if (int128_ge(int128_make64(iova), llend)) {
        return false;
    }

    *out_iova = iova;
    *out_end = int128_get64(int128_sub(llend, int128_one()));
    if (out_llend) {
        *out_llend = llend;
    }
    return true;
}

static void vfio_listener_region_add(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    hwaddr iova, end;
    Int128 llend, llsize;
    void *vaddr;
    int ret;
    VFIOHostDMAWindow *hostwin;
    Error *err = NULL;

    if (!vfio_listener_valid_section(section, "region_add")) {
        return;
    }

    if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
        if (memory_region_is_ram_device(section->mr)) {
            trace_vfio_listener_region_add_no_dma_map(
                memory_region_name(section->mr),
                section->offset_within_address_space,
                int128_getlo(section->size),
                qemu_real_host_page_size());
        }
        return;
    }

    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
        hwaddr pgsize = 0;

        /* For now intersections are not allowed, we may relax this later */
        QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
            if (ranges_overlap(hostwin->min_iova,
                               hostwin->max_iova - hostwin->min_iova + 1,
                               section->offset_within_address_space,
                               int128_get64(section->size))) {
                error_setg(&err,
                    "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing "
                    "host DMA window [0x%"PRIx64",0x%"PRIx64"]",
                    section->offset_within_address_space,
                    section->offset_within_address_space +
                        int128_get64(section->size) - 1,
                    hostwin->min_iova, hostwin->max_iova);
                goto fail;
            }
        }

        ret = vfio_spapr_create_window(container, section, &pgsize);
        if (ret) {
            error_setg_errno(&err, -ret, "Failed to create SPAPR window");
            goto fail;
        }

        vfio_host_win_add(container, section->offset_within_address_space,
                          section->offset_within_address_space +
                          int128_get64(section->size) - 1, pgsize);
#ifdef CONFIG_KVM
        if (kvm_enabled()) {
            VFIOGroup *group;
            IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
            struct kvm_vfio_spapr_tce param;
            struct kvm_device_attr attr = {
                .group = KVM_DEV_VFIO_GROUP,
                .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
                .addr = (uint64_t)(unsigned long)&param,
            };

            if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD,
                                              &param.tablefd)) {
                QLIST_FOREACH(group, &container->group_list, container_next) {
                    param.groupfd = group->fd;
                    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
                        error_report("vfio: failed to setup fd %d "
                                     "for a group with fd %d: %s",
                                     param.tablefd, param.groupfd,
                                     strerror(errno));
                        return;
                    }
                    trace_vfio_spapr_group_attach(param.groupfd, param.tablefd);
                }
            }
        }
#endif
    }

    hostwin = vfio_find_hostwin(container, iova, end);
    if (!hostwin) {
        error_setg(&err, "Container %p can't map guest IOVA region"
                   " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
        goto fail;
    }

    memory_region_ref(section->mr);

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;
        IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
        int iommu_idx;

        trace_vfio_listener_region_add_iommu(iova, end);
        /*
         * FIXME: For VFIO iommu types which have KVM acceleration to
         * avoid bouncing all map/unmaps through qemu this way, this
         * would be the right place to wire that up (tell the KVM
         * device emulation the VFIO iommu handles to use).
         */
        giommu = g_malloc0(sizeof(*giommu));
        giommu->iommu_mr = iommu_mr;
        giommu->iommu_offset = section->offset_within_address_space -
                               section->offset_within_region;
        giommu->container = container;
        llend = int128_add(int128_make64(section->offset_within_region),
                           section->size);
        llend = int128_sub(llend, int128_one());
        iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                       MEMTXATTRS_UNSPECIFIED);
        iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
                            IOMMU_NOTIFIER_IOTLB_EVENTS,
                            section->offset_within_region,
                            int128_get64(llend),
                            iommu_idx);

        ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr,
                                                     container->pgsizes,
                                                     &err);
        if (ret) {
            g_free(giommu);
            goto fail;
        }

        ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
                                                    &err);
        if (ret) {
            g_free(giommu);
            goto fail;
        }
        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
        memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);

        return;
    }

    /*
     * For a RAM region with a RamDiscardManager, we only want to map the
     * actually populated parts - and update the mapping whenever we're
     * notified about changes.
     */
    if (memory_region_has_ram_discard_manager(section->mr)) {
        vfio_register_ram_discard_listener(container, section);
        return;
    }

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vfio_listener_region_add_ram(iova, end, vaddr);

    llsize = int128_sub(llend, int128_make64(iova));

    if (memory_region_is_ram_device(section->mr)) {
        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;

        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
            trace_vfio_listener_region_add_no_dma_map(
                memory_region_name(section->mr),
                section->offset_within_address_space,
                int128_getlo(section->size),
                pgmask + 1);
            return;
        }
    }

    ret = vfio_dma_map(container, iova, int128_get64(llsize),
                       vaddr, section->readonly);
    if (ret) {
        error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                   "0x%"HWADDR_PRIx", %p) = %d (%s)",
                   container, iova, int128_get64(llsize), vaddr, ret,
                   strerror(-ret));
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            error_report_err(err);
            return;
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("failed to vfio_dma_map. pci p2p may not work");
        return;
    }

    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail.  Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    if (!container->initialized) {
        if (!container->error) {
            error_propagate_prepend(&container->error, err,
                                    "Region %s: ",
                                    memory_region_name(section->mr));
        } else {
            error_free(err);
        }
    } else {
        error_report_err(err);
        hw_error("vfio: DMA mapping failed, unable to continue");
    }
}

static void vfio_listener_region_del(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    hwaddr iova, end;
    Int128 llend, llsize;
    int ret;
    bool try_unmap = true;

    if (!vfio_listener_valid_section(section, "region_del")) {
        return;
    }

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;

        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
            if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
                giommu->n.start == section->offset_within_region) {
                memory_region_unregister_iommu_notifier(section->mr,
                                                        &giommu->n);
                QLIST_REMOVE(giommu, giommu_next);
                g_free(giommu);
                break;
            }
        }

        /*
         * FIXME: We assume the one big unmap below is adequate to
         * remove any individual page mappings in the IOMMU which
         * might have been copied into VFIO. This works for a page table
         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
         * That may not be true for all IOMMU types.
         */
    }

    if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    trace_vfio_listener_region_del(iova, end);

    if (memory_region_is_ram_device(section->mr)) {
        hwaddr pgmask;
        VFIOHostDMAWindow *hostwin;

        hostwin = vfio_find_hostwin(container, iova, end);
        assert(hostwin);

        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
    } else if (memory_region_has_ram_discard_manager(section->mr)) {
        vfio_unregister_ram_discard_listener(container, section);
        /* Unregistering will trigger an unmap. */
        try_unmap = false;
    }

    if (try_unmap) {
        if (int128_eq(llsize, int128_2_64())) {
            /* The unmap ioctl doesn't accept a full 64-bit span. */
            llsize = int128_rshift(llsize, 1);
            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
            if (ret) {
                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                             "0x%"HWADDR_PRIx") = %d (%s)",
                             container, iova, int128_get64(llsize), ret,
                             strerror(-ret));
            }
            iova += int128_get64(llsize);
        }
        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
        if (ret) {
            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%s)",
                         container, iova, int128_get64(llsize), ret,
                         strerror(-ret));
        }
    }

    memory_region_unref(section->mr);

    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
        vfio_spapr_remove_window(container,
                                 section->offset_within_address_space);
        if (vfio_host_win_del(container,
                              section->offset_within_address_space,
                              section->offset_within_address_space +
                              int128_get64(section->size) - 1) < 0) {
            hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
                     __func__, section->offset_within_address_space);
        }
    }
}

static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
{
    int ret;
    struct vfio_iommu_type1_dirty_bitmap dirty = {
        .argsz = sizeof(dirty),
    };

    if (!container->dirty_pages_supported) {
        return 0;
    }

    if (start) {
        dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
    } else {
        dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
    }

    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
    if (ret) {
        ret = -errno;
        error_report("Failed to set dirty tracking flag 0x%x errno: %d",
                     dirty.flags, errno);
    }

    return ret;
}

typedef struct VFIODirtyRanges {
    hwaddr min32;
    hwaddr max32;
    hwaddr min64;
    hwaddr max64;
} VFIODirtyRanges;

typedef struct VFIODirtyRangesListener {
    VFIOContainer *container;
    VFIODirtyRanges ranges;
    MemoryListener listener;
} VFIODirtyRangesListener;

static void vfio_dirty_tracking_update(MemoryListener *listener,
                                       MemoryRegionSection *section)
{
    VFIODirtyRangesListener *dirty = container_of(listener,
                                                  VFIODirtyRangesListener,
                                                  listener);
    VFIODirtyRanges *range = &dirty->ranges;
    hwaddr iova, end, *min, *max;

    if (!vfio_listener_valid_section(section, "tracking_update") ||
        !vfio_get_section_iova_range(dirty->container, section,
                                     &iova, &end, NULL)) {
        return;
    }

    /*
     * The address space passed to the dirty tracker is reduced to two ranges:
     * one for 32-bit DMA ranges and one for 64-bit DMA ranges.
     * The underlying reports of dirty will query a sub-interval of each of
     * these ranges.
     *
     * The purpose of the dual range handling is to handle known cases of big
     * holes in the address space, like the x86 AMD 1T hole. The alternative
     * would be an IOVATree but that has a much bigger runtime overhead and
     * unnecessary complexity.
     */
    min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
    max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;

    if (*min > iova) {
        *min = iova;
    }
    if (*max < end) {
        *max = end;
    }

    trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
}

static const MemoryListener vfio_dirty_tracking_listener = {
    .name = "vfio-tracking",
    .region_add = vfio_dirty_tracking_update,
};

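/*
 * Compute the IOVA ranges that need device dirty tracking by replaying the
 * address space through a temporary, synchronous memory listener.
 */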
static void vfio_dirty_tracking_init(VFIOContainer *container,
                                     VFIODirtyRanges *ranges)
{
    VFIODirtyRangesListener dirty;

    memset(&dirty, 0, sizeof(dirty));
    dirty.ranges.min32 = UINT32_MAX;
    dirty.ranges.min64 = UINT64_MAX;
    dirty.listener = vfio_dirty_tracking_listener;
    dirty.container = container;

    memory_listener_register(&dirty.listener,
                             container->space->as);

    *ranges = dirty.ranges;

    /*
     * The memory listener is synchronous, and used to calculate the range
     * to dirty tracking. Unregister it after we are done as we are not
     * interested in any follow-up updates.
     */
    memory_listener_unregister(&dirty.listener);
}

static void vfio_devices_dma_logging_stop(VFIOContainer *container)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
                              sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    VFIODevice *vbasedev;
    VFIOGroup *group;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_SET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (!vbasedev->dirty_tracking) {
                continue;
            }

            if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
                warn_report("%s: Failed to stop DMA logging, err %d (%s)",
                            vbasedev->name, -errno, strerror(errno));
            }
            vbasedev->dirty_tracking = false;
        }
    }
}

static struct vfio_device_feature *
vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
                                             VFIODirtyRanges *tracking)
{
    struct vfio_device_feature *feature;
    size_t feature_size;
    struct vfio_device_feature_dma_logging_control *control;
    struct vfio_device_feature_dma_logging_range *ranges;

    feature_size = sizeof(struct vfio_device_feature) +
                   sizeof(struct vfio_device_feature_dma_logging_control);
    feature = g_try_malloc0(feature_size);
    if (!feature) {
        errno = ENOMEM;
        return NULL;
    }
    feature->argsz = feature_size;
    feature->flags = VFIO_DEVICE_FEATURE_SET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_START;

    control = (struct vfio_device_feature_dma_logging_control *)feature->data;
    control->page_size = qemu_real_host_page_size();

    /*
     * DMA logging uAPI guarantees to support at least a number of ranges that
     * fits into a single host kernel base page.
     */
    control->num_ranges = !!tracking->max32 + !!tracking->max64;
    ranges = g_try_new0(struct vfio_device_feature_dma_logging_range,
                        control->num_ranges);
    if (!ranges) {
        g_free(feature);
        errno = ENOMEM;

        return NULL;
    }

    control->ranges = (__u64)(uintptr_t)ranges;
    if (tracking->max32) {
        ranges->iova = tracking->min32;
        ranges->length = (tracking->max32 - tracking->min32) + 1;
        ranges++;
    }
    if (tracking->max64) {
        ranges->iova = tracking->min64;
        ranges->length = (tracking->max64 - tracking->min64) + 1;
    }

    trace_vfio_device_dirty_tracking_start(control->num_ranges,
                                           tracking->min32, tracking->max32,
                                           tracking->min64, tracking->max64);

    return feature;
}

static void vfio_device_feature_dma_logging_start_destroy(
    struct vfio_device_feature *feature)
{
    struct vfio_device_feature_dma_logging_control *control =
        (struct vfio_device_feature_dma_logging_control *)feature->data;
    struct vfio_device_feature_dma_logging_range *ranges =
        (struct vfio_device_feature_dma_logging_range *)(uintptr_t)control->ranges;

    g_free(ranges);
    g_free(feature);
}

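/*
 * Enable device-side DMA logging on every device in the container; on
 * failure, logging is rolled back on the devices already started.
 */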
static int vfio_devices_dma_logging_start(VFIOContainer *container)
{
    struct vfio_device_feature *feature;
    VFIODirtyRanges ranges;
    VFIODevice *vbasedev;
    VFIOGroup *group;
    int ret = 0;

    vfio_dirty_tracking_init(container, &ranges);
    feature = vfio_device_feature_dma_logging_start_create(container,
                                                           &ranges);
    if (!feature) {
        return -errno;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->dirty_tracking) {
                continue;
            }

            ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
            if (ret) {
                ret = -errno;
                error_report("%s: Failed to start DMA logging, err %d (%s)",
                             vbasedev->name, ret, strerror(errno));
                goto out;
            }
            vbasedev->dirty_tracking = true;
        }
    }

out:
    if (ret) {
        vfio_devices_dma_logging_stop(container);
    }

    vfio_device_feature_dma_logging_start_destroy(feature);

    return ret;
}

static void vfio_listener_log_global_start(MemoryListener *listener)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    int ret;

    if (vfio_devices_all_device_dirty_tracking(container)) {
        ret = vfio_devices_dma_logging_start(container);
    } else {
        ret = vfio_set_dirty_page_tracking(container, true);
    }

    if (ret) {
        error_report("vfio: Could not start dirty page tracking, err: %d (%s)",
                     ret, strerror(-ret));
        vfio_set_migration_error(ret);
    }
}

static void vfio_listener_log_global_stop(MemoryListener *listener)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    int ret = 0;

    if (vfio_devices_all_device_dirty_tracking(container)) {
        vfio_devices_dma_logging_stop(container);
    } else {
        ret = vfio_set_dirty_page_tracking(container, false);
    }

    if (ret) {
        error_report("vfio: Could not stop dirty page tracking, err: %d (%s)",
                     ret, strerror(-ret));
        vfio_set_migration_error(ret);
    }
}

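/*
 * Read the device's DMA logging report for [iova, iova + size) into @bitmap
 * (one bit per host page) via VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT.
 */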
static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
                                          hwaddr size, void *bitmap)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                        sizeof(struct vfio_device_feature_dma_logging_report),
                        sizeof(__u64))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_dma_logging_report *report =
        (struct vfio_device_feature_dma_logging_report *)feature->data;

    report->iova = iova;
    report->length = size;
    report->page_size = qemu_real_host_page_size();
    report->bitmap = (__u64)(uintptr_t)bitmap;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
        return -errno;
    }

    return 0;
}

static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
                                           VFIOBitmap *vbmap, hwaddr iova,
                                           hwaddr size)
{
    VFIODevice *vbasedev;
    VFIOGroup *group;
    int ret;

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            ret = vfio_device_dma_logging_report(vbasedev, iova, size,
                                                 vbmap->bitmap);
            if (ret) {
                error_report("%s: Failed to get DMA logging report, iova: "
                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
                             ", err: %d (%s)",
                             vbasedev->name, iova, size, ret, strerror(-ret));

                return ret;
            }
        }
    }

    return 0;
}

static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
                                   hwaddr iova, hwaddr size)
{
    struct vfio_iommu_type1_dirty_bitmap *dbitmap;
    struct vfio_iommu_type1_dirty_bitmap_get *range;
    int ret;

    dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));

    dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
    dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
    range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
    range->iova = iova;
    range->size = size;

    /*
     * cap_mig.pgsize_bitmap is the supported page sizes for the dirty
     * bitmap, and qemu_real_host_page_size is guaranteed to be among them,
     * so use it to mark dirty pages.
     */
    range->bitmap.pgsize = qemu_real_host_page_size();
    range->bitmap.size = vbmap->size;
    range->bitmap.data = (__u64 *)vbmap->bitmap;

    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
    if (ret) {
        ret = -errno;
        error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
                     " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
                     (uint64_t)range->size, errno);
    }

    g_free(dbitmap);

    return ret;
}

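/*
 * Fetch the dirty bitmap for [iova, iova + size) - from the devices when all
 * of them support device dirty tracking, otherwise from the container - and
 * mark the corresponding RAM pages dirty.
 */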
static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                 uint64_t size, ram_addr_t ram_addr)
{
    bool all_device_dirty_tracking =
        vfio_devices_all_device_dirty_tracking(container);
    uint64_t dirty_pages;
    VFIOBitmap vbmap;
    int ret;

    if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
        cpu_physical_memory_set_dirty_range(ram_addr, size,
                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                            DIRTY_CLIENTS_NOCODE);
        return 0;
    }

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        return ret;
    }

    if (all_device_dirty_tracking) {
        ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
    } else {
        ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
    }

    if (ret) {
        goto out;
    }

    dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
                                                         vbmap.pages);

    trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size,
                                ram_addr, dirty_pages);
out:
    g_free(vbmap.bitmap);

    return ret;
}

typedef struct {
    IOMMUNotifier n;
    VFIOGuestIOMMU *giommu;
} vfio_giommu_dirty_notifier;

static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    vfio_giommu_dirty_notifier *gdn = container_of(n,
                                                vfio_giommu_dirty_notifier, n);
    VFIOGuestIOMMU *giommu = gdn->giommu;
    VFIOContainer *container = giommu->container;
    hwaddr iova = iotlb->iova + giommu->iommu_offset;
    ram_addr_t translated_addr;
    int ret = -EINVAL;

    trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        goto out;
    }

    rcu_read_lock();
    if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
        ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
                                    translated_addr);
        if (ret) {
            error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%s)",
                         container, iova, iotlb->addr_mask + 1, ret,
                         strerror(-ret));
        }
    }
    rcu_read_unlock();

out:
    if (ret) {
        vfio_set_migration_error(ret);
    }
}

static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
                                             void *opaque)
{
    const hwaddr size = int128_get64(section->size);
    const hwaddr iova = section->offset_within_address_space;
    const ram_addr_t ram_addr = memory_region_get_ram_addr(section->mr) +
                                section->offset_within_region;
    VFIORamDiscardListener *vrdl = opaque;

    /*
     * Sync the whole mapped region (spanning multiple individual mappings)
     * in one go.
     */
    return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr);
}

static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
                                                   MemoryRegionSection *section)
{
    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
    VFIORamDiscardListener *vrdl = NULL;

    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
        if (vrdl->mr == section->mr &&
            vrdl->offset_within_address_space ==
            section->offset_within_address_space) {
            break;
        }
    }

    if (!vrdl) {
        hw_error("vfio: Trying to sync missing RAM discard listener");
    }

    /*
     * We only want/can synchronize the bitmap for actually mapped parts -
     * which correspond to populated parts. Replay all populated parts.
     * Pass the listener itself as the opaque pointer, matching what the
     * callback above expects.
     */
    return ram_discard_manager_replay_populated(rdm, section,
                                               vfio_ram_discard_get_dirty_bitmap,
                                                vrdl);
}

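/*
 * Sync the dirty bitmap of one section according to its type: replay vIOMMU
 * mappings, replay the populated parts of a RamDiscardManager section, or
 * query the plain RAM range directly.
 */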
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
                                  MemoryRegionSection *section)
{
    ram_addr_t ram_addr;

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;

        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
            if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
                giommu->n.start == section->offset_within_region) {
                Int128 llend;
                vfio_giommu_dirty_notifier gdn = { .giommu = giommu };
                int idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr,
                                                       MEMTXATTRS_UNSPECIFIED);

                llend = int128_add(int128_make64(section->offset_within_region),
                                   section->size);
                llend = int128_sub(llend, int128_one());

                iommu_notifier_init(&gdn.n,
                                    vfio_iommu_map_dirty_notify,
                                    IOMMU_NOTIFIER_MAP,
                                    section->offset_within_region,
                                    int128_get64(llend),
                                    idx);
                memory_region_iommu_replay(giommu->iommu_mr, &gdn.n);
                break;
            }
        }
        return 0;
    } else if (memory_region_has_ram_discard_manager(section->mr)) {
        return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
    }

    ram_addr = memory_region_get_ram_addr(section->mr) +
               section->offset_within_region;

    return vfio_get_dirty_bitmap(container,
                   REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
                   int128_get64(section->size), ram_addr);
}

static void vfio_listener_log_sync(MemoryListener *listener,
                                   MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    int ret;

    if (vfio_listener_skipped_section(section)) {
        return;
    }

    if (vfio_devices_all_dirty_tracking(container)) {
        ret = vfio_sync_dirty_bitmap(container, section);
        if (ret) {
            error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret,
                         strerror(-ret));
            vfio_set_migration_error(ret);
        }
    }
}

static const MemoryListener vfio_memory_listener = {
    .name = "vfio",
    .region_add = vfio_listener_region_add,
    .region_del = vfio_listener_region_del,
    .log_global_start = vfio_listener_log_global_start,
    .log_global_stop = vfio_listener_log_global_stop,
    .log_sync = vfio_listener_log_sync,
};

static void vfio_listener_release(VFIOContainer *container)
{
    memory_listener_unregister(&container->listener);
    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
        memory_listener_unregister(&container->prereg_listener);
    }
}

static struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
    struct vfio_info_cap_header *hdr;

    for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}

struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

static struct vfio_info_cap_header *
vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
                             unsigned int *avail)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_iommu_type1_info_dma_avail *cap;

    /* If the capability cannot be found, assume no DMA limiting */
    hdr = vfio_get_iommu_type1_info_cap(info,
                                        VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL);
    if (hdr == NULL) {
        return false;
    }

    if (avail != NULL) {
        cap = (void *) hdr;
        *avail = cap->avail;
    }

    return true;
}

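/*
 * Parse the sparse mmap capability, when present, and populate region->mmaps
 * with only the mmappable sub-areas of the region.
 */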
static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
                                          struct vfio_region_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_region_info_cap_sparse_mmap *sparse;
    int i, j;

    hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
    if (!hdr) {
        return -ENODEV;
    }

    sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);

    trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
                                         region->nr, sparse->nr_areas);

    region->mmaps = g_new0(VFIOMmap, sparse->nr_areas);

    for (i = 0, j = 0; i < sparse->nr_areas; i++) {
        if (sparse->areas[i].size) {
            trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
                                                sparse->areas[i].offset +
                                                sparse->areas[i].size - 1);
            region->mmaps[j].offset = sparse->areas[i].offset;
            region->mmaps[j].size = sparse->areas[i].size;
            j++;
        }
    }

    region->nr_mmaps = j;
    region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap));

    return 0;
}

int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                      int index, const char *name)
{
    struct vfio_region_info *info;
    int ret;

    ret = vfio_get_region_info(vbasedev, index, &info);
    if (ret) {
        return ret;
    }

    region->vbasedev = vbasedev;
    region->flags = info->flags;
    region->size = info->size;
    region->fd_offset = info->offset;
    region->nr = index;

    if (region->size) {
        region->mem = g_new0(MemoryRegion, 1);
        memory_region_init_io(region->mem, obj, &vfio_region_ops,
                              region, name, region->size);

        if (!vbasedev->no_mmap &&
            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {

            ret = vfio_setup_region_sparse_mmaps(region, info);

            if (ret) {
                region->nr_mmaps = 1;
                region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
                region->mmaps[0].offset = 0;
                region->mmaps[0].size = region->size;
            }
        }
    }

    g_free(info);

    trace_vfio_region_setup(vbasedev->name, index, name,
                            region->flags, region->fd_offset, region->size);
    return 0;
}

static void vfio_subregion_unmap(VFIORegion *region, int index)
{
    trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
                            region->mmaps[index].offset,
                            region->mmaps[index].offset +
                            region->mmaps[index].size - 1);
    memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
    munmap(region->mmaps[index].mmap, region->mmaps[index].size);
    object_unparent(OBJECT(&region->mmaps[index].mem));
    region->mmaps[index].mmap = NULL;
}

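/*
 * mmap the mmappable sub-areas of a region and add each one as a RAM device
 * subregion of region->mem; on failure, already mapped areas are unwound.
 */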
int vfio_region_mmap(VFIORegion *region)
{
    int i, prot = 0;
    char *name;

    if (!region->mem) {
        return 0;
    }

    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;

    for (i = 0; i < region->nr_mmaps; i++) {
        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
                                     MAP_SHARED, region->vbasedev->fd,
                                     region->fd_offset +
                                     region->mmaps[i].offset);
        if (region->mmaps[i].mmap == MAP_FAILED) {
            int ret = -errno;

            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
                                         region->fd_offset +
                                         region->mmaps[i].offset,
                                         region->fd_offset +
                                         region->mmaps[i].offset +
                                         region->mmaps[i].size - 1, ret);

            region->mmaps[i].mmap = NULL;

            for (i--; i >= 0; i--) {
                vfio_subregion_unmap(region, i);
            }

            return ret;
        }

        name = g_strdup_printf("%s mmaps[%d]",
                               memory_region_name(region->mem), i);
        memory_region_init_ram_device_ptr(&region->mmaps[i].mem,
                                          memory_region_owner(region->mem),
                                          name, region->mmaps[i].size,
                                          region->mmaps[i].mmap);
        g_free(name);
        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
                                    &region->mmaps[i].mem);

        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
                               region->mmaps[i].offset,
                               region->mmaps[i].offset +
                               region->mmaps[i].size - 1);
    }

    return 0;
}

void vfio_region_unmap(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            vfio_subregion_unmap(region, i);
        }
    }
}

void vfio_region_exit(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
        }
    }

    trace_vfio_region_exit(region->vbasedev->name, region->nr);
}

void vfio_region_finalize(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
            object_unparent(OBJECT(&region->mmaps[i].mem));
        }
    }

    object_unparent(OBJECT(region->mem));

    g_free(region->mem);
    g_free(region->mmaps);

    trace_vfio_region_finalize(region->vbasedev->name, region->nr);

    region->mem = NULL;
    region->mmaps = NULL;
    region->nr_mmaps = 0;
    region->size = 0;
    region->flags = 0;
    region->nr = 0;
}

void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
        }
    }

    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
                                        enabled);
}
2209
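/*
 * System reset handler: runs in two passes so that devices which can
 * only be reset together (e.g. functions sharing a bus-level reset) are
 * first collected via vfio_compute_needs_reset() and then hot reset as
 * a group.
 */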
void vfio_reset_handler(void *opaque)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->dev->realized) {
                vbasedev->ops->vfio_compute_needs_reset(vbasedev);
            }
        }
    }

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->dev->realized && vbasedev->needs_reset) {
                vbasedev->ops->vfio_hot_reset_multi(vbasedev);
            }
        }
    }
}

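/*
 * Tell KVM about the VFIO group so the kernel can coordinate with the
 * IOMMU (e.g. for interrupt forwarding and coherency tracking).  The
 * KVM VFIO pseudo-device is created lazily on first use; conceptually
 * the handshake is just (illustrative sketch, error handling omitted):
 *
 *     struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
 *     ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
 *     ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);  // GROUP_ADD / GROUP_DEL
 */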
static void vfio_kvm_device_add_group(VFIOGroup *group)
{
#ifdef CONFIG_KVM
    struct kvm_device_attr attr = {
        .group = KVM_DEV_VFIO_GROUP,
        .attr = KVM_DEV_VFIO_GROUP_ADD,
        .addr = (uint64_t)(unsigned long)&group->fd,
    };

    if (!kvm_enabled()) {
        return;
    }

    if (vfio_kvm_device_fd < 0) {
        struct kvm_create_device cd = {
            .type = KVM_DEV_TYPE_VFIO,
        };

        if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
            error_report("Failed to create KVM VFIO device: %m");
            return;
        }

        vfio_kvm_device_fd = cd.fd;
    }

    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
        error_report("Failed to add group %d to KVM VFIO device: %m",
                     group->groupid);
    }
#endif
}

static void vfio_kvm_device_del_group(VFIOGroup *group)
{
#ifdef CONFIG_KVM
    struct kvm_device_attr attr = {
        .group = KVM_DEV_VFIO_GROUP,
        .attr = KVM_DEV_VFIO_GROUP_DEL,
        .addr = (uint64_t)(unsigned long)&group->fd,
    };

    if (vfio_kvm_device_fd < 0) {
        return;
    }

    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
        error_report("Failed to remove group %d from KVM VFIO device: %m",
                     group->groupid);
    }
#endif
}

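/*
 * Look up the VFIOAddressSpace wrapping @as, allocating and registering
 * a new one on first use.  It is released again via
 * vfio_put_address_space() once its container list runs empty.
 */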
static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as == as) {
            return space;
        }
    }

    /* No suitable VFIOAddressSpace, create a new one */
    space = g_malloc0(sizeof(*space));
    space->as = as;
    QLIST_INIT(&space->containers);

    QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);

    return space;
}

static void vfio_put_address_space(VFIOAddressSpace *space)
{
    if (QLIST_EMPTY(&space->containers)) {
        QLIST_REMOVE(space, list);
        g_free(space);
    }
}

/*
 * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
 */
static int vfio_get_iommu_type(VFIOContainer *container,
                               Error **errp)
{
    int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
                          VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
    int i;

    for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
        if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
            return iommu_types[i];
        }
    }
    error_setg(errp, "No available IOMMU models");
    return -EINVAL;
}

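/*
 * Attach the group to the container fd and select an IOMMU backend for
 * it.  The chosen type is recorded in container->iommu_type so that
 * later code (mapping, dirty tracking, discard handling) can branch
 * on it.
 */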
static int vfio_init_container(VFIOContainer *container, int group_fd,
                               Error **errp)
{
    int iommu_type, ret;

    iommu_type = vfio_get_iommu_type(container, errp);
    if (iommu_type < 0) {
        return iommu_type;
    }

    ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd);
    if (ret) {
        error_setg_errno(errp, errno, "Failed to set group container");
        return -errno;
    }

    while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
        if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
            /*
             * On sPAPR, despite the IOMMU subdriver always advertising
             * v1 and v2, the running platform may not support v2 and
             * there is no way to guess it until an IOMMU group gets
             * added to the container.  So in case it fails with v2,
             * try v1 as a fallback.
             */
            iommu_type = VFIO_SPAPR_TCE_IOMMU;
            continue;
        }
        error_setg_errno(errp, errno, "Failed to set iommu for container");
        return -errno;
    }

    container->iommu_type = iommu_type;
    return 0;
}

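/*
 * VFIO "info" ioctls use a variable-size protocol: userspace passes the
 * size it allocated in argsz and the kernel writes back the size it
 * actually needs.  The helper below implements the canonical dance:
 *
 *     info->argsz = argsz;
 *     ioctl(fd, VFIO_IOMMU_GET_INFO, info);
 *     if (info->argsz > argsz) { grow the buffer and retry }
 */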
static int vfio_get_iommu_info(VFIOContainer *container,
                               struct vfio_iommu_type1_info **info)
{
    size_t argsz = sizeof(struct vfio_iommu_type1_info);

    *info = g_new0(struct vfio_iommu_type1_info, 1);
again:
    (*info)->argsz = argsz;

    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);
        goto again;
    }

    return 0;
}

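/*
 * Walk the capability chain appended to vfio_iommu_type1_info.  Each
 * header stores the offset of the next one relative to the start of
 * the info buffer; an offset of zero terminates the chain.
 */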
static struct vfio_info_cap_header *
vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
{
    struct vfio_info_cap_header *hdr;
    void *ptr = info;

    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
        return NULL;
    }

    for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}

static void vfio_get_iommu_info_migration(VFIOContainer *container,
                                          struct vfio_iommu_type1_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_iommu_type1_info_cap_migration *cap_mig;

    hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
    if (!hdr) {
        return;
    }

    cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
                           header);

    /*
     * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap
     * of qemu_real_host_page_size to mark those dirty.  Hence set the
     * bitmap's pgsize to qemu_real_host_page_size.
     */
    if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
        container->dirty_pages_supported = true;
        container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
        container->dirty_pgsizes = cap_mig->pgsize_bitmap;
    }
}

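/*
 * Find or create the VFIOContainer for @group within @as: reuse an
 * existing container in the address space if the kernel accepts the
 * group into it, otherwise open a fresh /dev/vfio/vfio instance,
 * negotiate an IOMMU backend, and register the memory listeners.
 */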
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
                                  Error **errp)
{
    VFIOContainer *container;
    int ret, fd;
    VFIOAddressSpace *space;

    space = vfio_get_address_space(as);

    /*
     * VFIO is incompatible with discarding of RAM: pinned pages that the
     * guest later discards would leave stale virtual-to-physical mappings
     * in the IOMMU.  RAM block discard is therefore marked broken for
     * every group added to a container, whether the container is used
     * individually or shared.  Groups whose devices are known to operate
     * compatibly with discarding (e.g. mdev devices whose vendor driver
     * only pins pages in the guest driver's working set) may opt back in,
     * but the setting must be consistent across the whole group.
     *
     * Attaching the group to an existing container (below) is the first
     * point at which pinning can occur: any pages previously zapped from
     * the virtual address space are re-established as valid mappings,
     * as they also are when the MemoryListener of a new container replays
     * the address space.
     */
    QLIST_FOREACH(container, &space->containers, next) {
        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
            ret = vfio_ram_block_discard_disable(container, true);
            if (ret) {
                error_setg_errno(errp, -ret,
                                 "Cannot set discarding of RAM broken");
                if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER,
                          &container->fd)) {
                    error_report("vfio: error disconnecting group %d from"
                                 " container", group->groupid);
                }
                return ret;
            }
            group->container = container;
            QLIST_INSERT_HEAD(&container->group_list, group, container_next);
            vfio_kvm_device_add_group(group);
            return 0;
        }
    }

    fd = qemu_open_old("/dev/vfio/vfio", O_RDWR);
    if (fd < 0) {
        error_setg_errno(errp, errno, "failed to open /dev/vfio/vfio");
        ret = -errno;
        goto put_space_exit;
    }

    ret = ioctl(fd, VFIO_GET_API_VERSION);
    if (ret != VFIO_API_VERSION) {
        error_setg(errp, "supported vfio version: %d, "
                   "reported version: %d", VFIO_API_VERSION, ret);
        ret = -EINVAL;
        goto close_fd_exit;
    }

    container = g_malloc0(sizeof(*container));
    container->space = space;
    container->fd = fd;
    container->error = NULL;
    container->dirty_pages_supported = false;
    container->dma_max_mappings = 0;
    QLIST_INIT(&container->giommu_list);
    QLIST_INIT(&container->hostwin_list);
    QLIST_INIT(&container->vrdl_list);

    ret = vfio_init_container(container, group->fd, errp);
    if (ret) {
        goto free_container_exit;
    }

    ret = vfio_ram_block_discard_disable(container, true);
    if (ret) {
        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
        goto free_container_exit;
    }

    switch (container->iommu_type) {
    case VFIO_TYPE1v2_IOMMU:
    case VFIO_TYPE1_IOMMU:
    {
        struct vfio_iommu_type1_info *info;

        ret = vfio_get_iommu_info(container, &info);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
            goto enable_discards_exit;
        }

        if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
            container->pgsizes = info->iova_pgsizes;
        } else {
            container->pgsizes = qemu_real_host_page_size();
        }

        if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
            container->dma_max_mappings = 65535;
        }
        vfio_get_iommu_info_migration(container, info);
        g_free(info);

        /*
         * FIXME: We assume Type1 IOMMUs can map any IOVA; the kernel
         * interface does not yet tell us the valid IOVA ranges, so add
         * a single host window covering the whole 64-bit space.
         */
        vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes);

        break;
    }
    case VFIO_SPAPR_TCE_v2_IOMMU:
    case VFIO_SPAPR_TCE_IOMMU:
    {
        struct vfio_iommu_spapr_tce_info info;
        bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;

        /*
         * The host kernel code implementing VFIO_IOMMU_DISABLE is called
         * when container fd is closed so we do not call it explicitly
         * in this file.
         */
        if (!v2) {
            ret = ioctl(fd, VFIO_IOMMU_ENABLE);
            if (ret) {
                error_setg_errno(errp, errno, "failed to enable container");
                ret = -errno;
                goto enable_discards_exit;
            }
        } else {
            container->prereg_listener = vfio_prereg_listener;

            memory_listener_register(&container->prereg_listener,
                                     &address_space_memory);
            if (container->error) {
                memory_listener_unregister(&container->prereg_listener);
                ret = -1;
                error_propagate_prepend(errp, container->error,
                    "RAM memory listener initialization failed: ");
                goto enable_discards_exit;
            }
        }

        info.argsz = sizeof(info);
        ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
        if (ret) {
            error_setg_errno(errp, errno,
                             "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed");
            ret = -errno;
            if (v2) {
                memory_listener_unregister(&container->prereg_listener);
            }
            goto enable_discards_exit;
        }

        if (v2) {
            container->pgsizes = info.ddw.pgsizes;
            /*
             * There is a default window in just created container.
             * To make region_add/del simpler, we better remove this
             * window now and let those iommu_listener callbacks
             * create/remove them when needed.
             */
            ret = vfio_spapr_remove_window(container, info.dma32_window_start);
            if (ret) {
                error_setg_errno(errp, -ret,
                                 "failed to remove existing window");
                goto enable_discards_exit;
            }
        } else {
            /* The default table uses 4K pages */
            container->pgsizes = 0x1000;
            vfio_host_win_add(container, info.dma32_window_start,
                              info.dma32_window_start +
                              info.dma32_window_size - 1,
                              0x1000);
        }
    }
    }

    vfio_kvm_device_add_group(group);

    QLIST_INIT(&container->group_list);
    QLIST_INSERT_HEAD(&space->containers, container, next);

    group->container = container;
    QLIST_INSERT_HEAD(&container->group_list, group, container_next);

    container->listener = vfio_memory_listener;

    memory_listener_register(&container->listener, container->space->as);

    if (container->error) {
        ret = -1;
        error_propagate_prepend(errp, container->error,
            "memory listener initialization failed: ");
        goto listener_release_exit;
    }

    container->initialized = true;

    return 0;
listener_release_exit:
    QLIST_REMOVE(group, container_next);
    QLIST_REMOVE(container, next);
    vfio_kvm_device_del_group(group);
    vfio_listener_release(container);

enable_discards_exit:
    vfio_ram_block_discard_disable(container, false);

free_container_exit:
    g_free(container);

close_fd_exit:
    close(fd);

put_space_exit:
    vfio_put_address_space(space);

    return ret;
}

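/*
 * Detach @group from its container; when the last group goes away, the
 * container itself is torn down and its address space reference dropped.
 */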
static void vfio_disconnect_container(VFIOGroup *group)
{
    VFIOContainer *container = group->container;

    QLIST_REMOVE(group, container_next);
    group->container = NULL;

    /*
     * Explicitly release the listener first before unset container,
     * since unset may destroy the backend container if it's the last
     * group.
     */
    if (QLIST_EMPTY(&container->group_list)) {
        vfio_listener_release(container);
    }

    if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
        error_report("vfio: error disconnecting group %d from container",
                     group->groupid);
    }

    if (QLIST_EMPTY(&container->group_list)) {
        VFIOAddressSpace *space = container->space;
        VFIOGuestIOMMU *giommu, *tmp;
        VFIOHostDMAWindow *hostwin, *next;

        QLIST_REMOVE(container, next);

        QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
            memory_region_unregister_iommu_notifier(
                    MEMORY_REGION(giommu->iommu_mr), &giommu->n);
            QLIST_REMOVE(giommu, giommu_next);
            g_free(giommu);
        }

        QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
                           next) {
            QLIST_REMOVE(hostwin, hostwin_next);
            g_free(hostwin);
        }

        trace_vfio_disconnect_container(container->fd);
        close(container->fd);
        g_free(container);

        vfio_put_address_space(space);
    }
}

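/*
 * Open (or reuse) the VFIO group /dev/vfio/<groupid> and connect it to
 * a container in @as.  A device realize path would typically pair this
 * with vfio_get_device(), roughly (illustrative sketch, error handling
 * omitted):
 *
 *     group = vfio_get_group(groupid, as, errp);
 *     vfio_get_device(group, name, vbasedev, errp);
 */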
VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
{
    VFIOGroup *group;
    char path[32];
    struct vfio_group_status status = { .argsz = sizeof(status) };

    QLIST_FOREACH(group, &vfio_group_list, next) {
        if (group->groupid == groupid) {
            /* Found it.  Now is it already in the right context? */
            if (group->container->space->as == as) {
                return group;
            } else {
                error_setg(errp, "group %d used in multiple address spaces",
                           group->groupid);
                return NULL;
            }
        }
    }

    group = g_malloc0(sizeof(*group));

    snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
    group->fd = qemu_open_old(path, O_RDWR);
    if (group->fd < 0) {
        error_setg_errno(errp, errno, "failed to open %s", path);
        goto free_group_exit;
    }

    if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
        error_setg_errno(errp, errno, "failed to get group %d status", groupid);
        goto close_fd_exit;
    }

    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
        error_setg(errp, "group %d is not viable", groupid);
        error_append_hint(errp,
                          "Please ensure all devices within the iommu_group "
                          "are bound to their vfio bus driver.\n");
        goto close_fd_exit;
    }

    group->groupid = groupid;
    QLIST_INIT(&group->device_list);

    if (vfio_connect_container(group, as, errp)) {
        error_prepend(errp, "failed to setup container for group %d: ",
                      groupid);
        goto close_fd_exit;
    }

    if (QLIST_EMPTY(&vfio_group_list)) {
        qemu_register_reset(vfio_reset_handler, NULL);
    }

    QLIST_INSERT_HEAD(&vfio_group_list, group, next);

    return group;

close_fd_exit:
    close(group->fd);

free_group_exit:
    g_free(group);

    return NULL;
}

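/*
 * Drop a group once its device list is empty: undo the RAM discard
 * opt-in, detach from KVM and the container, and unregister the reset
 * handler when the last group disappears.
 */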
void vfio_put_group(VFIOGroup *group)
{
    if (!group || !QLIST_EMPTY(&group->device_list)) {
        return;
    }

    if (!group->ram_block_discard_allowed) {
        vfio_ram_block_discard_disable(group->container, false);
    }
    vfio_kvm_device_del_group(group);
    vfio_disconnect_container(group);
    QLIST_REMOVE(group, next);
    trace_vfio_put_group(group->fd);
    close(group->fd);
    g_free(group);

    if (QLIST_EMPTY(&vfio_group_list)) {
        qemu_unregister_reset(vfio_reset_handler, NULL);
    }
}

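/*
 * Fetch VFIO_DEVICE_GET_INFO, growing the buffer until the kernel's
 * reported argsz fits (the same variable-size protocol as the IOMMU
 * info ioctl above).  Returns NULL and leaves errno set on failure.
 */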
struct vfio_device_info *vfio_get_device_info(int fd)
{
    struct vfio_device_info *info;
    uint32_t argsz = sizeof(*info);

    info = g_malloc0(argsz);

retry:
    info->argsz = argsz;

    if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) {
        g_free(info);
        return NULL;
    }

    if (info->argsz > argsz) {
        argsz = info->argsz;
        info = g_realloc(info, argsz);
        goto retry;
    }

    return info;
}

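/*
 * Obtain the device fd for @name from the group, validate the RAM
 * discard (e.g. ballooning) setting against the rest of the group, and
 * populate the VFIODevice bookkeeping fields from the device info.
 */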
int vfio_get_device(VFIOGroup *group, const char *name,
                    VFIODevice *vbasedev, Error **errp)
{
    g_autofree struct vfio_device_info *info = NULL;
    int fd;

    fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
    if (fd < 0) {
        error_setg_errno(errp, errno, "error getting device from group %d",
                         group->groupid);
        error_append_hint(errp,
                          "Verify all devices in group %d are bound to vfio-<bus> "
                          "or pci-stub and not already in use\n", group->groupid);
        return fd;
    }

    info = vfio_get_device_info(fd);
    if (!info) {
        error_setg_errno(errp, errno, "error getting device info");
        close(fd);
        return -1;
    }

    /*
     * Set discarding of RAM as not broken for this group if the driver
     * knows the device operates compatibly with discarding.  Setting
     * must be consistent per group, but since compatibility is really
     * only possible with mdev currently, we expect singleton groups.
     */
    if (vbasedev->ram_block_discard_allowed !=
        group->ram_block_discard_allowed) {
        if (!QLIST_EMPTY(&group->device_list)) {
            error_setg(errp, "Inconsistent setting of support for discarding "
                       "RAM (e.g., balloon) within group");
            close(fd);
            return -1;
        }

        if (!group->ram_block_discard_allowed) {
            group->ram_block_discard_allowed = true;
            vfio_ram_block_discard_disable(group->container, false);
        }
    }

    vbasedev->fd = fd;
    vbasedev->group = group;
    QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);

    vbasedev->num_irqs = info->num_irqs;
    vbasedev->num_regions = info->num_regions;
    vbasedev->flags = info->flags;

    trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs);

    vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET);

    return 0;
}

void vfio_put_base_device(VFIODevice *vbasedev)
{
    if (!vbasedev->group) {
        return;
    }
    QLIST_REMOVE(vbasedev, next);
    vbasedev->group = NULL;
    trace_vfio_put_base_device(vbasedev->fd);
    close(vbasedev->fd);
}

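/*
 * Query VFIO_DEVICE_GET_REGION_INFO for region @index, growing *info on
 * the kernel's advertised argsz so that any appended capability chain
 * (sparse mmap areas, region type, ...) is captured as well.
 */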
int vfio_get_region_info(VFIODevice *vbasedev, int index,
                         struct vfio_region_info **info)
{
    size_t argsz = sizeof(struct vfio_region_info);

    *info = g_malloc0(argsz);

    (*info)->index = index;
retry:
    (*info)->argsz = argsz;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);

        goto retry;
    }

    return 0;
}

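/*
 * Scan all regions of the device for one whose VFIO_REGION_INFO_CAP_TYPE
 * capability matches @type/@subtype (used for vendor/device-specific
 * regions).  On success *info holds the matching region's info;
 * otherwise -ENODEV is returned and *info is NULL.
 */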
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
                             uint32_t subtype, struct vfio_region_info **info)
{
    int i;

    for (i = 0; i < vbasedev->num_regions; i++) {
        struct vfio_info_cap_header *hdr;
        struct vfio_region_info_cap_type *cap_type;

        if (vfio_get_region_info(vbasedev, i, info)) {
            continue;
        }

        hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
        if (!hdr) {
            g_free(*info);
            continue;
        }

        cap_type = container_of(hdr, struct vfio_region_info_cap_type, header);

        trace_vfio_get_dev_region(vbasedev->name, i,
                                  cap_type->type, cap_type->subtype);

        if (cap_type->type == type && cap_type->subtype == subtype) {
            return 0;
        }

        g_free(*info);
    }

    *info = NULL;
    return -ENODEV;
}

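/*
 * Convenience predicate: does region @region of the device expose
 * capability @cap_type?
 */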
bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
    struct vfio_region_info *info = NULL;
    bool ret = false;

    if (!vfio_get_region_info(vbasedev, region, &info)) {
        if (vfio_get_region_info_cap(info, cap_type)) {
            ret = true;
        }
        g_free(info);
    }

    return ret;
}

/*
 * Interfaces for IBM EEH (Enhanced Error Handling)
 */
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    /*
     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
     * implementation is broken if there are multiple groups in a
     * container.  The hardware works in units of Partitionable
     * Endpoints (== IOMMU groups) and the EEH operations naively
     * iterate across all groups in the container, without any logic
     * to make sure the groups have their state synchronized.  For
     * certain operations (ENABLE) that might be ok, until an error
     * occurs, but for others (GET_STATE) it's clearly broken.
     */

    /*
     * XXX Once fixed kernels exist, test for them here
     */

    if (QLIST_EMPTY(&container->group_list)) {
        return false;
    }

    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
        return false;
    }

    return true;
}

static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
        return -errno;
    }

    return ret;
}

static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
{
    VFIOAddressSpace *space = vfio_get_address_space(as);
    VFIOContainer *container = NULL;

    if (QLIST_EMPTY(&space->containers)) {
        /* No containers to act on */
        goto out;
    }

    container = QLIST_FIRST(&space->containers);

    if (QLIST_NEXT(container, next)) {
        /*
         * We don't yet have logic to synchronize EEH state across
         * multiple containers
         */
        container = NULL;
        goto out;
    }

out:
    vfio_put_address_space(space);
    return container;
}

bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    return (container != NULL) && vfio_eeh_container_ok(container);
}

int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return -ENODEV;
    }
    return vfio_eeh_container_op(container, op);
}