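/*
 * generic functions used by VFIO devices
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alan Cox <alan@linux.intel.com>
 *  Copyright (c) 2006, Neocleus, Guy Zana <guy@neocleus.com>
 *  Copyright (c) 2006 Atsushi SAKAI <sakaia@jp.fujitsu.com>
 *  Copyright (c) 2006 Ravikiran Thirumalai <kiran@scalex86.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
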
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#ifdef CONFIG_KVM
#include <linux/kvm.h>
#endif
#include <linux/vfio.h>

#include "hw/vfio/vfio-common.h"
#include "hw/vfio/vfio.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
#include "exec/ram_addr.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/range.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/migration.h"
#include "migration/misc.h"
#include "migration/blocker.h"
#include "migration/qemu-file.h"
#include "sysemu/tpm.h"

VFIOGroupList vfio_group_list =
    QLIST_HEAD_INITIALIZER(vfio_group_list);
static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);

#ifdef CONFIG_KVM
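/*
 * We have a single VFIO pseudo device per KVM VM.  Once created it persists
 * until the VM exits, so a single fd is cached here and shared by all
 * groups added to the VM.
 */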
static int vfio_kvm_device_fd = -1;
#endif

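/*
 * Common VFIO interrupt disable
 */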
void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = index,
        .start = 0,
        .count = 0,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}

static inline const char *action_to_str(int action)
{
    switch (action) {
    case VFIO_IRQ_SET_ACTION_MASK:
        return "MASK";
    case VFIO_IRQ_SET_ACTION_UNMASK:
        return "UNMASK";
    case VFIO_IRQ_SET_ACTION_TRIGGER:
        return "TRIGGER";
    default:
        return "UNKNOWN ACTION";
    }
}

static const char *index_to_str(VFIODevice *vbasedev, int index)
{
    if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    switch (index) {
    case VFIO_PCI_INTX_IRQ_INDEX:
        return "INTX";
    case VFIO_PCI_MSI_IRQ_INDEX:
        return "MSI";
    case VFIO_PCI_MSIX_IRQ_INDEX:
        return "MSIX";
    case VFIO_PCI_ERR_IRQ_INDEX:
        return "ERR";
    case VFIO_PCI_REQ_IRQ_INDEX:
        return "REQ";
    default:
        return NULL;
    }
}

static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
{
    switch (container->iommu_type) {
    case VFIO_TYPE1v2_IOMMU:
    case VFIO_TYPE1_IOMMU:
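        /*
         * We support coordinated discarding of RAM via the RamDiscardManager.
         */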
        return ram_block_uncoordinated_discard_disable(state);
    default:
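        /*
         * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
         * RamDiscardManager, however, it is completely untested.
         *
         * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
         * completely the opposite of managing mapping/pinning dynamically as
         * required by RamDiscardManager. We would have to special-case
         * sections with a RamDiscardManager.
         */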
        return ram_block_discard_disable(state);
    }
}

int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
                           int action, int fd, Error **errp)
{
    struct vfio_irq_set *irq_set;
    int argsz, ret = 0;
    const char *name;
    int32_t *pfd;

    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action;
    irq_set->index = index;
    irq_set->start = subindex;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = fd;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
        ret = -errno;
    }
    g_free(irq_set);

    if (!ret) {
        return 0;
    }

    error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure");

    name = index_to_str(vbasedev, index);
    if (name) {
        error_prepend(errp, "%s-%d: ", name, subindex);
    } else {
        error_prepend(errp, "index %d-%d: ", index, subindex);
    }
    error_prepend(errp,
                  "Failed to %s %s eventfd signaling for interrupt ",
                  fd < 0 ? "tear down" : "set up", action_to_str(action));
    return ret;
}

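/*
 * IO Port / MMIO - Beware of the endians, VFIO is always little endian
 */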
void vfio_region_write(void *opaque, hwaddr addr,
                       uint64_t data, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    case 8:
        buf.qword = cpu_to_le64(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %u bytes", size);
        break;
    }

    if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                     ",%d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, data, size);
    }

    trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);

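    /*
     * A read or write to a BAR always signals an INTx EOI.  This will
     * do nothing if not pending (including not in INTx mode).  We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt.  Unfortunately, we don't know
     * which BAR is associated with the interrupt, so we just ignore them.
     */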
    vbasedev->ops->vfio_eoi(vbasedev);
}

uint64_t vfio_region_read(void *opaque,
                          hwaddr addr, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, size);
        return (uint64_t)-1;
    }
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    case 8:
        data = le64_to_cpu(buf.qword);
        break;
    default:
        hw_error("vfio: unsupported read size, %u bytes", size);
        break;
    }

    trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data);

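    /* Same as write above */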
    vbasedev->ops->vfio_eoi(vbasedev);

    return data;
}

const MemoryRegionOps vfio_region_ops = {
    .read = vfio_region_read,
    .write = vfio_region_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};

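/*
 * Device state interfaces
 */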
typedef struct {
    unsigned long *bitmap;
    hwaddr size;
    hwaddr pages;
} VFIOBitmap;

static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
{
    vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
    vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) /
                                         BITS_PER_BYTE;
    vbmap->bitmap = g_try_malloc0(vbmap->size);
    if (!vbmap->bitmap) {
        return -ENOMEM;
    }

    return 0;
}

static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                 uint64_t size, ram_addr_t ram_addr);

bool vfio_mig_active(void)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    if (QLIST_EMPTY(&vfio_group_list)) {
        return false;
    }

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->migration_blocker) {
                return false;
            }
        }
    }
    return true;
}

static Error *multiple_devices_migration_blocker;
static Error *giommu_migration_blocker;

static unsigned int vfio_migratable_device_num(void)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;
    unsigned int device_num = 0;

    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->migration) {
                device_num++;
            }
        }
    }

    return device_num;
}

int vfio_block_multiple_devices_migration(Error **errp)
{
    int ret;

    if (multiple_devices_migration_blocker ||
        vfio_migratable_device_num() <= 1) {
        return 0;
    }

    error_setg(&multiple_devices_migration_blocker,
               "Migration is currently not supported with multiple "
               "VFIO devices");
    ret = migrate_add_blocker(multiple_devices_migration_blocker, errp);
    if (ret < 0) {
        error_free(multiple_devices_migration_blocker);
        multiple_devices_migration_blocker = NULL;
    }

    return ret;
}

void vfio_unblock_multiple_devices_migration(void)
{
    if (!multiple_devices_migration_blocker ||
        vfio_migratable_device_num() > 1) {
        return;
    }

    migrate_del_blocker(multiple_devices_migration_blocker);
    error_free(multiple_devices_migration_blocker);
    multiple_devices_migration_blocker = NULL;
}

static bool vfio_viommu_preset(void)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as != &address_space_memory) {
            return true;
        }
    }

    return false;
}

int vfio_block_giommu_migration(Error **errp)
{
    int ret;

    if (giommu_migration_blocker ||
        !vfio_viommu_preset()) {
        return 0;
    }

    error_setg(&giommu_migration_blocker,
               "Migration is currently not supported with vIOMMU enabled");
    ret = migrate_add_blocker(giommu_migration_blocker, errp);
    if (ret < 0) {
        error_free(giommu_migration_blocker);
        giommu_migration_blocker = NULL;
    }

    return ret;
}

void vfio_migration_finalize(void)
{
    if (!giommu_migration_blocker ||
        vfio_viommu_preset()) {
        return;
    }

    migrate_del_blocker(giommu_migration_blocker);
    error_free(giommu_migration_blocker);
    giommu_migration_blocker = NULL;
}

static void vfio_set_migration_error(int err)
{
    MigrationState *ms = migrate_get_current();

    if (migration_is_setup_or_active(ms->state)) {
        WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
            if (ms->to_dst_file) {
                qemu_file_set_error(ms->to_dst_file, err);
            }
        }
    }
}

static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;
    MigrationState *ms = migrate_get_current();

    if (!migration_is_setup_or_active(ms->state)) {
        return false;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            VFIOMigration *migration = vbasedev->migration;

            if (!migration) {
                return false;
            }

            if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF &&
                migration->device_state == VFIO_DEVICE_STATE_RUNNING) {
                return false;
            }
        }
    }
    return true;
}

static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (!vbasedev->dirty_pages_supported) {
                return false;
            }
        }
    }

    return true;
}

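/*
 * Check if all VFIO devices are running and migration is active, which is
 * essentially equivalent to the migration being in pre-copy phase.
 */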
static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

    if (!migration_is_active(migrate_get_current())) {
        return false;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            VFIOMigration *migration = vbasedev->migration;

            if (!migration) {
                return false;
            }

            if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) {
                continue;
            } else {
                return false;
            }
        }
    }
    return true;
}

static int vfio_dma_unmap_bitmap(VFIOContainer *container,
                                 hwaddr iova, ram_addr_t size,
                                 IOMMUTLBEntry *iotlb)
{
    struct vfio_iommu_type1_dma_unmap *unmap;
    struct vfio_bitmap *bitmap;
    VFIOBitmap vbmap;
    int ret;

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        return ret;
    }

    unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));

    unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
    unmap->iova = iova;
    unmap->size = size;
    unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
    bitmap = (struct vfio_bitmap *)&unmap->data;

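    /*
     * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
     * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
     * to qemu_real_host_page_size.
     */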
    bitmap->pgsize = qemu_real_host_page_size();
    bitmap->size = vbmap.size;
    bitmap->data = (__u64 *)vbmap.bitmap;

    if (vbmap.size > container->max_dirty_bitmap_size) {
        error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
        ret = -E2BIG;
        goto unmap_exit;
    }

    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
    if (!ret) {
        cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap,
                iotlb->translated_addr, vbmap.pages);
    } else {
        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
    }

unmap_exit:
    g_free(unmap);
    g_free(vbmap.bitmap);

    return ret;
}

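/*
 * DMA - Mapping and unmapping for the "type1" IOMMU interface
 */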
static int vfio_dma_unmap(VFIOContainer *container,
                          hwaddr iova, ram_addr_t size,
                          IOMMUTLBEntry *iotlb)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = iova,
        .size = size,
    };
    bool need_dirty_sync = false;
    int ret;

    if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
        if (!vfio_devices_all_device_dirty_tracking(container) &&
            container->dirty_pages_supported) {
            return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
        }

        need_dirty_sync = true;
    }

    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
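        /*
         * The type1 backend has an off-by-one bug in the kernel (< v4.15):
         * an unmap whose range ends exactly at the top of the 64-bit IOVA
         * space (iova + size wraps to 0) is rejected with EINVAL.  Work
         * around it by retrying with the range shrunk by one IOMMU page.
         */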
        if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
            trace_vfio_dma_unmap_overflow_workaround();
            unmap.size -= 1ULL << ctz64(container->pgsizes);
            continue;
        }
        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
        return -errno;
    }

    if (need_dirty_sync) {
        ret = vfio_get_dirty_bitmap(container, iova, size,
                                    iotlb->translated_addr);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
                        ram_addr_t size, void *vaddr, bool readonly)
{
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_READ,
        .vaddr = (__u64)(uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };

    if (!readonly) {
        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }

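    /*
     * Try the mapping, if it fails with EBUSY, unmap the region and try
     * again.  This shouldn't be necessary, but we sometimes see it in
     * the VGA ROM space.
     */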
    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
        return 0;
    }

    error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
    return -errno;
}

static void vfio_host_win_add(VFIOContainer *container,
                              hwaddr min_iova, hwaddr max_iova,
                              uint64_t iova_pgsizes)
{
    VFIOHostDMAWindow *hostwin;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        if (ranges_overlap(hostwin->min_iova,
                           hostwin->max_iova - hostwin->min_iova + 1,
                           min_iova,
                           max_iova - min_iova + 1)) {
            hw_error("%s: Overlapping IOMMU windows are not supported",
                     __func__);
        }
    }

    hostwin = g_malloc0(sizeof(*hostwin));

    hostwin->min_iova = min_iova;
    hostwin->max_iova = max_iova;
    hostwin->iova_pgsizes = iova_pgsizes;
    QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
}

static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova,
                             hwaddr max_iova)
{
    VFIOHostDMAWindow *hostwin;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
            QLIST_REMOVE(hostwin, hostwin_next);
            g_free(hostwin);
            return 0;
        }
    }

    return -1;
}

static bool vfio_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           memory_region_is_protected(section->mr) ||
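           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VFIO should tell us the IOMMU width.
            */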
           section->offset_within_address_space & (1ULL << 63);
}

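/* Called with rcu_read_lock held.  */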
static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
                               ram_addr_t *ram_addr, bool *read_only)
{
    bool ret, mr_has_discard_manager;

    ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
                               &mr_has_discard_manager);
    if (ret && mr_has_discard_manager) {
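        /*
         * Malicious VMs might trigger discarding of IOMMU-mapped memory.  The
         * pages will remain pinned inside vfio until unmapped, resulting in a
         * higher memory consumption than expected.  If memory would get
         * populated again later, there would be an inconsistency between
         * pages pinned by vfio and pages seen by QEMU.  Warn about it once.
         */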
        warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
                         " RAM (e.g., virtio-mem) works, however, malicious"
                         " guests can trigger pinning of more memory than"
                         " intended via an IOMMU. It's possible to mitigate"
                         " this by setting/adjusting RLIMIT_MEMLOCK.");
    }
    return ret;
}

static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
    VFIOContainer *container = giommu->container;
    hwaddr iova = iotlb->iova + giommu->iommu_offset;
    void *vaddr;
    int ret;

    trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
                                iova, iova + iotlb->addr_mask);

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        vfio_set_migration_error(-EINVAL);
        return;
    }

    rcu_read_lock();

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        bool read_only;

        if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
            goto out;
        }

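        /*
         * vaddr is only valid until rcu_read_unlock(). But after
         * vfio_dma_map has set up the mapping the pages will be
         * pinned by the kernel. This makes it safe to either access
         * the data or unmap.
         */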
        ret = vfio_dma_map(container, iova,
                           iotlb->addr_mask + 1, vaddr,
                           read_only);
        if (ret) {
            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx", %p) = %d (%s)",
                         container, iova,
                         iotlb->addr_mask + 1, vaddr, ret, strerror(-ret));
        }
    } else {
        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
        if (ret) {
            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%s)",
                         container, iova,
                         iotlb->addr_mask + 1, ret, strerror(-ret));
            vfio_set_migration_error(ret);
        }
    }
out:
    rcu_read_unlock();
}

static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                listener);
    const hwaddr size = int128_get64(section->size);
    const hwaddr iova = section->offset_within_address_space;
    int ret;

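    /* Unmap with a single call. */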
    ret = vfio_dma_unmap(vrdl->container, iova, size, NULL);
    if (ret) {
        error_report("%s: vfio_dma_unmap() failed: %s", __func__,
                     strerror(-ret));
    }
}

static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                                            MemoryRegionSection *section)
{
    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                listener);
    const hwaddr end = section->offset_within_region +
                       int128_get64(section->size);
    hwaddr start, next, iova;
    void *vaddr;
    int ret;

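    /*
     * Map in (aligned within memory region) minimum granularity, so we can
     * unmap in minimum granularity later.
     */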
    for (start = section->offset_within_region; start < end; start = next) {
        next = ROUND_UP(start + 1, vrdl->granularity);
        next = MIN(next, end);

        iova = start - section->offset_within_region +
               section->offset_within_address_space;
        vaddr = memory_region_get_ram_ptr(section->mr) + start;

        ret = vfio_dma_map(vrdl->container, iova, next - start,
                           vaddr, section->readonly);
        if (ret) {
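            /* Rollback */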
            vfio_ram_discard_notify_discard(rdl, section);
            return ret;
        }
    }
    return 0;
}

static void vfio_register_ram_discard_listener(VFIOContainer *container,
                                               MemoryRegionSection *section)
{
    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
    VFIORamDiscardListener *vrdl;

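    /* Ignore some corner cases not relevant in practice. */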
    g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
    g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
                             TARGET_PAGE_SIZE));
    g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));

    vrdl = g_new0(VFIORamDiscardListener, 1);
    vrdl->container = container;
    vrdl->mr = section->mr;
    vrdl->offset_within_address_space = section->offset_within_address_space;
    vrdl->size = int128_get64(section->size);
    vrdl->granularity = ram_discard_manager_get_min_granularity(rdm,
                                                                section->mr);

    g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
    g_assert(container->pgsizes &&
             vrdl->granularity >= 1ULL << ctz64(container->pgsizes));

    ram_discard_listener_init(&vrdl->listener,
                              vfio_ram_discard_notify_populate,
                              vfio_ram_discard_notify_discard, true);
    ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
    QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next);

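    /*
     * Sanity-check if we have a theoretically problematic setup where we
     * could exceed the maximum number of possible DMA mappings over time. We
     * assume that each mapped section in the same address space as a
     * RamDiscardManager section consumes exactly one DMA mapping, with the
     * exception of RamDiscardManager sections; i.e., we don't expect to have
     * gIOMMU sections in the address space or memory regions that are mapped
     * multiple times into the same address space.
     *
     * We assume that each section in the address space consumes one memslot,
     * and take the number of KVM memory slots as a best guess for the maximum
     * number of sections in the address space we could have over time, also
     * consuming DMA mappings.
     */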
    if (container->dma_max_mappings) {
        unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512;

#ifdef CONFIG_KVM
        if (kvm_enabled()) {
            max_memslots = kvm_get_max_memslots();
        }
#endif

        QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
            hwaddr start, end;

            start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space,
                                    vrdl->granularity);
            end = ROUND_UP(vrdl->offset_within_address_space + vrdl->size,
                           vrdl->granularity);
            vrdl_mappings += (end - start) / vrdl->granularity;
            vrdl_count++;
        }

        if (vrdl_mappings + max_memslots - vrdl_count >
            container->dma_max_mappings) {
            warn_report("%s: possibly running out of DMA mappings. E.g., try"
                        " increasing the 'block-size' of virtio-mem devices."
                        " Maximum possible DMA mappings: %d, Maximum possible"
                        " memslots: %d", __func__, container->dma_max_mappings,
                        max_memslots);
        }
    }
}

static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
                                                 MemoryRegionSection *section)
{
    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
    VFIORamDiscardListener *vrdl = NULL;

    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
        if (vrdl->mr == section->mr &&
            vrdl->offset_within_address_space ==
            section->offset_within_address_space) {
            break;
        }
    }

    if (!vrdl) {
        hw_error("vfio: Trying to unregister missing RAM discard listener");
    }

    ram_discard_manager_unregister_listener(rdm, &vrdl->listener);
    QLIST_REMOVE(vrdl, next);
    g_free(vrdl);
}

static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
                                            hwaddr iova, hwaddr end)
{
    VFIOHostDMAWindow *hostwin;
    bool hostwin_found = false;

    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
        if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
            hostwin_found = true;
            break;
        }
    }

    return hostwin_found ? hostwin : NULL;
}

static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!TPM_IS_CRB(mr->owner)) {
        return false;
    }

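    /* this is a known safe misaligned region, just trace for debug purpose */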
    trace_vfio_known_safe_misalignment(memory_region_name(mr),
                                       section->offset_within_address_space,
                                       section->offset_within_region,
                                       qemu_real_host_page_size());
    return true;
}

static bool vfio_listener_valid_section(MemoryRegionSection *section,
                                        const char *name)
{
    if (vfio_listener_skipped_section(section)) {
        trace_vfio_listener_region_skip(name,
                section->offset_within_address_space,
                section->offset_within_address_space +
                int128_get64(int128_sub(section->size, int128_one())));
        return false;
    }

    if (unlikely((section->offset_within_address_space &
                  ~qemu_real_host_page_mask()) !=
                 (section->offset_within_region &
                  ~qemu_real_host_page_mask()))) {
        if (!vfio_known_safe_misalignment(section)) {
            error_report("%s received unaligned region %s iova=0x%"PRIx64
                         " offset_within_region=0x%"PRIx64
                         " qemu_real_host_page_size=0x%"PRIxPTR,
                         __func__, memory_region_name(section->mr),
                         section->offset_within_address_space,
                         section->offset_within_region,
                         qemu_real_host_page_size());
        }
        return false;
    }

    return true;
}

static bool vfio_get_section_iova_range(VFIOContainer *container,
                                        MemoryRegionSection *section,
                                        hwaddr *out_iova, hwaddr *out_end,
                                        Int128 *out_llend)
{
    Int128 llend;
    hwaddr iova;

    iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask()));

    if (int128_ge(int128_make64(iova), llend)) {
        return false;
    }

    *out_iova = iova;
    *out_end = int128_get64(int128_sub(llend, int128_one()));
    if (out_llend) {
        *out_llend = llend;
    }
    return true;
}

static void vfio_listener_region_add(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    hwaddr iova, end;
    Int128 llend, llsize;
    void *vaddr;
    int ret;
    VFIOHostDMAWindow *hostwin;
    Error *err = NULL;

    if (!vfio_listener_valid_section(section, "region_add")) {
        return;
    }

    if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
        if (memory_region_is_ram_device(section->mr)) {
            trace_vfio_listener_region_add_no_dma_map(
                memory_region_name(section->mr),
                section->offset_within_address_space,
                int128_getlo(section->size),
                qemu_real_host_page_size());
        }
        return;
    }

    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
        hwaddr pgsize = 0;

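        /* For now intersections are not allowed, we may relax this later */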
        QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
            if (ranges_overlap(hostwin->min_iova,
                               hostwin->max_iova - hostwin->min_iova + 1,
                               section->offset_within_address_space,
                               int128_get64(section->size))) {
                error_setg(&err,
                    "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing "
                    "host DMA window [0x%"PRIx64",0x%"PRIx64"]",
                    section->offset_within_address_space,
                    section->offset_within_address_space +
                        int128_get64(section->size) - 1,
                    hostwin->min_iova, hostwin->max_iova);
                goto fail;
            }
        }

        ret = vfio_spapr_create_window(container, section, &pgsize);
        if (ret) {
            error_setg_errno(&err, -ret, "Failed to create SPAPR window");
            goto fail;
        }

        vfio_host_win_add(container, section->offset_within_address_space,
                          section->offset_within_address_space +
                          int128_get64(section->size) - 1, pgsize);
#ifdef CONFIG_KVM
        if (kvm_enabled()) {
            VFIOGroup *group;
            IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
            struct kvm_vfio_spapr_tce param;
            struct kvm_device_attr attr = {
                .group = KVM_DEV_VFIO_GROUP,
                .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
                .addr = (uint64_t)(unsigned long)&param,
            };

            if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD,
                                              &param.tablefd)) {
                QLIST_FOREACH(group, &container->group_list, container_next) {
                    param.groupfd = group->fd;
                    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
                        error_report("vfio: failed to setup fd %d "
                                     "for a group with fd %d: %s",
                                     param.tablefd, param.groupfd,
                                     strerror(errno));
                        return;
                    }
                    trace_vfio_spapr_group_attach(param.groupfd, param.tablefd);
                }
            }
        }
#endif
    }

    hostwin = vfio_find_hostwin(container, iova, end);
    if (!hostwin) {
        error_setg(&err, "Container %p can't map guest IOVA region"
                   " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
        goto fail;
    }

    memory_region_ref(section->mr);

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;
        IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
        int iommu_idx;

        trace_vfio_listener_region_add_iommu(iova, end);

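        /*
         * FIXME: For VFIO iommu types which have KVM acceleration to
         * avoid bouncing all map/unmaps through qemu this way, this
         * would be the right place to wire that up (tell the KVM
         * device emulation the VFIO iommu handles to use).
         */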
        giommu = g_malloc0(sizeof(*giommu));
        giommu->iommu_mr = iommu_mr;
        giommu->iommu_offset = section->offset_within_address_space -
                               section->offset_within_region;
        giommu->container = container;
        llend = int128_add(int128_make64(section->offset_within_region),
                           section->size);
        llend = int128_sub(llend, int128_one());
        iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                       MEMTXATTRS_UNSPECIFIED);
        iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
                            IOMMU_NOTIFIER_IOTLB_EVENTS,
                            section->offset_within_region,
                            int128_get64(llend),
                            iommu_idx);

        ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr,
                                                     container->pgsizes,
                                                     &err);
        if (ret) {
            g_free(giommu);
            goto fail;
        }

        ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
                                                    &err);
        if (ret) {
            g_free(giommu);
            goto fail;
        }
        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
        memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);

        return;
    }

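    /*
     * For sections with a RamDiscardManager, register the listener; it
     * maps/unmaps only the currently populated parts of the section and
     * keeps them in sync as parts get populated or discarded.
     */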
    if (memory_region_has_ram_discard_manager(section->mr)) {
        vfio_register_ram_discard_listener(container, section);
        return;
    }

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vfio_listener_region_add_ram(iova, end, vaddr);

    llsize = int128_sub(llend, int128_make64(iova));

    if (memory_region_is_ram_device(section->mr)) {
        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;

        if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
            trace_vfio_listener_region_add_no_dma_map(
                memory_region_name(section->mr),
                section->offset_within_address_space,
                int128_getlo(section->size),
                pgmask + 1);
            return;
        }
    }

    ret = vfio_dma_map(container, iova, int128_get64(llsize),
                       vaddr, section->readonly);
    if (ret) {
        error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                   "0x%"HWADDR_PRIx", %p) = %d (%s)",
                   container, iova, int128_get64(llsize), vaddr, ret,
                   strerror(-ret));
        if (memory_region_is_ram_device(section->mr)) {
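            /* Allow unexpected mappings not to be fatal for RAM devices */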
            error_report_err(err);
            return;
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("failed to vfio_dma_map. pci p2p may not work");
        return;
    }

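    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail.  Runtime, there's not much we can do other
     * than throw a hardware error.
     */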
    if (!container->initialized) {
        if (!container->error) {
            error_propagate_prepend(&container->error, err,
                                    "Region %s: ",
                                    memory_region_name(section->mr));
        } else {
            error_free(err);
        }
    } else {
        error_report_err(err);
        hw_error("vfio: DMA mapping failed, unable to continue");
    }
}

static void vfio_listener_region_del(MemoryListener *listener,
                                     MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    hwaddr iova, end;
    Int128 llend, llsize;
    int ret;
    bool try_unmap = true;

    if (!vfio_listener_valid_section(section, "region_del")) {
        return;
    }

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;

        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
            if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
                giommu->n.start == section->offset_within_region) {
                memory_region_unregister_iommu_notifier(section->mr,
                                                        &giommu->n);
                QLIST_REMOVE(giommu, giommu_next);
                g_free(giommu);
                break;
            }
        }

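        /*
         * FIXME: We assume the one big unmap below is adequate to
         * remove any individual page mappings in the IOMMU which
         * might have been copied into VFIO. This works for a page table
         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
         * That may not be true for all IOMMU types.
         */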
    }

    if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    trace_vfio_listener_region_del(iova, end);

    if (memory_region_is_ram_device(section->mr)) {
        hwaddr pgmask;
        VFIOHostDMAWindow *hostwin;

        hostwin = vfio_find_hostwin(container, iova, end);
        assert(hostwin);

        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
        try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
    } else if (memory_region_has_ram_discard_manager(section->mr)) {
        vfio_unregister_ram_discard_listener(container, section);
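        /* Unregistering will trigger an unmap. */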
        try_unmap = false;
    }

    if (try_unmap) {
        if (int128_eq(llsize, int128_2_64())) {
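            /* The unmap ioctl doesn't accept a full 64bit span. */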
            llsize = int128_rshift(llsize, 1);
            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
            if (ret) {
                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                             "0x%"HWADDR_PRIx") = %d (%s)",
                             container, iova, int128_get64(llsize), ret,
                             strerror(-ret));
            }
            iova += int128_get64(llsize);
        }
        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
        if (ret) {
            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%s)",
                         container, iova, int128_get64(llsize), ret,
                         strerror(-ret));
        }
    }

    memory_region_unref(section->mr);

    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
        vfio_spapr_remove_window(container,
                                 section->offset_within_address_space);
        if (vfio_host_win_del(container,
                              section->offset_within_address_space,
                              section->offset_within_address_space +
                              int128_get64(section->size) - 1) < 0) {
            hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
                     __func__, section->offset_within_address_space);
        }
    }
}

static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
{
    int ret;
    struct vfio_iommu_type1_dirty_bitmap dirty = {
        .argsz = sizeof(dirty),
    };

    if (!container->dirty_pages_supported) {
        return 0;
    }

    if (start) {
        dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
    } else {
        dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
    }

    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
    if (ret) {
        ret = -errno;
        error_report("Failed to set dirty tracking flag 0x%x errno: %d",
                     dirty.flags, errno);
    }

    return ret;
}

typedef struct VFIODirtyRanges {
    hwaddr min32;
    hwaddr max32;
    hwaddr min64;
    hwaddr max64;
} VFIODirtyRanges;

typedef struct VFIODirtyRangesListener {
    VFIOContainer *container;
    VFIODirtyRanges ranges;
    MemoryListener listener;
} VFIODirtyRangesListener;

static void vfio_dirty_tracking_update(MemoryListener *listener,
                                       MemoryRegionSection *section)
{
    VFIODirtyRangesListener *dirty = container_of(listener,
                                                  VFIODirtyRangesListener,
                                                  listener);
    VFIODirtyRanges *range = &dirty->ranges;
    hwaddr iova, end, *min, *max;

    if (!vfio_listener_valid_section(section, "tracking_update") ||
        !vfio_get_section_iova_range(dirty->container, section,
                                     &iova, &end, NULL)) {
        return;
    }

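    /*
     * The address space passed to the dirty tracker is reduced to two ranges:
     * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
     * The underlying reports of dirty will query a sub-interval of each of
     * these ranges.
     *
     * The purpose of the dual range handling is to handle known cases of big
     * holes in the address space, like the x86 AMD 1T hole. The alternative
     * would be an IOVATree but that has a much bigger runtime overhead and
     * unnecessary complexity.
     */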
    min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
    max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;

    if (*min > iova) {
        *min = iova;
    }
    if (*max < end) {
        *max = end;
    }

    trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
    return;
}

static const MemoryListener vfio_dirty_tracking_listener = {
    .name = "vfio-tracking",
    .region_add = vfio_dirty_tracking_update,
};

static void vfio_dirty_tracking_init(VFIOContainer *container,
                                     VFIODirtyRanges *ranges)
{
    VFIODirtyRangesListener dirty;

    memset(&dirty, 0, sizeof(dirty));
    dirty.ranges.min32 = UINT32_MAX;
    dirty.ranges.min64 = UINT64_MAX;
    dirty.listener = vfio_dirty_tracking_listener;
    dirty.container = container;

    memory_listener_register(&dirty.listener,
                             container->space->as);

    *ranges = dirty.ranges;

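    /*
     * The memory listener is synchronous, and used to calculate the range
     * to dirty tracking. Unregister it after we are done as we are not
     * interested in any follow-up updates.
     */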
    memory_listener_unregister(&dirty.listener);
}

static void vfio_devices_dma_logging_stop(VFIOContainer *container)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
                              sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    VFIODevice *vbasedev;
    VFIOGroup *group;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_SET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (!vbasedev->dirty_tracking) {
                continue;
            }

            if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
                warn_report("%s: Failed to stop DMA logging, err %d (%s)",
                            vbasedev->name, -errno, strerror(errno));
            }
            vbasedev->dirty_tracking = false;
        }
    }
}

static struct vfio_device_feature *
vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
                                             VFIODirtyRanges *tracking)
{
    struct vfio_device_feature *feature;
    size_t feature_size;
    struct vfio_device_feature_dma_logging_control *control;
    struct vfio_device_feature_dma_logging_range *ranges;

    feature_size = sizeof(struct vfio_device_feature) +
                   sizeof(struct vfio_device_feature_dma_logging_control);
    feature = g_try_malloc0(feature_size);
    if (!feature) {
        errno = ENOMEM;
        return NULL;
    }
    feature->argsz = feature_size;
    feature->flags = VFIO_DEVICE_FEATURE_SET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_START;

    control = (struct vfio_device_feature_dma_logging_control *)feature->data;
    control->page_size = qemu_real_host_page_size();

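    /*
     * DMA logging uAPI guarantees to support at least a number of ranges that
     * fits into a single host kernel base page.
     */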
    control->num_ranges = !!tracking->max32 + !!tracking->max64;
    ranges = g_try_new0(struct vfio_device_feature_dma_logging_range,
                        control->num_ranges);
    if (!ranges) {
        g_free(feature);
        errno = ENOMEM;

        return NULL;
    }

    control->ranges = (__u64)(uintptr_t)ranges;
    if (tracking->max32) {
        ranges->iova = tracking->min32;
        ranges->length = (tracking->max32 - tracking->min32) + 1;
        ranges++;
    }
    if (tracking->max64) {
        ranges->iova = tracking->min64;
        ranges->length = (tracking->max64 - tracking->min64) + 1;
    }

    trace_vfio_device_dirty_tracking_start(control->num_ranges,
                                           tracking->min32, tracking->max32,
                                           tracking->min64, tracking->max64);

    return feature;
}

static void vfio_device_feature_dma_logging_start_destroy(
    struct vfio_device_feature *feature)
{
    struct vfio_device_feature_dma_logging_control *control =
        (struct vfio_device_feature_dma_logging_control *)feature->data;
    struct vfio_device_feature_dma_logging_range *ranges =
        (struct vfio_device_feature_dma_logging_range *)(uintptr_t)control->ranges;

    g_free(ranges);
    g_free(feature);
}

static int vfio_devices_dma_logging_start(VFIOContainer *container)
{
    struct vfio_device_feature *feature;
    VFIODirtyRanges ranges;
    VFIODevice *vbasedev;
    VFIOGroup *group;
    int ret = 0;

    vfio_dirty_tracking_init(container, &ranges);
    feature = vfio_device_feature_dma_logging_start_create(container,
                                                           &ranges);
    if (!feature) {
        return -errno;
    }

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->dirty_tracking) {
                continue;
            }

            ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
            if (ret) {
                ret = -errno;
                error_report("%s: Failed to start DMA logging, err %d (%s)",
                             vbasedev->name, ret, strerror(errno));
                goto out;
            }
            vbasedev->dirty_tracking = true;
        }
    }

out:
    if (ret) {
        vfio_devices_dma_logging_stop(container);
    }

    vfio_device_feature_dma_logging_start_destroy(feature);

    return ret;
}

static void vfio_listener_log_global_start(MemoryListener *listener)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    int ret;

    if (vfio_devices_all_device_dirty_tracking(container)) {
        ret = vfio_devices_dma_logging_start(container);
    } else {
        ret = vfio_set_dirty_page_tracking(container, true);
    }

    if (ret) {
        error_report("vfio: Could not start dirty page tracking, err: %d (%s)",
                     ret, strerror(-ret));
        vfio_set_migration_error(ret);
    }
}

static void vfio_listener_log_global_stop(MemoryListener *listener)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    int ret = 0;

    if (vfio_devices_all_device_dirty_tracking(container)) {
        vfio_devices_dma_logging_stop(container);
    } else {
        ret = vfio_set_dirty_page_tracking(container, false);
    }

    if (ret) {
        error_report("vfio: Could not stop dirty page tracking, err: %d (%s)",
                     ret, strerror(-ret));
        vfio_set_migration_error(ret);
    }
}

static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
                                          hwaddr size, void *bitmap)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                        sizeof(struct vfio_device_feature_dma_logging_report),
                        sizeof(__u64))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_dma_logging_report *report =
        (struct vfio_device_feature_dma_logging_report *)feature->data;

    report->iova = iova;
    report->length = size;
    report->page_size = qemu_real_host_page_size();
    report->bitmap = (__u64)(uintptr_t)bitmap;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_GET |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
        return -errno;
    }

    return 0;
}

static int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
                                           VFIOBitmap *vbmap, hwaddr iova,
                                           hwaddr size)
{
    VFIODevice *vbasedev;
    VFIOGroup *group;
    int ret;

    QLIST_FOREACH(group, &container->group_list, container_next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            ret = vfio_device_dma_logging_report(vbasedev, iova, size,
                                                 vbmap->bitmap);
            if (ret) {
                error_report("%s: Failed to get DMA logging report, iova: "
                             "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
                             ", err: %d (%s)",
                             vbasedev->name, iova, size, ret, strerror(-ret));

                return ret;
            }
        }
    }

    return 0;
}

static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
                                   hwaddr iova, hwaddr size)
{
    struct vfio_iommu_type1_dirty_bitmap *dbitmap;
    struct vfio_iommu_type1_dirty_bitmap_get *range;
    int ret;

    dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));

    dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
    dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
    range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
    range->iova = iova;
    range->size = size;

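    /*
     * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
     * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
     * to qemu_real_host_page_size.
     */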
    range->bitmap.pgsize = qemu_real_host_page_size();
    range->bitmap.size = vbmap->size;
    range->bitmap.data = (__u64 *)vbmap->bitmap;

    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
    if (ret) {
        ret = -errno;
        error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
                     " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
                     (uint64_t)range->size, errno);
    }

    g_free(dbitmap);

    return ret;
}

static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                 uint64_t size, ram_addr_t ram_addr)
{
    bool all_device_dirty_tracking =
        vfio_devices_all_device_dirty_tracking(container);
    VFIOBitmap vbmap;
    int ret;

    if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
        cpu_physical_memory_set_dirty_range(ram_addr, size,
                                            tcg_enabled() ? DIRTY_CLIENTS_ALL :
                                            DIRTY_CLIENTS_NOCODE);
        return 0;
    }

    ret = vfio_bitmap_alloc(&vbmap, size);
    if (ret) {
        return ret;
    }

    if (all_device_dirty_tracking) {
        ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
    } else {
        ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
    }

    if (ret) {
        goto out;
    }

    cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
                                           vbmap.pages);

    trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size,
                                ram_addr);
out:
    g_free(vbmap.bitmap);

    return ret;
}

typedef struct {
    IOMMUNotifier n;
    VFIOGuestIOMMU *giommu;
} vfio_giommu_dirty_notifier;

static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    vfio_giommu_dirty_notifier *gdn = container_of(n,
                                                vfio_giommu_dirty_notifier, n);
    VFIOGuestIOMMU *giommu = gdn->giommu;
    VFIOContainer *container = giommu->container;
    hwaddr iova = iotlb->iova + giommu->iommu_offset;
    ram_addr_t translated_addr;
    int ret = -EINVAL;

    trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        goto out;
    }

    rcu_read_lock();
    if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
        ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
                                    translated_addr);
        if (ret) {
            error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%s)",
                         container, iova, iotlb->addr_mask + 1, ret,
                         strerror(-ret));
        }
    }
    rcu_read_unlock();

out:
    if (ret) {
        vfio_set_migration_error(ret);
    }
}

static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
                                             void *opaque)
{
    const hwaddr size = int128_get64(section->size);
    const hwaddr iova = section->offset_within_address_space;
    const ram_addr_t ram_addr = memory_region_get_ram_addr(section->mr) +
                                section->offset_within_region;
    VFIORamDiscardListener *vrdl = opaque;

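    /*
     * Sync the whole mapped region (spanning multiple individual mappings)
     * in one go.
     */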
    return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr);
}

static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
                                                   MemoryRegionSection *section)
{
    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
    VFIORamDiscardListener *vrdl = NULL;

    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
        if (vrdl->mr == section->mr &&
            vrdl->offset_within_address_space ==
            section->offset_within_address_space) {
            break;
        }
    }

    if (!vrdl) {
        hw_error("vfio: Trying to sync missing RAM discard listener");
    }

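    /*
     * We only want/can synchronize the bitmap for actually mapped parts -
     * which correspond to populated parts. Replay all populated parts.
     */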
    return ram_discard_manager_replay_populated(rdm, section,
                                                vfio_ram_discard_get_dirty_bitmap,
                                                vrdl);
}

static int vfio_sync_dirty_bitmap(VFIOContainer *container,
                                  MemoryRegionSection *section)
{
    ram_addr_t ram_addr;

    if (memory_region_is_iommu(section->mr)) {
        VFIOGuestIOMMU *giommu;

        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
            if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
                giommu->n.start == section->offset_within_region) {
                Int128 llend;
                vfio_giommu_dirty_notifier gdn = { .giommu = giommu };
                int idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr,
                                                       MEMTXATTRS_UNSPECIFIED);

                llend = int128_add(int128_make64(section->offset_within_region),
                                   section->size);
                llend = int128_sub(llend, int128_one());

                iommu_notifier_init(&gdn.n,
                                    vfio_iommu_map_dirty_notify,
                                    IOMMU_NOTIFIER_MAP,
                                    section->offset_within_region,
                                    int128_get64(llend),
                                    idx);
                memory_region_iommu_replay(giommu->iommu_mr, &gdn.n);
                break;
            }
        }
        return 0;
    } else if (memory_region_has_ram_discard_manager(section->mr)) {
        return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
    }

    ram_addr = memory_region_get_ram_addr(section->mr) +
               section->offset_within_region;

    return vfio_get_dirty_bitmap(container,
                   REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
                   int128_get64(section->size), ram_addr);
}

static void vfio_listener_log_sync(MemoryListener *listener,
                                   MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer, listener);
    int ret;

    if (vfio_listener_skipped_section(section)) {
        return;
    }

    if (vfio_devices_all_dirty_tracking(container)) {
        ret = vfio_sync_dirty_bitmap(container, section);
        if (ret) {
            error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret,
                         strerror(-ret));
            vfio_set_migration_error(ret);
        }
    }
}

static const MemoryListener vfio_memory_listener = {
    .name = "vfio",
    .region_add = vfio_listener_region_add,
    .region_del = vfio_listener_region_del,
    .log_global_start = vfio_listener_log_global_start,
    .log_global_stop = vfio_listener_log_global_stop,
    .log_sync = vfio_listener_log_sync,
};

static void vfio_listener_release(VFIOContainer *container)
{
    memory_listener_unregister(&container->listener);
    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
        memory_listener_unregister(&container->prereg_listener);
    }
}

static struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
    struct vfio_info_cap_header *hdr;

    for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}

struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

static struct vfio_info_cap_header *
vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}

bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
                             unsigned int *avail)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_iommu_type1_info_dma_avail *cap;

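    /* If the capability cannot be found, assume no DMA limiting */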
2001 hdr = vfio_get_iommu_type1_info_cap(info,
2002 VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL);
2003 if (hdr == NULL) {
2004 return false;
2005 }
2006
2007 if (avail != NULL) {
2008 cap = (void *) hdr;
2009 *avail = cap->avail;
2010 }
2011
2012 return true;
2013}
2014
2015static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
2016 struct vfio_region_info *info)
2017{
2018 struct vfio_info_cap_header *hdr;
2019 struct vfio_region_info_cap_sparse_mmap *sparse;
2020 int i, j;
2021
2022 hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
2023 if (!hdr) {
2024 return -ENODEV;
2025 }
2026
2027 sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);
2028
2029 trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
2030 region->nr, sparse->nr_areas);
2031
2032 region->mmaps = g_new0(VFIOMmap, sparse->nr_areas);
2033
2034 for (i = 0, j = 0; i < sparse->nr_areas; i++) {
2035 if (sparse->areas[i].size) {
2036 trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
2037 sparse->areas[i].offset +
2038 sparse->areas[i].size - 1);
2039 region->mmaps[j].offset = sparse->areas[i].offset;
2040 region->mmaps[j].size = sparse->areas[i].size;
2041 j++;
2042 }
2043 }
2044
2045 region->nr_mmaps = j;
2046 region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap));
2047
2048 return 0;
2049}
2050
2051int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
2052 int index, const char *name)
2053{
2054 struct vfio_region_info *info;
2055 int ret;
2056
2057 ret = vfio_get_region_info(vbasedev, index, &info);
2058 if (ret) {
2059 return ret;
2060 }
2061
2062 region->vbasedev = vbasedev;
2063 region->flags = info->flags;
2064 region->size = info->size;
2065 region->fd_offset = info->offset;
2066 region->nr = index;
2067
2068 if (region->size) {
2069 region->mem = g_new0(MemoryRegion, 1);
2070 memory_region_init_io(region->mem, obj, &vfio_region_ops,
2071 region, name, region->size);
2072
2073 if (!vbasedev->no_mmap &&
2074 region->flags & VFIO_REGION_INFO_FLAG_MMAP) {
2075
2076 ret = vfio_setup_region_sparse_mmaps(region, info);
2077
2078 if (ret) {
2079 region->nr_mmaps = 1;
2080 region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
2081 region->mmaps[0].offset = 0;
2082 region->mmaps[0].size = region->size;
2083 }
2084 }
2085 }
2086
2087 g_free(info);
2088
2089 trace_vfio_region_setup(vbasedev->name, index, name,
2090 region->flags, region->fd_offset, region->size);
2091 return 0;
2092}
2093
2094static void vfio_subregion_unmap(VFIORegion *region, int index)
2095{
2096 trace_vfio_region_unmap(memory_region_name(®ion->mmaps[index].mem),
2097 region->mmaps[index].offset,
2098 region->mmaps[index].offset +
2099 region->mmaps[index].size - 1);
2100 memory_region_del_subregion(region->mem, ®ion->mmaps[index].mem);
2101 munmap(region->mmaps[index].mmap, region->mmaps[index].size);
2102 object_unparent(OBJECT(®ion->mmaps[index].mem));
2103 region->mmaps[index].mmap = NULL;
2104}
2105
2106int vfio_region_mmap(VFIORegion *region)
2107{
2108 int i, prot = 0;
2109 char *name;
2110
2111 if (!region->mem) {
2112 return 0;
2113 }
2114
2115 prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
2116 prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
2117
2118 for (i = 0; i < region->nr_mmaps; i++) {
2119 region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
2120 MAP_SHARED, region->vbasedev->fd,
2121 region->fd_offset +
2122 region->mmaps[i].offset);
2123 if (region->mmaps[i].mmap == MAP_FAILED) {
2124 int ret = -errno;
2125
2126 trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
2127 region->fd_offset +
2128 region->mmaps[i].offset,
2129 region->fd_offset +
2130 region->mmaps[i].offset +
2131 region->mmaps[i].size - 1, ret);
2132
2133 region->mmaps[i].mmap = NULL;
2134
2135 for (i--; i >= 0; i--) {
2136 vfio_subregion_unmap(region, i);
2137 }
2138
2139 return ret;
2140 }
2141
2142 name = g_strdup_printf("%s mmaps[%d]",
2143 memory_region_name(region->mem), i);
2144 memory_region_init_ram_device_ptr(®ion->mmaps[i].mem,
2145 memory_region_owner(region->mem),
2146 name, region->mmaps[i].size,
2147 region->mmaps[i].mmap);
2148 g_free(name);
2149 memory_region_add_subregion(region->mem, region->mmaps[i].offset,
2150 ®ion->mmaps[i].mem);
2151
2152 trace_vfio_region_mmap(memory_region_name(®ion->mmaps[i].mem),
2153 region->mmaps[i].offset,
2154 region->mmaps[i].offset +
2155 region->mmaps[i].size - 1);
2156 }
2157
2158 return 0;
2159}
2160
void vfio_region_unmap(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            vfio_subregion_unmap(region, i);
        }
    }
}

void vfio_region_exit(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
        }
    }

    trace_vfio_region_exit(region->vbasedev->name, region->nr);
}

void vfio_region_finalize(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
            object_unparent(OBJECT(&region->mmaps[i].mem));
        }
    }

    object_unparent(OBJECT(region->mem));

    g_free(region->mem);
    g_free(region->mmaps);

    trace_vfio_region_finalize(region->vbasedev->name, region->nr);

    region->mem = NULL;
    region->mmaps = NULL;
    region->nr_mmaps = 0;
    region->size = 0;
    region->flags = 0;
    region->nr = 0;
}

void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
        }
    }

    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
                                        enabled);
}

void vfio_reset_handler(void *opaque)
{
    VFIOGroup *group;
    VFIODevice *vbasedev;

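    /* First pass: let each realized device decide whether it needs a reset. */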
    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->dev->realized) {
                vbasedev->ops->vfio_compute_needs_reset(vbasedev);
            }
        }
    }

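    /* Second pass: perform the resets, which may span multiple devices. */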
    QLIST_FOREACH(group, &vfio_group_list, next) {
        QLIST_FOREACH(vbasedev, &group->device_list, next) {
            if (vbasedev->dev->realized && vbasedev->needs_reset) {
                vbasedev->ops->vfio_hot_reset_multi(vbasedev);
            }
        }
    }
}

static void vfio_kvm_device_add_group(VFIOGroup *group)
{
#ifdef CONFIG_KVM
    struct kvm_device_attr attr = {
        .group = KVM_DEV_VFIO_GROUP,
        .attr = KVM_DEV_VFIO_GROUP_ADD,
        .addr = (uint64_t)(unsigned long)&group->fd,
    };

    if (!kvm_enabled()) {
        return;
    }

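    /* Create the KVM VFIO pseudo device lazily, on the first group added. */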
    if (vfio_kvm_device_fd < 0) {
        struct kvm_create_device cd = {
            .type = KVM_DEV_TYPE_VFIO,
        };

        if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) {
            error_report("Failed to create KVM VFIO device: %m");
            return;
        }

        vfio_kvm_device_fd = cd.fd;
    }

    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
        error_report("Failed to add group %d to KVM VFIO device: %m",
                     group->groupid);
    }
#endif
}

static void vfio_kvm_device_del_group(VFIOGroup *group)
{
#ifdef CONFIG_KVM
    struct kvm_device_attr attr = {
        .group = KVM_DEV_VFIO_GROUP,
        .attr = KVM_DEV_VFIO_GROUP_DEL,
        .addr = (uint64_t)(unsigned long)&group->fd,
    };

    if (vfio_kvm_device_fd < 0) {
        return;
    }

    if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
        error_report("Failed to remove group %d from KVM VFIO device: %m",
                     group->groupid);
    }
#endif
}

static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
{
    VFIOAddressSpace *space;

    QLIST_FOREACH(space, &vfio_address_spaces, list) {
        if (space->as == as) {
            return space;
        }
    }

    /* No suitable VFIOAddressSpace, create a new one */
    space = g_malloc0(sizeof(*space));
    space->as = as;
    QLIST_INIT(&space->containers);

    QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);

    return space;
}

static void vfio_put_address_space(VFIOAddressSpace *space)
{
    if (QLIST_EMPTY(&space->containers)) {
        QLIST_REMOVE(space, list);
        g_free(space);
    }
}

/*
 * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
 */
static int vfio_get_iommu_type(VFIOContainer *container,
                               Error **errp)
{
    int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
                          VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
    int i;

    for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
        if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
            return iommu_types[i];
        }
    }
    error_setg(errp, "No available IOMMU models");
    return -EINVAL;
}

static int vfio_init_container(VFIOContainer *container, int group_fd,
                               Error **errp)
{
    int iommu_type, ret;

    iommu_type = vfio_get_iommu_type(container, errp);
    if (iommu_type < 0) {
        return iommu_type;
    }

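    /*
     * The kernel expects at least one group to be attached to the container
     * before the IOMMU model can be set with VFIO_SET_IOMMU.
     */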
    ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd);
    if (ret) {
        error_setg_errno(errp, errno, "Failed to set group container");
        return -errno;
    }

    while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
        if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
            /*
             * On sPAPR, despite the IOMMU subdriver always advertises v1 and
             * v2, the running platform may not support v2 and there is no
             * way to guess it until an IOMMU group gets added to the
             * container.
             * So in case it fails with v2, try v1 as a fallback.
             */
            iommu_type = VFIO_SPAPR_TCE_IOMMU;
            continue;
        }
        error_setg_errno(errp, errno, "Failed to set iommu for container");
        return -errno;
    }

    container->iommu_type = iommu_type;
    return 0;
}

static int vfio_get_iommu_info(VFIOContainer *container,
                               struct vfio_iommu_type1_info **info)
{
    size_t argsz = sizeof(struct vfio_iommu_type1_info);

    *info = g_new0(struct vfio_iommu_type1_info, 1);
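    /*
     * The kernel rewrites argsz with the size it actually needs (e.g. to
     * append capability chains); grow the buffer and retry until it fits.
     */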
again:
    (*info)->argsz = argsz;

    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);
        goto again;
    }

    return 0;
}

static struct vfio_info_cap_header *
vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
{
    struct vfio_info_cap_header *hdr;
    void *ptr = info;

    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
        return NULL;
    }

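    /* Walk the capability chain; hdr->next offsets are relative to info. */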
    for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}

static void vfio_get_iommu_info_migration(VFIOContainer *container,
                                          struct vfio_iommu_type1_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_iommu_type1_info_cap_migration *cap_mig;

    hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
    if (!hdr) {
        return;
    }

    cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
                           header);

    /*
     * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
     * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
     * to qemu_real_host_page_size.
     */
    if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
        container->dirty_pages_supported = true;
        container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
        container->dirty_pgsizes = cap_mig->pgsize_bitmap;
    }
}

static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
                                  Error **errp)
{
    VFIOContainer *container;
    int ret, fd;
    VFIOAddressSpace *space;

    space = vfio_get_address_space(as);

    /*
     * VFIO is currently incompatible with discarding of RAM insofar as the
     * madvise to purge (zap) the page from QEMU's address space does not
     * interact with the memory API and therefore leaves stale virtual to
     * physical mappings in the IOMMU if the page was previously pinned.  We
     * therefore set discarding broken for each group added to a container,
     * whether the container is used individually or shared by multiple
     * groups.
     *
     * Before opening a new container, try to reuse an existing one in this
     * address space: if the group can simply be attached to it, all IOMMU
     * setup has already been done and the group shares its mappings.
     */
    QLIST_FOREACH(container, &space->containers, next) {
        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
            ret = vfio_ram_block_discard_disable(container, true);
            if (ret) {
                error_setg_errno(errp, -ret,
                                 "Cannot set discarding of RAM broken");
                if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER,
                          &container->fd)) {
                    error_report("vfio: error disconnecting group %d from"
                                 " container", group->groupid);
                }
                return ret;
            }
            group->container = container;
            QLIST_INSERT_HEAD(&container->group_list, group, container_next);
            vfio_kvm_device_add_group(group);
            return 0;
        }
    }

    fd = qemu_open_old("/dev/vfio/vfio", O_RDWR);
    if (fd < 0) {
        error_setg_errno(errp, errno, "failed to open /dev/vfio/vfio");
        ret = -errno;
        goto put_space_exit;
    }

    ret = ioctl(fd, VFIO_GET_API_VERSION);
    if (ret != VFIO_API_VERSION) {
        error_setg(errp, "supported vfio version: %d, "
                   "reported version: %d", VFIO_API_VERSION, ret);
        ret = -EINVAL;
        goto close_fd_exit;
    }

    container = g_malloc0(sizeof(*container));
    container->space = space;
    container->fd = fd;
    container->error = NULL;
    container->dirty_pages_supported = false;
    container->dma_max_mappings = 0;
    QLIST_INIT(&container->giommu_list);
    QLIST_INIT(&container->hostwin_list);
    QLIST_INIT(&container->vrdl_list);

    ret = vfio_init_container(container, group->fd, errp);
    if (ret) {
        goto free_container_exit;
    }

    ret = vfio_ram_block_discard_disable(container, true);
    if (ret) {
        error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
        goto free_container_exit;
    }

    switch (container->iommu_type) {
    case VFIO_TYPE1v2_IOMMU:
    case VFIO_TYPE1_IOMMU:
    {
        struct vfio_iommu_type1_info *info;

        ret = vfio_get_iommu_info(container, &info);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
            goto enable_discards_exit;
        }

        if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
            container->pgsizes = info->iova_pgsizes;
        } else {
            container->pgsizes = qemu_real_host_page_size();
        }

        if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
            container->dma_max_mappings = 65535;
        }
        vfio_get_iommu_info_migration(container, info);
        g_free(info);

        /*
         * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
         * information to learn the actual host IOVA ranges; until then,
         * treat the whole 64-bit IOVA space as one usable host window.
         */
        vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes);

        break;
    }
    case VFIO_SPAPR_TCE_v2_IOMMU:
    case VFIO_SPAPR_TCE_IOMMU:
    {
        struct vfio_iommu_spapr_tce_info info;
        bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;

        /*
         * The host kernel code implementing VFIO_IOMMU_DISABLE is called
         * when container fd is closed so we do not call it explicitly
         * in this file.
         */
        if (!v2) {
            ret = ioctl(fd, VFIO_IOMMU_ENABLE);
            if (ret) {
                error_setg_errno(errp, errno, "failed to enable container");
                ret = -errno;
                goto enable_discards_exit;
            }
        } else {
            container->prereg_listener = vfio_prereg_listener;

            memory_listener_register(&container->prereg_listener,
                                     &address_space_memory);
            if (container->error) {
                memory_listener_unregister(&container->prereg_listener);
                ret = -1;
                error_propagate_prepend(errp, container->error,
                    "RAM memory listener initialization failed: ");
                goto enable_discards_exit;
            }
        }

        info.argsz = sizeof(info);
        ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
        if (ret) {
            error_setg_errno(errp, errno,
                             "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed");
            ret = -errno;
            if (v2) {
                memory_listener_unregister(&container->prereg_listener);
            }
            goto enable_discards_exit;
        }

        if (v2) {
            container->pgsizes = info.ddw.pgsizes;
            /*
             * There is a default window in just created container.
             * To make region_add/del simpler, we better remove this
             * window now and let those iommu_listener callbacks
             * create/remove them when needed.
             */
            ret = vfio_spapr_remove_window(container, info.dma32_window_start);
            if (ret) {
                error_setg_errno(errp, -ret,
                                 "failed to remove existing window");
                goto enable_discards_exit;
            }
        } else {
            /* The default table uses 4K pages */
            container->pgsizes = 0x1000;
            vfio_host_win_add(container, info.dma32_window_start,
                              info.dma32_window_start +
                              info.dma32_window_size - 1,
                              0x1000);
        }
    }
    }

    vfio_kvm_device_add_group(group);

    QLIST_INIT(&container->group_list);
    QLIST_INSERT_HEAD(&space->containers, container, next);

    group->container = container;
    QLIST_INSERT_HEAD(&container->group_list, group, container_next);

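    /*
     * Registering the listener replays the current memory map; mapping
     * failures in the callbacks are reported through container->error.
     */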
    container->listener = vfio_memory_listener;

    memory_listener_register(&container->listener, container->space->as);

    if (container->error) {
        ret = -1;
        error_propagate_prepend(errp, container->error,
            "memory listener initialization failed: ");
        goto listener_release_exit;
    }

    container->initialized = true;

    return 0;
listener_release_exit:
    QLIST_REMOVE(group, container_next);
    QLIST_REMOVE(container, next);
    vfio_kvm_device_del_group(group);
    vfio_listener_release(container);

enable_discards_exit:
    vfio_ram_block_discard_disable(container, false);

free_container_exit:
    g_free(container);

close_fd_exit:
    close(fd);

put_space_exit:
    vfio_put_address_space(space);

    return ret;
}

static void vfio_disconnect_container(VFIOGroup *group)
{
    VFIOContainer *container = group->container;

    QLIST_REMOVE(group, container_next);
    group->container = NULL;

    /*
     * Explicitly release the listener first before unset container,
     * since unset may destroy the backend container if it's the last
     * group.
     */
    if (QLIST_EMPTY(&container->group_list)) {
        vfio_listener_release(container);
    }

    if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
        error_report("vfio: error disconnecting group %d from container",
                     group->groupid);
    }

    if (QLIST_EMPTY(&container->group_list)) {
        VFIOAddressSpace *space = container->space;
        VFIOGuestIOMMU *giommu, *tmp;
        VFIOHostDMAWindow *hostwin, *next;

        QLIST_REMOVE(container, next);

        QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
            memory_region_unregister_iommu_notifier(
                    MEMORY_REGION(giommu->iommu_mr), &giommu->n);
            QLIST_REMOVE(giommu, giommu_next);
            g_free(giommu);
        }

        QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
                           next) {
            QLIST_REMOVE(hostwin, hostwin_next);
            g_free(hostwin);
        }

        trace_vfio_disconnect_container(container->fd);
        close(container->fd);
        g_free(container);

        vfio_put_address_space(space);
    }
}

VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
{
    VFIOGroup *group;
    char path[32];
    struct vfio_group_status status = { .argsz = sizeof(status) };

    QLIST_FOREACH(group, &vfio_group_list, next) {
        if (group->groupid == groupid) {
            /* Found it.  Now is it already in the right context? */
            if (group->container->space->as == as) {
                return group;
            } else {
                error_setg(errp, "group %d used in multiple address spaces",
                           group->groupid);
                return NULL;
            }
        }
    }

    group = g_malloc0(sizeof(*group));

    snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
    group->fd = qemu_open_old(path, O_RDWR);
    if (group->fd < 0) {
        error_setg_errno(errp, errno, "failed to open %s", path);
        goto free_group_exit;
    }

    if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
        error_setg_errno(errp, errno, "failed to get group %d status", groupid);
        goto close_fd_exit;
    }

    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
        error_setg(errp, "group %d is not viable", groupid);
        error_append_hint(errp,
                          "Please ensure all devices within the iommu_group "
                          "are bound to their vfio bus driver.\n");
        goto close_fd_exit;
    }

    group->groupid = groupid;
    QLIST_INIT(&group->device_list);

    if (vfio_connect_container(group, as, errp)) {
        error_prepend(errp, "failed to setup container for group %d: ",
                      groupid);
        goto close_fd_exit;
    }

    if (QLIST_EMPTY(&vfio_group_list)) {
        qemu_register_reset(vfio_reset_handler, NULL);
    }

    QLIST_INSERT_HEAD(&vfio_group_list, group, next);

    return group;

close_fd_exit:
    close(group->fd);

free_group_exit:
    g_free(group);

    return NULL;
}

void vfio_put_group(VFIOGroup *group)
{
    if (!group || !QLIST_EMPTY(&group->device_list)) {
        return;
    }

    if (!group->ram_block_discard_allowed) {
        vfio_ram_block_discard_disable(group->container, false);
    }
    vfio_kvm_device_del_group(group);
    vfio_disconnect_container(group);
    QLIST_REMOVE(group, next);
    trace_vfio_put_group(group->fd);
    close(group->fd);
    g_free(group);

    if (QLIST_EMPTY(&vfio_group_list)) {
        qemu_unregister_reset(vfio_reset_handler, NULL);
    }
}

int vfio_get_device(VFIOGroup *group, const char *name,
                    VFIODevice *vbasedev, Error **errp)
{
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    int ret, fd;

    fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
    if (fd < 0) {
        error_setg_errno(errp, errno, "error getting device from group %d",
                         group->groupid);
        error_append_hint(errp,
                          "Verify all devices in group %d are bound to vfio-<bus> "
                          "or pci-stub and not already in use\n", group->groupid);
        return fd;
    }

    ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info);
    if (ret) {
        error_setg_errno(errp, errno, "error getting device info");
        close(fd);
        return ret;
    }

    /*
     * Set discarding of RAM as not broken for this group if the driver knows
     * the device operates compatibly with discarding.  Setting must be
     * consistent per group, but since compatibility is really only possible
     * with mdev currently, we expect singleton groups.
     */
    if (vbasedev->ram_block_discard_allowed !=
        group->ram_block_discard_allowed) {
        if (!QLIST_EMPTY(&group->device_list)) {
            error_setg(errp, "Inconsistent setting of support for discarding "
                       "RAM (e.g., balloon) within group");
            close(fd);
            return -1;
        }

        if (!group->ram_block_discard_allowed) {
            group->ram_block_discard_allowed = true;
            vfio_ram_block_discard_disable(group->container, false);
        }
    }

    vbasedev->fd = fd;
    vbasedev->group = group;
    QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);

    vbasedev->num_irqs = dev_info.num_irqs;
    vbasedev->num_regions = dev_info.num_regions;
    vbasedev->flags = dev_info.flags;

    trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions,
                          dev_info.num_irqs);

    vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
    return 0;
}

void vfio_put_base_device(VFIODevice *vbasedev)
{
    if (!vbasedev->group) {
        return;
    }
    QLIST_REMOVE(vbasedev, next);
    vbasedev->group = NULL;
    trace_vfio_put_base_device(vbasedev->fd);
    close(vbasedev->fd);
}

int vfio_get_region_info(VFIODevice *vbasedev, int index,
                         struct vfio_region_info **info)
{
    size_t argsz = sizeof(struct vfio_region_info);

    *info = g_malloc0(argsz);

    (*info)->index = index;
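    /* As with VFIO_IOMMU_GET_INFO: grow the buffer until argsz fits. */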
retry:
    (*info)->argsz = argsz;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);

        goto retry;
    }

    return 0;
}

int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
                             uint32_t subtype, struct vfio_region_info **info)
{
    int i;

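    /*
     * On success, *info describes the matching region and the caller owns
     * (and must g_free()) it; on failure *info is left NULL.
     */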
    for (i = 0; i < vbasedev->num_regions; i++) {
        struct vfio_info_cap_header *hdr;
        struct vfio_region_info_cap_type *cap_type;

        if (vfio_get_region_info(vbasedev, i, info)) {
            continue;
        }

        hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
        if (!hdr) {
            g_free(*info);
            continue;
        }

        cap_type = container_of(hdr, struct vfio_region_info_cap_type, header);

        trace_vfio_get_dev_region(vbasedev->name, i,
                                  cap_type->type, cap_type->subtype);

        if (cap_type->type == type && cap_type->subtype == subtype) {
            return 0;
        }

        g_free(*info);
    }

    *info = NULL;
    return -ENODEV;
}

bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
    struct vfio_region_info *info = NULL;
    bool ret = false;

    if (!vfio_get_region_info(vbasedev, region, &info)) {
        if (vfio_get_region_info_cap(info, cap_type)) {
            ret = true;
        }
        g_free(info);
    }

    return ret;
}

/*
 * Interfaces for IBM EEH (Enhanced Error Handling)
 */
static bool vfio_eeh_container_ok(VFIOContainer *container)
{
    /*
     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
     * implementation is broken if there are multiple groups in a
     * container.  The hardware works in units of Partitionable
     * Endpoints (== IOMMU groups) and the EEH operations naively
     * iterate across all groups in the container, without any logic
     * to make sure the groups have their state synchronized.  For
     * certain operations (ENABLE) that might be ok, until an error
     * occurs, but for others (GET_STATE) it's clearly broken.
     */

    /*
     * XXX Once fixed kernels exist, test for them here
     */

    if (QLIST_EMPTY(&container->group_list)) {
        return false;
    }

    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
        return false;
    }

    return true;
}

static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
{
    struct vfio_eeh_pe_op pe_op = {
        .argsz = sizeof(pe_op),
        .op = op,
    };
    int ret;

    if (!vfio_eeh_container_ok(container)) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
                     "kernel requires a container with exactly one group", op);
        return -EPERM;
    }

    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
    if (ret < 0) {
        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
        return -errno;
    }

    return ret;
}

static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
{
    VFIOAddressSpace *space = vfio_get_address_space(as);
    VFIOContainer *container = NULL;

    if (QLIST_EMPTY(&space->containers)) {
        /* No containers to act on */
        goto out;
    }

    container = QLIST_FIRST(&space->containers);

    if (QLIST_NEXT(container, next)) {
        /*
         * We don't yet have logic to synchronize EEH state across
         * multiple containers
         */
        container = NULL;
        goto out;
    }

out:
    vfio_put_address_space(space);
    return container;
}

bool vfio_eeh_as_ok(AddressSpace *as)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    return (container != NULL) && vfio_eeh_container_ok(container);
}

int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
    VFIOContainer *container = vfio_eeh_as_container(as);

    if (!container) {
        return -ENODEV;
    }
    return vfio_eeh_container_op(container, op);
}