qemu/hw/virtio/virtio-iommu.c
/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "qemu-common.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"

/* Max size */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512

typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;

static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}

/**
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not always be initialized yet.
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}

static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}

static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}

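/*
 * Inject a MAP notification into the IOMMU memory region for registered MAP
 * notifiers. MMIO mappings and mappings without read/write permission are
 * not relayed, since MAP notifiers only handle accessible RAM translations.
 */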
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEvent event;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.target_as = &address_space_memory;
    event.entry.addr_mask = virt_end - virt_start;
    event.entry.iova = virt_start;
    event.entry.perm = perm;
    event.entry.translated_addr = paddr;

    memory_region_notify_iommu(mr, 0, event);
}

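/*
 * Inject UNMAP notifications for the given range. Notifier entries encode a
 * power-of-two size in addr_mask, so the range is split into the largest
 * naturally aligned power-of-two chunks. A full 64-bit invalidation
 * (delta == UINT64_MAX) is sent as a single event; the splitting loop below
 * is then skipped because virt_end + 1 wraps back to virt_start.
 */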
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;
    uint64_t delta = virt_end - virt_start;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = IOMMU_NONE;
    event.entry.translated_addr = 0;
    event.entry.addr_mask = delta;
    event.entry.iova = virt_start;

    if (delta == UINT64_MAX) {
        memory_region_notify_iommu(mr, 0, event);
    }

    while (virt_start != virt_end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);

        event.entry.addr_mask = mask;
        event.entry.iova = virt_start;
        memory_region_notify_iommu(mr, 0, event);
        virt_start += mask + 1;
    }
}

static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}

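/*
 * Detach an endpoint from its domain: tear down all of the domain's mappings
 * in the endpoint's IOMMU memory region and unlink the endpoint from the
 * domain's endpoint list.
 */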
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;

    if (!ep->domain) {
        return;
    }
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
}

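/*
 * Return the endpoint keyed by ep_id, allocating it on first use. Returns
 * NULL if no IOMMU memory region exists for that endpoint ID.
 */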
static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}

static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}

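/*
 * Return the domain keyed by domain_id, creating it on first use. Returns
 * NULL if the domain already exists but its bypass attribute conflicts with
 * the requested one.
 */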
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id,
                                                  bool bypass)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        if (domain->bypass != bypass) {
            return NULL;
        }
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                   NULL, (GDestroyNotify)g_free,
                                   (GDestroyNotify)g_free);
    domain->bypass = bypass;
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}

static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}

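/*
 * PCI IOMMU hook: return the address space used for DMA by the device at
 * (bus, devfn). The per-bus IOMMUPciBus and per-devfn IOMMUDevice structures,
 * along with their IOMMU memory region, are allocated lazily on first use.
 */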
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);
        address_space_init(&sdev->as,
                           MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
        g_free(name);
    }
    return &sdev->as;
}

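/*
 * Handle a VIRTIO_IOMMU_T_ATTACH request: attach the endpoint to the domain,
 * implicitly detaching it from any previous domain first, and replay the
 * domain's existing mappings on the endpoint's memory region.
 */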
static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * the device is already attached to a domain,
         * detach it first
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    return VIRTIO_IOMMU_S_OK;
}

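/*
 * Handle a VIRTIO_IOMMU_T_MAP request: record the [virt_start, virt_end] ->
 * phys_start mapping in the domain's interval tree and notify every endpoint
 * attached to the domain. Overlapping mappings and bypass domains are
 * rejected.
 */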
static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}

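/*
 * Handle a VIRTIO_IOMMU_T_UNMAP request: remove every mapping fully contained
 * in [virt_start, virt_end] and notify the attached endpoints. A mapping that
 * only partially overlaps the requested range yields VIRTIO_IOMMU_S_RANGE.
 */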
static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void **)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}

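/*
 * Serialize the device's reserved memory regions as RESV_MEM probe
 * properties into buf. Returns the number of bytes written, or -ENOSPC if
 * they do not fit in the remaining probe buffer space.
 */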
static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    int i;

    total = size * s->nb_reserved_regions;

    if (total > free) {
        return -ENOSPC;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        unsigned subtype = s->reserved_regions[i].type;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(s->reserved_regions[i].low);
        prop.end = cpu_to_le64(s->reserved_regions[i].high);

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}

/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    size_t free = VIOMMU_PROBE_SIZE;
    ssize_t count;

    if (!virtio_iommu_mr(s, ep_id)) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;

    return VIRTIO_IOMMU_S_OK;
}

static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t req_sz)
{
    size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail);

    sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}

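/*
 * Generate a virtio_iommu_handle_<req>() wrapper for each request type:
 * copy the request payload (minus the tail) out of the descriptor chain and
 * dispatch it to the corresponding virtio_iommu_<req>() handler.
 */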
#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)

static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}

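/*
 * Request virtqueue handler: pop each descriptor chain, decode the request
 * head, run the command under the device mutex and push back the status tail
 * (preceded by the probe output buffer for PROBE requests).
 */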
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    size_t output_size = sizeof(tail), sz;
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = (struct virtio_iommu_req_tail *)
                        (buf + s->config.probe_size);
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}

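/*
 * Report a translation fault to the guest by posting a virtio_iommu_fault
 * event on the event virtqueue. The event is dropped (with a rate-limited
 * warning) if the guest has not posted any event buffer.
 */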
static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}

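/*
 * IOMMU translation callback, invoked for each DMA access through the
 * endpoint's IOMMU memory region. Handles global and per-domain bypass,
 * reserved regions, looks up the mapping covering the accessed address,
 * validates the requested permissions and reports faults on the event queue.
 */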
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    bool found;
    int i;

    interval.low = addr;
    interval.high = addr + 1;

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        ReservedRegion *reg = &s->reserved_regions[i];

        if (addr >= reg->low && addr <= reg->high) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    } else if (ep->domain->bypass) {
        entry.perm = flag;
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_mutex_unlock(&s->mutex);
    return entry;
}

static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    struct virtio_iommu_config *out_config = (void *)config_data;

    out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
    out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
    out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
    out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
    out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
    out_config->probe_size = cpu_to_le32(dev_config->probe_size);
    out_config->bypass = dev_config->bypass;

    trace_virtio_iommu_get_config(dev_config->page_size_mask,
                                  dev_config->input_range.start,
                                  dev_config->input_range.end,
                                  dev_config->domain_range.start,
                                  dev_config->domain_range.end,
                                  dev_config->probe_size,
                                  dev_config->bypass);
}

static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    const struct virtio_iommu_config *in_config = (void *)config_data;

    if (in_config->bypass != dev_config->bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        } else if (in_config->bypass != 0 && in_config->bypass != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         in_config->bypass);
            return;
        }
        dev_config->bypass = in_config->bypass;
    }

    trace_virtio_iommu_set_config(in_config->bypass);
}

static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}

static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}

static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_mutex_unlock(&s->mutex);
}

static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
        return -EINVAL;
    }

    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}

/*
 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
 * for example 0xfffffffffffff000. When an assigned device has page size
 * restrictions due to the hardware IOMMU configuration, apply this restriction
 * to the mask.
 */
static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
                                           uint64_t new_mask,
                                           Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint64_t cur_mask = s->config.page_size_mask;

    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
                                          new_mask);

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                   " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
        return -1;
    }

    /*
     * After the machine is finalized, we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it. Having a different mask is possible but the guest will use
     * sub-optimal block sizes, so warn about it.
     */
    if (phase_check(PHASE_MACHINE_READY)) {
        int new_granule = ctz64(new_mask);
        int cur_granule = ctz64(cur_mask);

        if (new_granule != cur_granule) {
            error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                       " is incompatible with mask 0x%"PRIx64, cur_mask,
                       new_mask);
            return -1;
        } else if (new_mask != cur_mask) {
            warn_report("virtio-iommu page mask 0x%"PRIx64
                        " does not match 0x%"PRIx64, cur_mask, new_mask);
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}

static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
     */
    s->config.bypass = s->boot_bypass;
}

static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
                sizeof(struct virtio_iommu_config));

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                             virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    s->config.page_size_mask = TARGET_PAGE_MASK;
    s->config.input_range.end = UINT64_MAX;
    s->config.domain_range.end = UINT32_MAX;
    s->config.probe_size = VIOMMU_PROBE_SIZE;

    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);

    qemu_mutex_init(&s->mutex);

    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);

    if (s->primary_bus) {
        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
    } else {
        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
    }

    qemu_register_reset(virtio_iommu_system_reset, s);
}

static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    qemu_unregister_reset(virtio_iommu_system_reset, s);

    g_hash_table_destroy(s->as_by_busptr);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}

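/*
 * Device reset destroys and re-creates the domain and endpoint trees; state
 * is then rebuilt from guest requests, or restored through the vmstate
 * handlers below on incoming migration.
 */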
static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);

    trace_virtio_iommu_device_reset();

    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }
    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
                                 NULL, NULL, virtio_iommu_put_domain);
    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
                                   NULL, NULL, virtio_iommu_put_endpoint);
}

static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}

static void virtio_iommu_instance_init(Object *obj)
{
}

#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

#define VMSTATE_MAPPING                               \
{                                                     \
    .name = "mapping",                                \
    .version_id = 1,                                  \
    .minimum_version_id = 1,                          \
    .fields = (VMStateField[]) {                      \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
        VMSTATE_END_OF_LIST()                         \
    },                                                \
}

static const VMStateDescription vmstate_interval_mapping[2] = {
    VMSTATE_MAPPING,   /* value */
    VMSTATE_INTERVAL   /* key   */
};

static int domain_preload(void *opaque)
{
    VirtIOIOMMUDomain *domain = opaque;

    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, g_free, g_free);
    return 0;
}

static const VMStateDescription vmstate_endpoint = {
    .name = "endpoint",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_domain = {
    .name = "domain",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_load = domain_preload,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
                        vmstate_interval_mapping,
                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
        VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
        VMSTATE_END_OF_LIST()
    }
};

static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}

static int iommu_post_load(void *opaque, int version_id)
{
    VirtIOIOMMU *s = opaque;

    g_tree_foreach(s->domains, reconstruct_endpoints, s);
    return 0;
}

static const VMStateDescription vmstate_virtio_iommu_device = {
    .name = "virtio-iommu-device",
    .minimum_version_id = 2,
    .version_id = 2,
    .post_load = iommu_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
                                   &vmstate_domain, VirtIOIOMMUDomain),
        VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_iommu = {
    .name = "virtio-iommu",
    .minimum_version_id = 2,
    .priority = MIG_PRI_IOMMU,
    .version_id = 2,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static Property virtio_iommu_properties[] = {
    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_iommu_properties);
    dc->vmsd = &vmstate_virtio_iommu;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->get_features = virtio_iommu_get_features;
    vdc->set_status = virtio_iommu_set_status;
    vdc->vmsd = &vmstate_virtio_iommu_device;
}

static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
}

static const TypeInfo virtio_iommu_info = {
    .name = TYPE_VIRTIO_IOMMU,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOIOMMU),
    .instance_init = virtio_iommu_instance_init,
    .class_init = virtio_iommu_class_init,
};

static const TypeInfo virtio_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
    .class_init = virtio_iommu_memory_region_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_iommu_info);
    type_register_static(&virtio_iommu_memory_region_info);
}

type_init(virtio_register_types)