/* qemu/hw/virtio/virtio-pci.c */
   1/*
   2 * Virtio PCI Bindings
   3 *
   4 * Copyright IBM, Corp. 2007
   5 * Copyright (c) 2009 CodeSourcery
   6 *
   7 * Authors:
   8 *  Anthony Liguori   <aliguori@us.ibm.com>
   9 *  Paul Brook        <paul@codesourcery.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2.  See
  12 * the COPYING file in the top-level directory.
  13 *
  14 * Contributions after 2012-01-13 are licensed under the terms of the
  15 * GNU GPL, version 2 or (at your option) any later version.
  16 */
  17
  18#include "qemu/osdep.h"
  19
  20#include "exec/memop.h"
  21#include "standard-headers/linux/virtio_pci.h"
  22#include "hw/virtio/virtio.h"
  23#include "migration/qemu-file-types.h"
  24#include "hw/pci/pci.h"
  25#include "hw/pci/pci_bus.h"
  26#include "hw/qdev-properties.h"
  27#include "qapi/error.h"
  28#include "qemu/error-report.h"
  29#include "qemu/module.h"
  30#include "hw/pci/msi.h"
  31#include "hw/pci/msix.h"
  32#include "hw/loader.h"
  33#include "sysemu/kvm.h"
  34#include "virtio-pci.h"
  35#include "qemu/range.h"
  36#include "hw/virtio/virtio-bus.h"
  37#include "qapi/visitor.h"
  38
/* Size of the legacy register block up to (but excluding) the device-specific
 * config; depends on whether an MSI-X capability is *present* on the device. */
#define VIRTIO_PCI_REGION_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_present(dev))

#undef VIRTIO_PCI_CONFIG

/* The remaining space is defined by each driver as the per-driver
 * configuration space */
/* NOTE: unlike REGION_SIZE above this keys off msix_enabled(), i.e. the
 * runtime MSI-X enable bit, so the config offset moves when the guest
 * toggles MSI-X — as required by the legacy virtio PCI layout. */
#define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))

/* Forward declarations for functions defined later in this file. */
static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev);
static void virtio_pci_reset(DeviceState *qdev);
  50
  51/* virtio device */
  52/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
  53static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
  54{
  55    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
  56}
  57
  58/* DeviceState to VirtIOPCIProxy. Note: used on datapath,
  59 * be careful and test performance if you change this.
  60 */
  61static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
  62{
  63    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
  64}
  65
  66static void virtio_pci_notify(DeviceState *d, uint16_t vector)
  67{
  68    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
  69
  70    if (msix_enabled(&proxy->pci_dev))
  71        msix_notify(&proxy->pci_dev, vector);
  72    else {
  73        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  74        pci_set_irq(&proxy->pci_dev, atomic_read(&vdev->isr) & 1);
  75    }
  76}
  77
  78static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
  79{
  80    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
  81    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
  82
  83    pci_device_save(&proxy->pci_dev, f);
  84    msix_save(&proxy->pci_dev, f);
  85    if (msix_present(&proxy->pci_dev))
  86        qemu_put_be16(f, vdev->config_vector);
  87}
  88
/* Per-virtqueue migration state for the modern (virtio 1.0) interface:
 * queue size, enable flag and the guest-programmed desc/avail/used
 * addresses (each stored as a lo/hi pair of 32-bit halves). */
static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
    .name = "virtio_pci/modern_queue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(num, VirtIOPCIQueue),
        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
        VMSTATE_END_OF_LIST()
    }
};
 103
 104static bool virtio_pci_modern_state_needed(void *opaque)
 105{
 106    VirtIOPCIProxy *proxy = opaque;
 107
 108    return virtio_pci_modern(proxy);
 109}
 110
/* Migration subsection holding modern-interface transport state:
 * feature select registers, negotiated guest features (two 32-bit
 * halves) and the per-queue state for every possible queue. */
static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
    .name = "virtio_pci/modern_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_pci_modern_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
                             vmstate_virtio_pci_modern_queue_state,
                             VirtIOPCIQueue),
        VMSTATE_END_OF_LIST()
    }
};
 126
/* Top-level "extra state" vmstate for the proxy.  It has no fields of
 * its own; all content lives in the (conditionally needed) modern-state
 * subsection, keeping the stream backward compatible. */
static const VMStateDescription vmstate_virtio_pci = {
    .name = "virtio_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_pci_modern_state_sub,
        NULL
    }
};
 140
 141static bool virtio_pci_has_extra_state(DeviceState *d)
 142{
 143    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 144
 145    return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
 146}
 147
/* Serialize the extra transport state (vmstate_virtio_pci) into @f. */
static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL);
}
 154
/* Deserialize the extra transport state; version 1 is the only version.
 * Returns 0 on success, negative errno on stream error. */
static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
}
 161
 162static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
 163{
 164    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 165    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 166
 167    if (msix_present(&proxy->pci_dev))
 168        qemu_put_be16(f, virtio_queue_vector(vdev, n));
 169}
 170
/*
 * Load the PCI-transport state saved by virtio_pci_save_config(), in the
 * same order: PCI config, MSI-X state, then (if MSI-X is present) the
 * config-change vector.  Returns 0 on success or a negative error.
 */
static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    int ret;
    ret = pci_device_load(&proxy->pci_dev, f);
    if (ret) {
        return ret;
    }
    /* Drop all vector use counts; they are re-established below and in
     * virtio_pci_load_queue() from the migrated assignments. */
    msix_unuse_all_vectors(&proxy->pci_dev);
    msix_load(&proxy->pci_dev, f);
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vdev->config_vector);
    } else {
        vdev->config_vector = VIRTIO_NO_VECTOR;
    }
    if (vdev->config_vector != VIRTIO_NO_VECTOR) {
        return msix_vector_use(&proxy->pci_dev, vdev->config_vector);
    }
    return 0;
}
 193
/*
 * Load the MSI-X vector assignment of queue @n (counterpart of
 * virtio_pci_save_queue()) and mark the vector in use.
 * Returns 0 on success or a negative error from msix_vector_use().
 */
static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    uint16_t vector;
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vector);
    } else {
        /* No MSI-X capability: nothing was saved for this queue. */
        vector = VIRTIO_NO_VECTOR;
    }
    virtio_queue_set_vector(vdev, n, vector);
    if (vector != VIRTIO_NO_VECTOR) {
        return msix_vector_use(&proxy->pci_dev, vector);
    }

    return 0;
}
 212
 213static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
 214{
 215    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 216
 217    return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0;
 218}
 219
 220#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000
 221
 222static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
 223{
 224    return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ?
 225        QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4;
 226}
 227
/*
 * Attach (@assign) or detach an ioeventfd @notifier to queue @n's notify
 * address in every interface the proxy exposes (modern MMIO, optional
 * modern PIO, legacy PIO).  The del path mirrors the add path exactly.
 * Always returns 0.
 */
static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
                                       int n, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, n);
    bool legacy = virtio_pci_legacy(proxy);
    bool modern = virtio_pci_modern(proxy);
    bool fast_mmio = kvm_ioeventfd_any_length_enabled();
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    MemoryRegion *modern_mr = &proxy->notify.mr;
    MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr;
    MemoryRegion *legacy_mr = &proxy->bar;
    /* Modern notify addresses are spaced by the queue memory multiplier. */
    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
                         virtio_get_queue_index(vq);
    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;

    if (assign) {
        if (modern) {
            if (fast_mmio) {
                /* Size 0 matches writes of any length (fast MMIO). */
                memory_region_add_eventfd(modern_mr, modern_addr, 0,
                                          false, n, notifier);
            } else {
                memory_region_add_eventfd(modern_mr, modern_addr, 2,
                                          false, n, notifier);
            }
            if (modern_pio) {
                /* PIO notify matches on the written value (queue index). */
                memory_region_add_eventfd(modern_notify_mr, 0, 2,
                                              true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    } else {
        if (modern) {
            if (fast_mmio) {
                memory_region_del_eventfd(modern_mr, modern_addr, 0,
                                          false, n, notifier);
            } else {
                memory_region_del_eventfd(modern_mr, modern_addr, 2,
                                          false, n, notifier);
            }
            if (modern_pio) {
                memory_region_del_eventfd(modern_notify_mr, 0, 2,
                                          true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    }
    return 0;
}
 284
/* Start ioeventfd handling on the proxy's virtio bus. */
static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_start_ioeventfd(&proxy->bus);
}
 289
/* Stop ioeventfd handling on the proxy's virtio bus. */
static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_stop_ioeventfd(&proxy->bus);
}
 294
 295static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 296{
 297    VirtIOPCIProxy *proxy = opaque;
 298    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 299    hwaddr pa;
 300
 301    switch (addr) {
 302    case VIRTIO_PCI_GUEST_FEATURES:
 303        /* Guest does not negotiate properly?  We have to assume nothing. */
 304        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
 305            val = virtio_bus_get_vdev_bad_features(&proxy->bus);
 306        }
 307        virtio_set_features(vdev, val);
 308        break;
 309    case VIRTIO_PCI_QUEUE_PFN:
 310        pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 311        if (pa == 0) {
 312            virtio_pci_reset(DEVICE(proxy));
 313        }
 314        else
 315            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
 316        break;
 317    case VIRTIO_PCI_QUEUE_SEL:
 318        if (val < VIRTIO_QUEUE_MAX)
 319            vdev->queue_sel = val;
 320        break;
 321    case VIRTIO_PCI_QUEUE_NOTIFY:
 322        if (val < VIRTIO_QUEUE_MAX) {
 323            virtio_queue_notify(vdev, val);
 324        }
 325        break;
 326    case VIRTIO_PCI_STATUS:
 327        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
 328            virtio_pci_stop_ioeventfd(proxy);
 329        }
 330
 331        virtio_set_status(vdev, val & 0xFF);
 332
 333        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
 334            virtio_pci_start_ioeventfd(proxy);
 335        }
 336
 337        if (vdev->status == 0) {
 338            virtio_pci_reset(DEVICE(proxy));
 339        }
 340
 341        /* Linux before 2.6.34 drives the device without enabling
 342           the PCI device bus master bit. Enable it automatically
 343           for the guest. This is a PCI spec violation but so is
 344           initiating DMA with bus master bit clear. */
 345        if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
 346            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
 347                                     proxy->pci_dev.config[PCI_COMMAND] |
 348                                     PCI_COMMAND_MASTER, 1);
 349        }
 350        break;
 351    case VIRTIO_MSI_CONFIG_VECTOR:
 352        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
 353        /* Make it possible for guest to discover an error took place. */
 354        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 355            val = VIRTIO_NO_VECTOR;
 356        vdev->config_vector = val;
 357        break;
 358    case VIRTIO_MSI_QUEUE_VECTOR:
 359        msix_vector_unuse(&proxy->pci_dev,
 360                          virtio_queue_vector(vdev, vdev->queue_sel));
 361        /* Make it possible for guest to discover an error took place. */
 362        if (msix_vector_use(&proxy->pci_dev, val) < 0)
 363            val = VIRTIO_NO_VECTOR;
 364        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
 365        break;
 366    default:
 367        error_report("%s: unexpected address 0x%x value 0x%x",
 368                     __func__, addr, val);
 369        break;
 370    }
 371}
 372
/*
 * Read handler for the legacy (virtio 0.9) register block.
 * Unknown offsets return all-ones, the usual value for an
 * unimplemented I/O port.
 */
static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t ret = 0xFFFFFFFF;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->host_features;
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->guest_features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = virtio_queue_get_addr(vdev, vdev->queue_sel)
              >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = atomic_xchg(&vdev->isr, 0);
        pci_irq_deassert(&proxy->pci_dev);
        break;
    case VIRTIO_MSI_CONFIG_VECTOR:
        ret = vdev->config_vector;
        break;
    case VIRTIO_MSI_QUEUE_VECTOR:
        ret = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    default:
        break;
    }

    return ret;
}
 415
/*
 * Read from the legacy BAR: offsets below the (MSI-X dependent) config
 * boundary hit the transport registers; offsets above it hit the
 * device-specific config space, which is target-endian and therefore
 * byte-swapped here for big-endian guests (the region itself is LE).
 */
static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    uint64_t val = 0;
    if (addr < config) {
        return virtio_ioport_read(proxy, addr);
    }
    addr -= config;

    switch (size) {
    case 1:
        val = virtio_config_readb(vdev, addr);
        break;
    case 2:
        val = virtio_config_readw(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        break;
    case 4:
        val = virtio_config_readl(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        break;
    }
    return val;
}
 447
/*
 * Write to the legacy BAR: transport registers below the config
 * boundary, target-endian device config above it (mirror of
 * virtio_pci_config_read()).
 */
static void virtio_pci_config_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    if (addr < config) {
        virtio_ioport_write(proxy, addr, val);
        return;
    }
    addr -= config;
    /*
     * Virtio-PCI is odd. Ioports are LE but config space is target native
     * endian.
     */
    switch (size) {
    case 1:
        virtio_config_writeb(vdev, addr, val);
        break;
    case 2:
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        virtio_config_writew(vdev, addr, val);
        break;
    case 4:
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        virtio_config_writel(vdev, addr, val);
        break;
    }
}
 481
/* Memory region ops for the legacy BAR: little-endian, with accesses
 * split/merged into 1-4 byte implementation accesses. */
static const MemoryRegionOps virtio_pci_config_ops = {
    .read = virtio_pci_config_read,
    .write = virtio_pci_config_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
 491
 492static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
 493                                                 hwaddr *off, int len)
 494{
 495    int i;
 496    VirtIOPCIRegion *reg;
 497
 498    for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) {
 499        reg = &proxy->regs[i];
 500        if (*off >= reg->offset &&
 501            *off + len <= reg->offset + reg->size) {
 502            *off -= reg->offset;
 503            return &reg->mr;
 504        }
 505    }
 506
 507    return NULL;
 508}
 509
 510/* Below are generic functions to do memcpy from/to an address space,
 511 * without byteswaps, with input validation.
 512 *
 513 * As regular address_space_* APIs all do some kind of byteswap at least for
 514 * some host/target combinations, we are forced to explicitly convert to a
 515 * known-endianness integer value.
 516 * It doesn't really matter which endian format to go through, so the code
 517 * below selects the endian that causes the least amount of work on the given
 518 * host.
 519 *
 520 * Note: host pointer must be aligned.
 521 */
/*
 * Write @len bytes from @buf into the proxy's modern regions at @addr,
 * used by the VIRTIO_PCI_CAP_PCI_CFG window.  Both address and length
 * come from the guest, so illegal values are silently ignored.
 */
static
void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
                                const uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    /* Convert the bytes to an LE integer so dispatch does no extra swap. */
    switch (len) {
    case 1:
        val = pci_get_byte(buf);
        break;
    case 2:
        val = pci_get_word(buf);
        break;
    case 4:
        val = pci_get_long(buf);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        return;
    }
    memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
                                 MEMTXATTRS_UNSPECIFIED);
}
 559
/*
 * Read @len bytes from the proxy's modern regions at @addr into @buf
 * (counterpart of virtio_address_space_write()).  Guest-controlled
 * address/length; on an unmatched region or illegal length @buf is
 * left untouched.
 */
static void
virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
                          uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
                                MEMTXATTRS_UNSPECIFIED);
    /* Store the LE integer back as bytes. */
    switch (len) {
    case 1:
        pci_set_byte(buf, val);
        break;
    case 2:
        pci_set_word(buf, val);
        break;
    case 4:
        pci_set_long(buf, val);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        break;
    }
}
 597
 598static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
 599                                uint32_t val, int len)
 600{
 601    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 602    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 603    struct virtio_pci_cfg_cap *cfg;
 604
 605    pci_default_write_config(pci_dev, address, val, len);
 606
 607    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
 608        pcie_cap_flr_write_config(pci_dev, address, val, len);
 609    }
 610
 611    if (range_covers_byte(address, len, PCI_COMMAND) &&
 612        !(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
 613        virtio_pci_stop_ioeventfd(proxy);
 614        virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
 615    }
 616
 617    if (proxy->config_cap &&
 618        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 619                                                                  pci_cfg_data),
 620                       sizeof cfg->pci_cfg_data)) {
 621        uint32_t off;
 622        uint32_t len;
 623
 624        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 625        off = le32_to_cpu(cfg->cap.offset);
 626        len = le32_to_cpu(cfg->cap.length);
 627
 628        if (len == 1 || len == 2 || len == 4) {
 629            assert(len <= sizeof cfg->pci_cfg_data);
 630            virtio_address_space_write(proxy, off, cfg->pci_cfg_data, len);
 631        }
 632    }
 633}
 634
 635static uint32_t virtio_read_config(PCIDevice *pci_dev,
 636                                   uint32_t address, int len)
 637{
 638    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
 639    struct virtio_pci_cfg_cap *cfg;
 640
 641    if (proxy->config_cap &&
 642        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
 643                                                                  pci_cfg_data),
 644                       sizeof cfg->pci_cfg_data)) {
 645        uint32_t off;
 646        uint32_t len;
 647
 648        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
 649        off = le32_to_cpu(cfg->cap.offset);
 650        len = le32_to_cpu(cfg->cap.length);
 651
 652        if (len == 1 || len == 2 || len == 4) {
 653            assert(len <= sizeof cfg->pci_cfg_data);
 654            virtio_address_space_read(proxy, off, cfg->pci_cfg_data, len);
 655        }
 656    }
 657
 658    return pci_default_read_config(pci_dev, address, len);
 659}
 660
/*
 * Take a reference on the KVM MSI route for @vector, allocating the
 * route on first use.  @queue_no is currently unused.
 * Returns 0 on success, negative errno from route allocation.
 */
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                        unsigned int queue_no,
                                        unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    if (irqfd->users == 0) {
        ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev);
        if (ret < 0) {
            return ret;
        }
        /* On success the return value is the allocated virq number. */
        irqfd->virq = ret;
    }
    irqfd->users++;
    return 0;
}
 678
/* Drop a reference on @vector's KVM MSI route; free the virq when the
 * last user goes away. */
static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
                                             unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    if (--irqfd->users == 0) {
        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
    }
}
 687
/* Wire queue @queue_no's guest notifier to @vector's virq as a KVM
 * irqfd, so guest interrupts bypass userspace.  Returns 0 or negative
 * errno. */
static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
                                 unsigned int queue_no,
                                 unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
}
 698
/* Unwire queue @queue_no's guest notifier from @vector's virq.  The
 * removal is expected to succeed (asserted), since the irqfd was set
 * up by kvm_virtio_pci_irqfd_use(). */
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
                                      unsigned int queue_no,
                                      unsigned int vector)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
    assert(ret == 0);
}
 712
/*
 * Set up KVM MSI routes (and, if the device supports notifier masking,
 * irqfds) for the first @nvqs queues.  On any failure, everything set
 * up so far is unwound and the error is returned.
 */
static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
{
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    unsigned int vector;
    int ret, queue_no;

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            /* Queues are allocated contiguously; first empty one ends it. */
            break;
        }
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
        if (ret < 0) {
            goto undo;
        }
        /* If guest supports masking, set up irqfd now.
         * Otherwise, delay until unmasked in the frontend.
         */
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
            if (ret < 0) {
                kvm_virtio_pci_vq_vector_release(proxy, vector);
                goto undo;
            }
        }
    }
    return 0;

undo:
    /* Tear down in reverse everything the loop above completed. */
    while (--queue_no >= 0) {
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
    return ret;
}
 759
/*
 * Tear down the KVM MSI routes/irqfds created by
 * kvm_virtio_pci_vector_use() for the first @nvqs queues.
 */
static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
{
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    unsigned int vector;
    int queue_no;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            break;
        }
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        /* If guest supports masking, clean up irqfd now.
         * Otherwise, it was cleaned when masked in the frontend.
         */
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
}
 785
/*
 * Unmask @vector for queue @queue_no.  Refreshes the KVM MSI route if
 * the guest reprogrammed the MSI message while masked, then either
 * unmasks the existing irqfd (devices with masking support) or sets one
 * up now.  Returns 0 or a negative error.
 */
static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
                                       unsigned int queue_no,
                                       unsigned int vector,
                                       MSIMessage msg)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd;
    int ret = 0;

    if (proxy->vector_irqfd) {
        irqfd = &proxy->vector_irqfd[vector];
        if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
                                               &proxy->pci_dev);
            if (ret < 0) {
                return ret;
            }
            kvm_irqchip_commit_routes(kvm_state);
        }
    }

    /* If guest supports masking, irqfd is already setup, unmask it.
     * Otherwise, set it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, false);
        /* Test after unmasking to avoid losing events. */
        if (k->guest_notifier_pending &&
            k->guest_notifier_pending(vdev, queue_no)) {
            event_notifier_set(n);
        }
    } else {
        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
    }
    return ret;
}
 825
/*
 * Mask @vector for queue @queue_no: devices supporting notifier masking
 * keep the irqfd and mask in the frontend; others release the irqfd.
 */
static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
                                             unsigned int queue_no,
                                             unsigned int vector)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    /* If guest supports masking, keep irqfd but mask it.
     * Otherwise, clean it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, true);
    } else {
        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
    }
}
 842
/*
 * MSI-X unmask callback: unmask @vector for every queue using it.
 * On failure, re-mask exactly the queues already unmasked (tracked in
 * `unmasked`) and return the error.
 */
static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                    MSIMessage msg)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    int ret, index, unmasked = 0;

    while (vq) {
        index = virtio_get_queue_index(vq);
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
            if (ret < 0) {
                goto undo;
            }
            ++unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }

    return 0;

undo:
    /* Walk the vector's queues again, re-masking only as many as we
     * managed to unmask before the failure. */
    vq = virtio_vector_first_queue(vdev, vector);
    while (vq && unmasked >= 0) {
        index = virtio_get_queue_index(vq);
        if (index < proxy->nvqs_with_notifiers) {
            virtio_pci_vq_vector_mask(proxy, index, vector);
            --unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }
    return ret;
}
 880
/* MSI-X mask callback: mask @vector for every queue using it. */
static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    int index;

    while (vq) {
        index = virtio_get_queue_index(vq);
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            virtio_pci_vq_vector_mask(proxy, index, vector);
        }
        vq = virtio_vector_next_queue(vq);
    }
}
 899
 900static void virtio_pci_vector_poll(PCIDevice *dev,
 901                                   unsigned int vector_start,
 902                                   unsigned int vector_end)
 903{
 904    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
 905    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 906    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 907    int queue_no;
 908    unsigned int vector;
 909    EventNotifier *notifier;
 910    VirtQueue *vq;
 911
 912    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
 913        if (!virtio_queue_get_num(vdev, queue_no)) {
 914            break;
 915        }
 916        vector = virtio_queue_vector(vdev, queue_no);
 917        if (vector < vector_start || vector >= vector_end ||
 918            !msix_is_masked(dev, vector)) {
 919            continue;
 920        }
 921        vq = virtio_get_queue(vdev, queue_no);
 922        notifier = virtio_queue_get_guest_notifier(vq);
 923        if (k->guest_notifier_pending) {
 924            if (k->guest_notifier_pending(vdev, queue_no)) {
 925                msix_set_pending(dev, vector);
 926            }
 927        } else if (event_notifier_test_and_clear(notifier)) {
 928            msix_set_pending(dev, vector);
 929        }
 930    }
 931}
 932
 933static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
 934                                         bool with_irqfd)
 935{
 936    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 937    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 938    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
 939    VirtQueue *vq = virtio_get_queue(vdev, n);
 940    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 941
 942    if (assign) {
 943        int r = event_notifier_init(notifier, 0);
 944        if (r < 0) {
 945            return r;
 946        }
 947        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
 948    } else {
 949        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
 950        event_notifier_cleanup(notifier);
 951    }
 952
 953    if (!msix_enabled(&proxy->pci_dev) &&
 954        vdev->use_guest_notifier_mask &&
 955        vdc->guest_notifier_mask) {
 956        vdc->guest_notifier_mask(vdev, n, !assign);
 957    }
 958
 959    return 0;
 960}
 961
 962static bool virtio_pci_query_guest_notifiers(DeviceState *d)
 963{
 964    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 965    return msix_enabled(&proxy->pci_dev);
 966}
 967
/*
 * Assign or deassign guest notifiers (and, when KVM MSI irqfds are
 * available, the vector-to-irqfd routing) for the first @nvqs
 * virtqueues.  Returns 0 on success, or a negative error after
 * rolling back any partially completed assignment.
 */
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int r, n;
    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
        kvm_msi_via_irqfd_enabled();

    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);

    /* When deassigning, pass a consistent nvqs value
     * to avoid leaking notifiers.
     */
    assert(assign || nvqs == proxy->nvqs_with_notifiers);

    proxy->nvqs_with_notifiers = nvqs;

    /* Must unset vector notifier while guest notifier is still assigned */
    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
        msix_unset_vector_notifiers(&proxy->pci_dev);
        if (proxy->vector_irqfd) {
            kvm_virtio_pci_vector_release(proxy, nvqs);
            g_free(proxy->vector_irqfd);
            proxy->vector_irqfd = NULL;
        }
    }

    for (n = 0; n < nvqs; n++) {
        /* A zero-sized queue marks the end of the configured queues. */
        if (!virtio_queue_get_num(vdev, n)) {
            break;
        }

        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
        if (r < 0) {
            goto assign_error;
        }
    }

    /* Must set vector notifier after guest notifier has been assigned */
    if ((with_irqfd || k->guest_notifier_mask) && assign) {
        if (with_irqfd) {
            /* One irqfd slot per allocated MSI-X vector. */
            proxy->vector_irqfd =
                g_malloc0(sizeof(*proxy->vector_irqfd) *
                          msix_nr_vectors_allocated(&proxy->pci_dev));
            r = kvm_virtio_pci_vector_use(proxy, nvqs);
            if (r < 0) {
                goto assign_error;
            }
        }
        r = msix_set_vector_notifiers(&proxy->pci_dev,
                                      virtio_pci_vector_unmask,
                                      virtio_pci_vector_mask,
                                      virtio_pci_vector_poll);
        if (r < 0) {
            goto notifiers_error;
        }
    }

    return 0;

notifiers_error:
    if (with_irqfd) {
        assert(assign);
        kvm_virtio_pci_vector_release(proxy, nvqs);
    }

assign_error:
    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
    assert(assign);
    while (--n >= 0) {
        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
    }
    return r;
}
1043
1044static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
1045                                           MemoryRegion *mr, bool assign)
1046{
1047    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1048    int offset;
1049
1050    if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) ||
1051        virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
1052        return -1;
1053    }
1054
1055    if (assign) {
1056        offset = virtio_pci_queue_mem_mult(proxy) * n;
1057        memory_region_add_subregion_overlap(&proxy->notify.mr, offset, mr, 1);
1058    } else {
1059        memory_region_del_subregion(&proxy->notify.mr, mr);
1060    }
1061
1062    return 0;
1063}
1064
1065static void virtio_pci_vmstate_change(DeviceState *d, bool running)
1066{
1067    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
1068    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1069
1070    if (running) {
1071        /* Old QEMU versions did not set bus master enable on status write.
1072         * Detect DRIVER set and enable it.
1073         */
1074        if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
1075            (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
1076            !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1077            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
1078                                     proxy->pci_dev.config[PCI_COMMAND] |
1079                                     PCI_COMMAND_MASTER, 1);
1080        }
1081        virtio_pci_start_ioeventfd(proxy);
1082    } else {
1083        virtio_pci_stop_ioeventfd(proxy);
1084    }
1085}
1086
1087/*
1088 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
1089 */
1090
1091static int virtio_pci_query_nvectors(DeviceState *d)
1092{
1093    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1094
1095    return proxy->nvectors;
1096}
1097
1098static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
1099{
1100    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1101    PCIDevice *dev = &proxy->pci_dev;
1102
1103    return pci_get_address_space(dev);
1104}
1105
/*
 * Append a vendor-specific PCI capability described by @cap to the
 * device's config space.  Returns the config-space offset of the new
 * capability.
 */
static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                   struct virtio_pci_cap *cap)
{
    PCIDevice *dev = &proxy->pci_dev;
    int offset;

    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
                                cap->cap_len, &error_abort);

    assert(cap->cap_len >= sizeof *cap);
    /* Copy everything after the generic capability header (cap ID and
     * next pointer, filled in by pci_add_capability) from the template
     * into config space, starting at the cap_len field. */
    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
           cap->cap_len - PCI_CAP_FLAGS);

    return offset;
}
1121
/*
 * Read handler for the modern "common configuration" MMIO region.
 * Decodes the register selected by @addr; unknown registers read as
 * zero.
 */
static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t val = 0;
    int i;

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        val = proxy->dfselect;
        break;
    case VIRTIO_PCI_COMMON_DF:
        /* Device features: 32-bit window chosen by dfselect (only 0
         * and 1 are valid), with legacy-only feature bits removed. */
        if (proxy->dfselect <= 1) {
            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

            val = (vdev->host_features & ~vdc->legacy_features) >>
                (32 * proxy->dfselect);
        }
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        val = proxy->gfselect;
        break;
    case VIRTIO_PCI_COMMON_GF:
        /* Guest (driver) features: 32-bit window chosen by gfselect. */
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            val = proxy->guest_features[proxy->gfselect];
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        val = vdev->config_vector;
        break;
    case VIRTIO_PCI_COMMON_NUMQ:
        /* Highest configured queue index + 1, i.e. the queue count. */
        for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) {
            if (virtio_queue_get_num(vdev, i)) {
                val = i + 1;
            }
        }
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        val = vdev->status;
        break;
    case VIRTIO_PCI_COMMON_CFGGENERATION:
        val = vdev->generation;
        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        val = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        val = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        val = proxy->vqs[vdev->queue_sel].enabled;
        break;
    case VIRTIO_PCI_COMMON_Q_NOFF:
        /* Simply map queues in order */
        val = vdev->queue_sel;
        break;
    /* Ring addresses are stored as lo/hi 32-bit halves per queue. */
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        val = proxy->vqs[vdev->queue_sel].desc[0];
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        val = proxy->vqs[vdev->queue_sel].desc[1];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        val = proxy->vqs[vdev->queue_sel].avail[0];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        val = proxy->vqs[vdev->queue_sel].avail[1];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        val = proxy->vqs[vdev->queue_sel].used[0];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        val = proxy->vqs[vdev->queue_sel].used[1];
        break;
    default:
        val = 0;
    }

    return val;
}
1206
/*
 * Write handler for the modern "common configuration" MMIO region.
 * Decodes the register selected by @addr; writes to unknown registers
 * are ignored.
 */
static void virtio_pci_common_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        proxy->dfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        proxy->gfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GF:
        /* Store the selected 32-bit window of driver features and push
         * the combined 64-bit value down to the device. */
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            proxy->guest_features[proxy->gfselect] = val;
            virtio_set_features(vdev,
                                (((uint64_t)proxy->guest_features[1]) << 32) |
                                proxy->guest_features[0]);
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        /* Release the previous config vector before claiming the new one. */
        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
        /* Make it possible for guest to discover an error took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        vdev->config_vector = val;
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        /* Stop ioeventfd before the device sees DRIVER_OK go away. */
        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
            virtio_pci_stop_ioeventfd(proxy);
        }

        virtio_set_status(vdev, val & 0xFF);

        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
            virtio_pci_start_ioeventfd(proxy);
        }

        /* A status of 0 means the driver requested a device reset. */
        if (vdev->status == 0) {
            virtio_pci_reset(DEVICE(proxy));
        }

        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        /* Out-of-range selectors are silently ignored. */
        if (val < VIRTIO_QUEUE_MAX) {
            vdev->queue_sel = val;
        }
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        proxy->vqs[vdev->queue_sel].num = val;
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        msix_vector_unuse(&proxy->pci_dev,
                          virtio_queue_vector(vdev, vdev->queue_sel));
        /* Make it possible for guest to discover an error took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        /* Commit the staged size and lo/hi ring addresses for the
         * selected queue, then mark it enabled. */
        virtio_queue_set_num(vdev, vdev->queue_sel,
                             proxy->vqs[vdev->queue_sel].num);
        virtio_queue_set_rings(vdev, vdev->queue_sel,
                       ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].desc[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].avail[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].used[0]);
        proxy->vqs[vdev->queue_sel].enabled = 1;
        break;
    /* Ring addresses are staged as lo/hi 32-bit halves until Q_ENABLE. */
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        proxy->vqs[vdev->queue_sel].desc[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        proxy->vqs[vdev->queue_sel].desc[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        proxy->vqs[vdev->queue_sel].avail[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        proxy->vqs[vdev->queue_sel].avail[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        proxy->vqs[vdev->queue_sel].used[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        proxy->vqs[vdev->queue_sel].used[1] = val;
        break;
    default:
        break;
    }
}
1303
1304
1305static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
1306                                       unsigned size)
1307{
1308    return 0;
1309}
1310
1311static void virtio_pci_notify_write(void *opaque, hwaddr addr,
1312                                    uint64_t val, unsigned size)
1313{
1314    VirtIODevice *vdev = opaque;
1315    VirtIOPCIProxy *proxy = VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent);
1316    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
1317
1318    if (queue < VIRTIO_QUEUE_MAX) {
1319        virtio_queue_notify(vdev, queue);
1320    }
1321}
1322
1323static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
1324                                        uint64_t val, unsigned size)
1325{
1326    VirtIODevice *vdev = opaque;
1327    unsigned queue = val;
1328
1329    if (queue < VIRTIO_QUEUE_MAX) {
1330        virtio_queue_notify(vdev, queue);
1331    }
1332}
1333
/*
 * ISR status read: atomically fetch and clear the ISR bits
 * (read-to-clear semantics) and deassert the INTx line.
 */
static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint64_t val = atomic_xchg(&vdev->isr, 0);
    pci_irq_deassert(&proxy->pci_dev);

    return val;
}
1344
/* Writes to the ISR region are ignored; ISR state is read-to-clear. */
static void virtio_pci_isr_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
}
1349
1350static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
1351                                       unsigned size)
1352{
1353    VirtIODevice *vdev = opaque;
1354    uint64_t val = 0;
1355
1356    switch (size) {
1357    case 1:
1358        val = virtio_config_modern_readb(vdev, addr);
1359        break;
1360    case 2:
1361        val = virtio_config_modern_readw(vdev, addr);
1362        break;
1363    case 4:
1364        val = virtio_config_modern_readl(vdev, addr);
1365        break;
1366    }
1367    return val;
1368}
1369
1370static void virtio_pci_device_write(void *opaque, hwaddr addr,
1371                                    uint64_t val, unsigned size)
1372{
1373    VirtIODevice *vdev = opaque;
1374    switch (size) {
1375    case 1:
1376        virtio_config_modern_writeb(vdev, addr, val);
1377        break;
1378    case 2:
1379        virtio_config_modern_writew(vdev, addr, val);
1380        break;
1381    case 4:
1382        virtio_config_modern_writel(vdev, addr, val);
1383        break;
1384    }
1385}
1386
/*
 * Create the MMIO sub-regions backing the modern (virtio 1.0)
 * capability set: common config, ISR, device-specific config, and the
 * MMIO and PIO notify areas.  The regions are initialized here but
 * only mapped into a BAR later, at device-plugged time.
 */
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy)
{
    static const MemoryRegionOps common_ops = {
        .read = virtio_pci_common_read,
        .write = virtio_pci_common_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps isr_ops = {
        .read = virtio_pci_isr_read,
        .write = virtio_pci_isr_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps device_ops = {
        .read = virtio_pci_device_read,
        .write = virtio_pci_device_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps notify_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    /* Same notify region, but the PIO variant takes the queue index
     * from the written value instead of the offset. */
    static const MemoryRegionOps notify_pio_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write_pio,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };


    /* common/isr handlers take the proxy; device/notify handlers take
     * the VirtIODevice directly as their opaque pointer. */
    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
                          &common_ops,
                          proxy,
                          "virtio-pci-common",
                          proxy->common.size);

    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
                          &isr_ops,
                          proxy,
                          "virtio-pci-isr",
                          proxy->isr.size);

    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
                          &device_ops,
                          virtio_bus_get_device(&proxy->bus),
                          "virtio-pci-device",
                          proxy->device.size);

    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
                          &notify_ops,
                          virtio_bus_get_device(&proxy->bus),
                          "virtio-pci-notify",
                          proxy->notify.size);

    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
                          &notify_pio_ops,
                          virtio_bus_get_device(&proxy->bus),
                          "virtio-pci-notify-pio",
                          proxy->notify_pio.size);
}
1466
1467static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
1468                                         VirtIOPCIRegion *region,
1469                                         struct virtio_pci_cap *cap,
1470                                         MemoryRegion *mr,
1471                                         uint8_t bar)
1472{
1473    memory_region_add_subregion(mr, region->offset, &region->mr);
1474
1475    cap->cfg_type = region->type;
1476    cap->bar = bar;
1477    cap->offset = cpu_to_le32(region->offset);
1478    cap->length = cpu_to_le32(region->size);
1479    virtio_pci_add_mem_cap(proxy, cap);
1480
1481}
1482
1483static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
1484                                             VirtIOPCIRegion *region,
1485                                             struct virtio_pci_cap *cap)
1486{
1487    virtio_pci_modern_region_map(proxy, region, cap,
1488                                 &proxy->modern_bar, proxy->modern_mem_bar_idx);
1489}
1490
1491static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
1492                                            VirtIOPCIRegion *region,
1493                                            struct virtio_pci_cap *cap)
1494{
1495    virtio_pci_modern_region_map(proxy, region, cap,
1496                                 &proxy->io_bar, proxy->modern_io_bar_idx);
1497}
1498
1499static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
1500                                               VirtIOPCIRegion *region)
1501{
1502    memory_region_del_subregion(&proxy->modern_bar,
1503                                &region->mr);
1504}
1505
1506static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
1507                                              VirtIOPCIRegion *region)
1508{
1509    memory_region_del_subregion(&proxy->io_bar,
1510                                &region->mr);
1511}
1512
1513static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
1514{
1515    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1516    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1517
1518    if (virtio_pci_modern(proxy)) {
1519        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
1520    }
1521
1522    virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
1523}
1524
1525/* This is called by virtio-bus just after the device is plugged. */
1526static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
1527{
1528    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1529    VirtioBusState *bus = &proxy->bus;
1530    bool legacy = virtio_pci_legacy(proxy);
1531    bool modern;
1532    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1533    uint8_t *config;
1534    uint32_t size;
1535    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1536
1537    /*
1538     * Virtio capabilities present without
1539     * VIRTIO_F_VERSION_1 confuses guests
1540     */
1541    if (!proxy->ignore_backend_features &&
1542            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
1543        virtio_pci_disable_modern(proxy);
1544
1545        if (!legacy) {
1546            error_setg(errp, "Device doesn't support modern mode, and legacy"
1547                             " mode is disabled");
1548            error_append_hint(errp, "Set disable-legacy to off\n");
1549
1550            return;
1551        }
1552    }
1553
1554    modern = virtio_pci_modern(proxy);
1555
1556    config = proxy->pci_dev.config;
1557    if (proxy->class_code) {
1558        pci_config_set_class(config, proxy->class_code);
1559    }
1560
1561    if (legacy) {
1562        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1563            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
1564                       " neither legacy nor transitional device");
1565            return ;
1566        }
1567        /*
1568         * Legacy and transitional devices use specific subsystem IDs.
1569         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
1570         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
1571         */
1572        pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
1573    } else {
1574        /* pure virtio-1.0 */
1575        pci_set_word(config + PCI_VENDOR_ID,
1576                     PCI_VENDOR_ID_REDHAT_QUMRANET);
1577        pci_set_word(config + PCI_DEVICE_ID,
1578                     0x1040 + virtio_bus_get_vdev_id(bus));
1579        pci_config_set_revision(config, 1);
1580    }
1581    config[PCI_INTERRUPT_PIN] = 1;
1582
1583
1584    if (modern) {
1585        struct virtio_pci_cap cap = {
1586            .cap_len = sizeof cap,
1587        };
1588        struct virtio_pci_notify_cap notify = {
1589            .cap.cap_len = sizeof notify,
1590            .notify_off_multiplier =
1591                cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
1592        };
1593        struct virtio_pci_cfg_cap cfg = {
1594            .cap.cap_len = sizeof cfg,
1595            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
1596        };
1597        struct virtio_pci_notify_cap notify_pio = {
1598            .cap.cap_len = sizeof notify,
1599            .notify_off_multiplier = cpu_to_le32(0x0),
1600        };
1601
1602        struct virtio_pci_cfg_cap *cfg_mask;
1603
1604        virtio_pci_modern_regions_init(proxy);
1605
1606        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
1607        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
1608        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
1609        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);
1610
1611        if (modern_pio) {
1612            memory_region_init(&proxy->io_bar, OBJECT(proxy),
1613                               "virtio-pci-io", 0x4);
1614
1615            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
1616                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
1617
1618            virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
1619                                            &notify_pio.cap);
1620        }
1621
1622        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
1623                         PCI_BASE_ADDRESS_SPACE_MEMORY |
1624                         PCI_BASE_ADDRESS_MEM_PREFETCH |
1625                         PCI_BASE_ADDRESS_MEM_TYPE_64,
1626                         &proxy->modern_bar);
1627
1628        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
1629        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
1630        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
1631        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
1632        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
1633        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
1634    }
1635
1636    if (proxy->nvectors) {
1637        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
1638                                          proxy->msix_bar_idx, NULL);
1639        if (err) {
1640            /* Notice when a system that supports MSIx can't initialize it */
1641            if (err != -ENOTSUP) {
1642                warn_report("unable to init msix vectors to %" PRIu32,
1643                            proxy->nvectors);
1644            }
1645            proxy->nvectors = 0;
1646        }
1647    }
1648
1649    proxy->pci_dev.config_write = virtio_write_config;
1650    proxy->pci_dev.config_read = virtio_read_config;
1651
1652    if (legacy) {
1653        size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
1654            + virtio_bus_get_vdev_config_len(bus);
1655        size = pow2ceil(size);
1656
1657        memory_region_init_io(&proxy->bar, OBJECT(proxy),
1658                              &virtio_pci_config_ops,
1659                              proxy, "virtio-pci", size);
1660
1661        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
1662                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
1663    }
1664}
1665
1666static void virtio_pci_device_unplugged(DeviceState *d)
1667{
1668    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
1669    bool modern = virtio_pci_modern(proxy);
1670    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
1671
1672    virtio_pci_stop_ioeventfd(proxy);
1673
1674    if (modern) {
1675        virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
1676        virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
1677        virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
1678        virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);
1679        if (modern_pio) {
1680            virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
1681        }
1682    }
1683}
1684
/*
 * PCI realize: lay out the default BAR assignments and modern region
 * offsets, resolve the legacy/modern mode, set up PCIe capabilities on
 * express ports, then create the virtio bus and run the subclass
 * realize hook.
 */
static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
                     !pci_bus_is_root(pci_get_bus(pci_dev));

    if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
    }

    /*
     * virtio pci bar layout used by default.
     * subclasses can re-arrange things if needed.
     *
     *   region 0   --  virtio legacy io bar
     *   region 1   --  msi-x bar
     *   region 4+5 --  virtio modern memory (64bit) bar
     *
     */
    proxy->legacy_io_bar_idx  = 0;
    proxy->msix_bar_idx       = 1;
    proxy->modern_io_bar_idx  = 2;
    proxy->modern_mem_bar_idx = 4;

    /* Modern region layout inside the memory BAR: common / isr /
     * device / notify, each described by offset, size, and cap type. */
    proxy->common.offset = 0x0;
    proxy->common.size = 0x1000;
    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;

    proxy->isr.offset = 0x1000;
    proxy->isr.size = 0x1000;
    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;

    proxy->device.offset = 0x2000;
    proxy->device.size = 0x1000;
    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;

    /* Notify region: one slot per possible queue. */
    proxy->notify.offset = 0x3000;
    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    proxy->notify_pio.offset = 0x0;
    proxy->notify_pio.size = 0x4;
    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    /* subclasses can enforce modern, so do this unconditionally */
    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
                       /* PCI BAR regions must be powers of 2 */
                       pow2ceil(proxy->notify.offset + proxy->notify.size));

    /* AUTO resolves to "legacy off" on express ports, "on" elsewhere. */
    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
        error_setg(errp, "device cannot work as neither modern nor legacy mode"
                   " is enabled");
        error_append_hint(errp, "Set either disable-modern or disable-legacy"
                          " to off\n");
        return;
    }

    if (pcie_port && pci_is_express(pci_dev)) {
        int pos;

        pos = pcie_endpoint_cap_init(pci_dev, 0);
        assert(pos > 0);

        pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
                                 PCI_PM_SIZEOF, errp);
        if (pos < 0) {
            return;
        }

        pci_dev->exp.pm_cap = pos;

        /*
         * Indicates that this function complies with revision 1.2 of the
         * PCI Power Management Interface Specification.
         */
        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
            /* Init error enabling flags */
            pcie_cap_deverr_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
            /* Init Link Control Register */
            pcie_cap_lnkctl_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
            /* Init Power Management Control Register */
            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
                         PCI_PM_CTRL_STATE_MASK);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
            pcie_ats_init(pci_dev, 256);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
            /* Set Function Level Reset capability bit */
            pcie_cap_flr_init(pci_dev);
        }
    } else {
        /*
         * make future invocations of pci_is_express() return false
         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
         */
        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
    }

    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
    if (k->realize) {
        k->realize(proxy, errp);
    }
}
1804
/*
 * PCIDeviceClass::exit handler: undo the transport's MSI-X setup.
 * Device-specific teardown is handled by the virtio bus / subclass,
 * so only the MSI-X exclusive BAR remains to clean up here.
 */
static void virtio_pci_exit(PCIDevice *pci_dev)
{
    msix_uninit_exclusive_bar(pci_dev);
}
1809
1810static void virtio_pci_reset(DeviceState *qdev)
1811{
1812    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1813    VirtioBusState *bus = VIRTIO_BUS(&proxy->bus);
1814    PCIDevice *dev = PCI_DEVICE(qdev);
1815    int i;
1816
1817    virtio_pci_stop_ioeventfd(proxy);
1818    virtio_bus_reset(bus);
1819    msix_unuse_all_vectors(&proxy->pci_dev);
1820
1821    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1822        proxy->vqs[i].enabled = 0;
1823        proxy->vqs[i].num = 0;
1824        proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
1825        proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
1826        proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
1827    }
1828
1829    if (pci_is_express(dev)) {
1830        pcie_cap_deverr_reset(dev);
1831        pcie_cap_lnkctl_reset(dev);
1832
1833        pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
1834    }
1835}
1836
/*
 * Properties shared by every virtio PCI transport (installed on the
 * abstract TYPE_VIRTIO_PCI base class).  The bit properties toggle
 * flags in VirtIOPCIProxy::flags; the "x-" prefixed ones are internal
 * knobs, typically tweaked by machine-type compat settings.
 */
static Property virtio_pci_properties[] = {
    /* NOTE(review): migration-compat workaround — semantics defined elsewhere. */
    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
    /* Presumably controls sending extra migration state; confirm at usage site. */
    DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
    /* Enables the PIO notification region (see notify_pio in realize). */
    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
    /* Suppresses QEMU_PCI_CAP_EXPRESS in virtio_pci_dc_realize(). */
    DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
    /* Affects virtio_pci_queue_mem_mult(), i.e. per-queue notify spacing. */
    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
                     ignore_backend_features, false),
    /* When set, realize calls pcie_ats_init() on PCIe ports. */
    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_BIT, false),
    /* When set, realize calls pcie_cap_deverr_init() on PCIe ports. */
    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
    /* When set, realize calls pcie_cap_lnkctl_init() on PCIe ports. */
    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
    /* When set, realize makes the PM control register writable. */
    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
    /* When set, realize calls pcie_cap_flr_init() on PCIe ports. */
    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1862
1863static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
1864{
1865    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
1866    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
1867    PCIDevice *pci_dev = &proxy->pci_dev;
1868
1869    if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
1870        virtio_pci_modern(proxy)) {
1871        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
1872    }
1873
1874    vpciklass->parent_dc_realize(qdev, errp);
1875}
1876
/*
 * Class init for the abstract TYPE_VIRTIO_PCI base: wires up the PCI
 * device callbacks, identity registers, and common properties shared
 * by every virtio PCI transport.
 */
static void virtio_pci_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);

    dc->props = virtio_pci_properties;
    k->realize = virtio_pci_realize;
    k->exit = virtio_pci_exit;
    /* All virtio PCI devices carry the Red Hat / Qumranet vendor ID. */
    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
    k->revision = VIRTIO_PCI_ABI_VERSION;
    k->class_id = PCI_CLASS_OTHERS;
    /*
     * Interpose virtio_pci_dc_realize() so the PCIe capability flag is
     * fixed up before the parent class realize runs.
     */
    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
                                    &vpciklass->parent_dc_realize);
    dc->reset = virtio_pci_reset;
}
1893
/*
 * Abstract base type for all virtio PCI transport devices; concrete
 * device types are derived via virtio_pci_types_register().
 */
static const TypeInfo virtio_pci_info = {
    .name          = TYPE_VIRTIO_PCI,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VirtIOPCIProxy),
    .class_init    = virtio_pci_class_init,
    .class_size    = sizeof(VirtioPCIClass),
    .abstract      = true,
};
1902
/*
 * Properties only exposed on user-creatable "generic" types: whether
 * the legacy (pre-1.0) and modern virtio interfaces are offered.
 * disable-legacy defaults to "auto", resolved in realize based on
 * whether the device sits on a PCIe port.
 */
static Property virtio_pci_generic_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
                            ON_OFF_AUTO_AUTO),
    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
    DEFINE_PROP_END_OF_LIST(),
};
1909
1910static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
1911{
1912    const VirtioPCIDeviceTypeInfo *t = data;
1913    if (t->class_init) {
1914        t->class_init(klass, NULL);
1915    }
1916}
1917
/*
 * Class init for user-creatable generic types: attach the
 * disable-legacy/disable-modern properties on top of the base type.
 */
static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->props = virtio_pci_generic_properties;
}
1924
/*
 * Instance init for "-transitional" variants: keep both the legacy
 * and the modern virtio interfaces enabled.
 */
static void virtio_pci_transitional_instance_init(Object *obj)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);

    proxy->disable_legacy = ON_OFF_AUTO_OFF;
    proxy->disable_modern = false;
}
1932
/*
 * Instance init for "-non-transitional" variants: disable the legacy
 * interface so only the modern virtio interface is exposed.
 */
static void virtio_pci_non_transitional_instance_init(Object *obj)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);

    proxy->disable_legacy = ON_OFF_AUTO_ON;
    proxy->disable_modern = false;
}
1940
/*
 * Register the QOM types for one virtio PCI device family.
 *
 * Depending on which names @t supplies, this registers:
 *  - an abstract base type (t->base_name, or a synthesized
 *    "<generic_name>-base-type" when no base name is given),
 *  - a user-creatable generic type (t->generic_name) carrying the
 *    disable-legacy/disable-modern properties,
 *  - a "-non-transitional" variant (modern-only; both PCIe and
 *    Conventional PCI interfaces), and
 *  - a "-transitional" variant (legacy + modern; Conventional PCI
 *    only, since legacy mode needs PIO ports).
 *
 * type_register() copies the TypeInfo structs, so the stack-allocated
 * ones (and the temporary base_name string) are safe to release on
 * return.  t->class_data aliases @t itself, so callers must pass a
 * TypeInfo with static storage duration.
 */
void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
{
    char *base_name = NULL;
    TypeInfo base_type_info = {
        .name          = t->base_name,
        .parent        = t->parent ? t->parent : TYPE_VIRTIO_PCI,
        .instance_size = t->instance_size,
        .instance_init = t->instance_init,
        .class_size    = t->class_size,
        .abstract      = true,
        .interfaces    = t->interfaces,
    };
    TypeInfo generic_type_info = {
        .name = t->generic_name,
        .parent = base_type_info.name,
        .class_init = virtio_pci_generic_class_init,
        .interfaces = (InterfaceInfo[]) {
            { INTERFACE_PCIE_DEVICE },
            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
            { }
        },
    };

    if (!base_type_info.name) {
        /* No base type -> register a single generic device type */
        /* use intermediate %s-base-type to add generic device props */
        base_name = g_strdup_printf("%s-base-type", t->generic_name);
        base_type_info.name = base_name;
        base_type_info.class_init = virtio_pci_generic_class_init;

        /* The generic type then hosts the device's own class_init hook. */
        generic_type_info.parent = base_name;
        generic_type_info.class_init = virtio_pci_base_class_init;
        generic_type_info.class_data = (void *)t;

        /* Transitional variants require an explicit base type. */
        assert(!t->non_transitional_name);
        assert(!t->transitional_name);
    } else {
        base_type_info.class_init = virtio_pci_base_class_init;
        base_type_info.class_data = (void *)t;
    }

    /* The base type must exist before any of its children. */
    type_register(&base_type_info);
    if (generic_type_info.name) {
        type_register(&generic_type_info);
    }

    if (t->non_transitional_name) {
        const TypeInfo non_transitional_type_info = {
            .name          = t->non_transitional_name,
            .parent        = base_type_info.name,
            .instance_init = virtio_pci_non_transitional_instance_init,
            .interfaces = (InterfaceInfo[]) {
                { INTERFACE_PCIE_DEVICE },
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register(&non_transitional_type_info);
    }

    if (t->transitional_name) {
        const TypeInfo transitional_type_info = {
            .name          = t->transitional_name,
            .parent        = base_type_info.name,
            .instance_init = virtio_pci_transitional_instance_init,
            .interfaces = (InterfaceInfo[]) {
                /*
                 * Transitional virtio devices work only as Conventional PCI
                 * devices because they require PIO ports.
                 */
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register(&transitional_type_info);
    }
    g_free(base_name);
}
2019
2020/* virtio-pci-bus */
2021
2022static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
2023                               VirtIOPCIProxy *dev)
2024{
2025    DeviceState *qdev = DEVICE(dev);
2026    char virtio_bus_name[] = "virtio-bus";
2027
2028    qbus_create_inplace(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev,
2029                        virtio_bus_name);
2030}
2031
/*
 * Class init for TYPE_VIRTIO_PCI_BUS: bind the virtio bus operations
 * to their PCI transport implementations.  Only one virtio device can
 * sit on each bus instance.
 */
static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
{
    BusClass *bus_class = BUS_CLASS(klass);
    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
    bus_class->max_dev = 1;
    k->notify = virtio_pci_notify;
    /* Migration save/load hooks. */
    k->save_config = virtio_pci_save_config;
    k->load_config = virtio_pci_load_config;
    k->save_queue = virtio_pci_save_queue;
    k->load_queue = virtio_pci_load_queue;
    k->save_extra_state = virtio_pci_save_extra_state;
    k->load_extra_state = virtio_pci_load_extra_state;
    k->has_extra_state = virtio_pci_has_extra_state;
    /* Guest/host notifier management. */
    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
    k->vmstate_change = virtio_pci_vmstate_change;
    /* Device plug/unplug lifecycle. */
    k->pre_plugged = virtio_pci_pre_plugged;
    k->device_plugged = virtio_pci_device_plugged;
    k->device_unplugged = virtio_pci_device_unplugged;
    k->query_nvectors = virtio_pci_query_nvectors;
    /* ioeventfd-based notification acceleration. */
    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
    k->get_dma_as = virtio_pci_get_dma_as;
}
2057
/* QOM type for the virtio bus embedded in each virtio PCI proxy. */
static const TypeInfo virtio_pci_bus_info = {
    .name          = TYPE_VIRTIO_PCI_BUS,
    .parent        = TYPE_VIRTIO_BUS,
    .instance_size = sizeof(VirtioPCIBusState),
    .class_init    = virtio_pci_bus_class_init,
};
2064
/*
 * Module init: register the abstract base types.  Concrete virtio PCI
 * device types are registered by their own modules through
 * virtio_pci_types_register().
 */
static void virtio_pci_register_types(void)
{
    /* Base types: */
    type_register_static(&virtio_pci_bus_info);
    type_register_static(&virtio_pci_info);
}

type_init(virtio_pci_register_types)
2073
2074